2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these libraries and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
23 /* $XConsortium: boolpars.c /main/5 1996/11/25 18:49:27 drk $
25 * (c) Copyright 1996 Digital Equipment Corporation.
26 * (c) Copyright 1996 Hewlett-Packard Company.
27 * (c) Copyright 1996 International Business Machines Corp.
28 * (c) Copyright 1996 Sun Microsystems, Inc.
29 * (c) Copyright 1996 Novell, Inc.
30 * (c) Copyright 1996 FUJITSU LIMITED.
31 * (c) Copyright 1996 Hitachi.
34 * COMPONENT_NAME: austext
36 * FUNCTIONS: add_syntax_errmsg
55 * (C) COPYRIGHT International Business Machines Corp. 1996
57 * Licensed Materials - Property of IBM
58 * US Government Users Restricted Rights - Use, duplication or
59 * disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
61 /********************* BOOLPARS.C ********************
62 * $Id: boolpars.c /main/5 1996/11/25 18:49:27 drk $
64 * AusText/DtSearch yacc-based boolean query parser.
65 * Converts boolean query into stems array and truth table
66 * for subsequent search. Boolyac.y is the yacc source.
67 * After processing by yacc, it becomes boolyac.c and boolyac.h.
68 * This module contains all the related C source code: yylex,
69 * yacc action functions, and the main AusText driver function, boolean_parse.
70 * Additional information (format of TRUTHTAB) in header file boolpars.h.
73 * Revision 1.4 1996/03/22 23:12:50 miker
74 * Added string.h header and correctly cast strcspn() calls.
76 * Revision 1.3 1996/03/20 19:14:30 miker
77 * Enable collocation expressions in stem (type 'S') searches.
79 * Revision 1.2 1996/03/13 22:35:59 miker
80 * Changed char to UCHAR several places; similar typecasts.
82 * Revision 1.1 1996/03/05 15:52:06 miker
/* Compile-time guard. The truth tables in this file keep one bit per
 * stem in an unsigned char permute (see the 1 << stemno mask in
 * get_stem_truthtab) and size their permute arrays for 256 values,
 * so the stem limit has to be exactly 8. */
90 #if (DtSrMAX_STEMCOUNT != 8)
91 #error DtSrMAX_STEMCOUNT is not defined to be 8.
/* Module tag; pasted in front of a numeric location code to build the
 * message prefixes used throughout this file, e.g. PROGNAME"089". */
94 #define PROGNAME "BOOLPARS"
/* Characters that end a word or collocation token; used as the reject
 * set of the strcspn() calls in the lexer. */
95 #define WORD_ENDERS " \t\n\f()|@~&"
/* NOTE(review): no use of MAX_YYERRORS is visible in this view. */
96 #define MAX_YYERRORS 4
/* Message set number passed to every catgets() call in this file. */
97 #define MS_boolpars 28
100 /****************************************/
104 /****************************************/
/* Nonzero when the query string contains no NOT operator character;
 * assigned in boolean_parse(). */
105 int qry_has_no_NOTs = FALSE;
/* Nonzero when the query string contains none of the OR, NOT or
 * COLLOCATION operator characters; assigned in boolean_parse() and
 * tested by the lexer when it meets an AND operator. */
106 int qry_is_all_ANDs = FALSE;
/* Result table of the most recent parse; filled by copy_final_truthtab(). */
107 TRUTHTAB final_truthtab = { 0 };
/* Count of tokens for which the language parser returned NULL during
 * the current parse; reset to 0 in boolean_parse(). */
108 int parser_invalid_wordcount = 0;
/* Trace switch; loaded from usrblk.debug & USRDBG_BOOL in boolean_parse().
 * When set, the functions in this file write traces to aa_stderr. */
110 static int debugging_boolpars = FALSE;
/* Storage behind final_truthtab.permutes; allocated in
 * copy_final_truthtab().
 * NOTE(review): the declaration specifiers of this variable sit on a
 * line that is not visible in this view. */
112 *final_permutes = NULL;
/* Lexer state: nonzero when the previous token was an operator (or at
 * start of query); words and parenthesized expressions clear it. */
113 static int last_token_was_boolop = TRUE;
/* Scratch buffer for formatted messages; allocated in boolean_parse(). */
114 static char *msgbuf = NULL;
/* Read cursor of the lexer into usrblk.query. */
115 static UCHAR *next_lex_char = NULL;
/* Parenthesis nesting depth tracked by the lexer. */
116 static int paren_count = 0;
/* Head of the singly linked list of live truth tables (see creatett
 * and freett). */
117 static TRUTHTAB *ttlist = NULL;
/* NOTE(review): no use of yyerror_count is visible in this view. */
118 static int yyerror_count = 0;
119 static size_t yyleng; /* same as in lex API */
120 static char *yytext; /* same as in lex API */
123 /****************************************/
125 /* add_syntax_errmsg */
127 /****************************************/
128 /* Action function called for yacc rules used to trap syntax errors.
129 * Adds error message identified by msgno to user's msglist.
/*
 * add_syntax_errmsg: post one of the canned syntax-error messages.
 *
 * msgno selects which message is formatted into the file-static msgbuf
 * and passed to DtSearchAddMessage(). The statements that dispatch on
 * msgno are not visible in this view; what is visible is a sequence of
 * sprintf/DtSearchAddMessage pairs for catalog messages 2, 5, 6, 7, 3
 * and 4 of set MS_boolpars, followed by message 8, which prints msgno
 * itself as the syntax error number (presumably the fallback branch).
 *
 * msgbuf starts out NULL and is allocated in boolean_parse().
 * NOTE(review): confirm no caller can reach this function before that
 * allocation has happened.
 */
131 void add_syntax_errmsg (int msgno)
135 /* Message #2 is called in two places */
/* NOTE(review): the default text here is "Query field is empty." while
 * the other user of message 2, in boolean_parse(), falls back to
 * "Query is empty."; the two defaults should probably agree. */
136 sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 2,
137 "%s Query field is empty."),
139 DtSearchAddMessage (msgbuf);
/* Misplaced boolean operator. */
143 sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 5,
144 "%s Boolean operators must be positioned\n"
145 "between words or expressions. Two sequential words\n"
146 "without an operator are interpreted as being separated\n"
147 "by the AND operator (&)."),
149 DtSearchAddMessage (msgbuf);
/* Empty parentheses. */
153 sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 6,
154 "%s Expression in parentheses is missing."),
156 DtSearchAddMessage (msgbuf);
/* Misplaced NOT operator. */
160 sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 7,
161 "%s NOT operator (~) must be positioned to\n"
162 "the left of the word or expression it qualifies."),
164 DtSearchAddMessage (msgbuf);
168 /* Message #3 is called in two places */
169 sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 3,
170 "%s COLLOCATION operator (@) may\n"
171 "only be positioned between two words."),
173 DtSearchAddMessage (msgbuf);
/* Query words missing from the database; names the database by its label. */
177 sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 4,
178 "%s One or more words in your\n"
179 "query are not stored in database '%s'.") ,
180 PROGNAME"089", usrblk.dblk->label);
181 DtSearchAddMessage (msgbuf);
/* Generic message that reports the numeric msgno. */
185 sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 8,
186 "%s Invalid boolean query. Syntax Error #%d.") ,
187 PROGNAME"100", msgno);
188 DtSearchAddMessage (msgbuf);
192 } /* add_syntax_errmsg() */
195 /****************************************/
199 /****************************************/
200 /* Constructor for new truth table.
201 * Allocates it, inits it, and links it into ttlist.
/*
 * creatett: allocate and initialize a truth table node.
 *
 *   stemno    value stored in the stemno field of the new node; the
 *             set-operation functions in this file pass -1.
 *   pmsz      number of bytes to copy from permutes.
 *   permutes  source array of permute bytes (copied, not retained).
 *
 * The node and its permutes array come from one austext_malloc() call:
 * the array lives directly behind the TRUTHTAB header, with 4 spare
 * bytes. Only the header is zeroed. The visible code also points the
 * next field of the new node at the current ttlist head; the remaining
 * statements of the function are not visible in this view.
 */
203 static TRUTHTAB *creatett (int stemno, int pmsz, unsigned char *permutes)
205 TRUTHTAB *newtt = austext_malloc (sizeof(TRUTHTAB) + pmsz + 4,
206 PROGNAME"140", NULL);
207 memset (newtt, 0, sizeof(TRUTHTAB));
208 newtt->stemno = stemno;
/* The permutes array starts at the first byte after the header. */
210 newtt->permutes = (unsigned char *) (newtt + 1);
211 memcpy (newtt->permutes, permutes, pmsz);
212 newtt->next = ttlist;
218 /****************************************/
222 /****************************************/
223 /* Destructor of passed truth table.
224 * Unlinks it from ttlist and frees it.
/*
 * freett: walk ttlist looking for argtt.
 *
 * lastlink always addresses the pointer that leads to the current node
 * (first the list head, then the next field of the previous node), so
 * a node can be unlinked by storing its successor through lastlink.
 * The comparison with argtt, the release of the node and the return
 * statement are not visible in this view.
 * NOTE(review): the function is declared to return void *; confirm what
 * callers expect from the return value.
 */
226 static void *freett (TRUTHTAB *argtt)
229 TRUTHTAB **lastlink = &ttlist;
230 for (tt = ttlist; tt; tt = tt->next) {
/* Unlink: bypass the current node. */
232 *lastlink = tt->next;
/* Advance the trailing link pointer. */
236 lastlink = &tt->next;
242 /****************************************/
244 /* copy_final_truthtab */
246 /****************************************/
247 /* Copies passed truth table into global final_truthtab.
248 * Returns final_truthtab.
/*
 * copy_final_truthtab: copy the pmsz and permutes of tt into the
 * global final_truthtab and return its address.
 *
 * final_truthtab is zeroed first, so every field other than pmsz and
 * permutes ends up 0. The permutes are copied into the separately
 * allocated final_permutes buffer of 300 bytes; the local permute
 * arrays elsewhere in this file hold at most 256 entries.
 * NOTE(review): any condition guarding the allocation is not visible in
 * this view; confirm the buffer is not re-allocated on every call.
 */
250 TRUTHTAB *copy_final_truthtab (TRUTHTAB *tt)
252 memset (&final_truthtab, 0, sizeof(TRUTHTAB));
254 final_permutes = austext_malloc (300, PROGNAME"788", NULL);
255 final_truthtab.pmsz = tt->pmsz;
256 final_truthtab.permutes = final_permutes;
257 memcpy (final_permutes, tt->permutes, final_truthtab.pmsz);
258 return &final_truthtab;
259 } /* copy_final_truthtab() */
262 /****************************************/
264 /* get_stem_truthtab */
266 /****************************************/
267 /* Subroutine of yylex. Also used in yacc action functions.
268 * Creates and returns truth table for passed stem.
269 * If stem is new, adds it to saveusr.stems array, and adds
270 * the original query word string to usrblk.stems for msgs.
271 * Returns NULL and posts err msg if array is full
272 * or has other error.
/*
 * get_stem_truthtab: return a truth table for one query term.
 *
 *   newstem   term as it is stored and compared in saveusr.stems.
 *   origword  text stored in the matching usrblk.stems slot.
 *
 * The term is looked up in saveusr.stems by strcmp(). When it is new it
 * takes the next free slot; both copies are truncated to
 * DtSrMAXWIDTH_HWORD - 1 characters and explicitly terminated. A table
 * with 128 permutes is then built through creatett() with the slot
 * index as stemno. Exceeding DtSrMAX_STEMCOUNT posts catalog message 9.
 * Several statements (loop exits, returns, the body that fills
 * new_permutes) are not visible in this view.
 */
274 static TRUTHTAB *get_stem_truthtab (char *newstem, char *origword)
277 unsigned char bitmask;
279 unsigned char new_permutes [128];
282 /* Check if stem is already in array */
283 for (stemno = 0; stemno < saveusr.stemcount; stemno++)
284 if (strcmp (newstem, saveusr.stems[stemno]) == 0)
287 /* Add new stem to array */
288 if (stemno == saveusr.stemcount) {
/* NOTE(review): stemcount is incremented before the limit test, so it
 * is already past the limit when the error message is posted; confirm
 * the statements not visible here restore it. */
289 if (++saveusr.stemcount > DtSrMAX_STEMCOUNT) {
290 sprintf (msgbuf, catgets (dtsearch_catd, MS_boolpars, 9,
291 "%s Too many terms in boolean query."),
293 DtSearchAddMessage (msgbuf);
/* strncpy() may leave the destination unterminated, hence the explicit
 * terminator stored in the last element after each copy. */
297 strncpy (saveusr.stems[stemno], newstem, DtSrMAXWIDTH_HWORD);
298 saveusr.stems [stemno] [DtSrMAXWIDTH_HWORD - 1] = 0;
300 strncpy (usrblk.stems[stemno], origword, DtSrMAXWIDTH_HWORD);
301 usrblk.stems [stemno] [DtSrMAXWIDTH_HWORD - 1] = 0;
305 /* Stemno now indicates correct term in saveusr.stems.
306 * Truth table for a single term has 128 8-bit permutes,
307 * the 1/2 of all 256 possible permutations that have
308 * that term's bit switched on.
310 bitmask = 1 << stemno; /* mask with only newstem's bit on */
/* Visit all 256 byte values and select those with the bit of this term set. */
312 for (i=0; i<256; i++)
313 if ((i & bitmask) != 0) {
317 newtt = creatett (stemno, 128, new_permutes);
/* Trace output: a leading STEM_CH is shown as a tilde. */
318 if (debugging_boolpars) {
319 fprintf (aa_stderr, " WORD: stem[%d]='%c%s' expr=%p pmsz=%d\n",
321 (saveusr.stems[stemno][0] == STEM_CH) ?
322 '~' : saveusr.stems[stemno][0],
323 &saveusr.stems[stemno][1],
328 } /* get_stem_truthtab() */
331 /****************************************/
335 /****************************************/
336 /* Action function for AND expression rule.
337 * Returns set INTERSECTION of passed truth tables,
338 * ie only the permutes they have in common.
339 * Any truth table, input or output, can be the empty or
340 * the universal set. For example: "(A & B) & ~A" is empty.
/*
 * boolyac_AND: build the table holding the permutes common to tt1 and tt2.
 *
 * Both permutes arrays are walked in step with two indexes; a value is
 * appended to new_permutes only when the current elements are equal.
 * The walk stops when either array is exhausted.
 * NOTE(review): this merge presumes both arrays are in ascending order
 * with no duplicates; the index increments are on lines not visible in
 * this view.
 * The result is created with stemno -1, which boolyac_COLLOC() treats
 * as "not a plain word".
 */
342 TRUTHTAB *boolyac_AND (TRUTHTAB *tt1, TRUTHTAB *tt2) {
344 unsigned char new_permutes [256];
347 pm1 = pm2 = newpm = 0;
348 while (pm1 < tt1->pmsz && pm2 < tt2->pmsz) {
349 if (tt1->permutes[pm1] < tt2->permutes[pm2])
351 else if (tt1->permutes[pm1] > tt2->permutes[pm2])
/* Equal elements: keep one copy in the intersection. */
354 new_permutes [newpm++] = tt1->permutes [pm1];
360 /* Free old truthtabs, create new one. */
363 newtt = creatett (-1, newpm, new_permutes);
364 if (debugging_boolpars) {
365 fprintf (aa_stderr, " AND: exprs=%p,%p-->expr=%p pmsz=%d\n",
366 tt1, tt2, newtt, newtt->pmsz);
370 } /* boolyac_AND() */
373 /****************************************/
377 /****************************************/
378 /* Action function for OR expression rule.
379 * Returns set UNION of passed truth tables.
380 * Any truth table, input or output, can be the empty or
381 * the universal set. For example: "A | ~A" is universal.
/*
 * boolyac_OR: build the table holding every permute found in tt1 or tt2.
 *
 * Two-index merge of the permutes arrays: the smaller current element
 * is appended and its index advanced; once one array is exhausted the
 * rest of the other is appended unchanged.
 * NOTE(review): for equal elements the visible code appends from
 * permutes1 and advances pm1; the matching advance of pm2 is presumably
 * on a line not visible in this view. The merge also presumes both
 * arrays are in ascending order.
 * The result is created with stemno -1.
 */
383 TRUTHTAB *boolyac_OR (TRUTHTAB *tt1, TRUTHTAB *tt2) {
385 unsigned char new_permutes [256];
386 unsigned char *permutes1 = tt1->permutes;
387 unsigned char *permutes2 = tt2->permutes;
390 pm1 = pm2 = newpm = 0;
392 /* While neither permutes array is exhausted... */
393 while (pm1 < tt1->pmsz && pm2 < tt2->pmsz) {
394 if (permutes1[pm1] < permutes2[pm2])
395 new_permutes [newpm++] = permutes1[pm1++];
396 else if (permutes2[pm2] < permutes1[pm1])
397 new_permutes [newpm++] = permutes2[pm2++];
399 new_permutes [newpm++] = permutes1[pm1++];
403 /* After one or both permutes arrays are exhausted... */
404 while (pm1 < tt1->pmsz)
405 new_permutes [newpm++] = permutes1[pm1++];
406 while (pm2 < tt2->pmsz)
407 new_permutes [newpm++] = permutes2[pm2++];
409 /* Free old truthtabs, create new one. */
412 newtt = creatett (-1, newpm, new_permutes);
413 if (debugging_boolpars) {
414 fprintf (aa_stderr, " OR: exprs=%p,%p-->expr=%p pmsz=%d\n",
415 tt1, tt2, newtt, newtt->pmsz);
422 /****************************************/
426 /****************************************/
427 /* Action function for NOT expression rule.
428 * Returns set COMPLEMENT of passed truth table,
429 * ie the universal set minus the passed set,
430 * ie all possible permutes except those passed.
431 * Either the old or the new truth table can be
432 * the empty or the universal set.
/*
 * boolyac_NOT: build the table holding every byte value that is not a
 * permute of oldtt.
 *
 * candidate runs over 0..255. It is appended to new_permutes when the
 * old array is exhausted or its current element is larger than the
 * candidate. In the remaining case the candidate equals the current old
 * element; the handling of that case is on lines not visible in this
 * view (presumably the old index is advanced and the candidate skipped).
 * The result is created with stemno -1.
 */
434 TRUTHTAB *boolyac_NOT (TRUTHTAB *oldtt) {
436 unsigned char new_permutes [256];
441 for (candidate = 0; candidate < 256; candidate++) {
442 if (oldpm >= oldtt->pmsz || candidate < oldtt->permutes [oldpm]) {
443 new_permutes [newpm++] = candidate;
446 * oldtt not done && candidate == oldtt.
447 * (candidate > oldtt not possible).
454 newtt = creatett (-1, newpm, new_permutes);
455 if (debugging_boolpars) {
456 fprintf (aa_stderr, " NOT: expr=%p-->expr=%p pmsz=%d\n",
457 oldtt, newtt, newtt->pmsz);
461 } /* boolyac_NOT() */
464 /****************************************/
468 /****************************************/
469 /* Action function for COLLOCATION expression rule.
470 * The record set satisfying a collocation expression is
471 * generated dynamically. At the parse level it is equivalent
472 * to a separate 'word' with its own (undetermined) record set.
473 * So it's given its own slot in saveusr.stems. The word
474 * in saveusr.stems is formatted "@ssttv[v...]" where ss and tt are
475 * ascii numbers that index the original collocated words
476 * in saveusr.stems, and v... is the collocation value integer.
477 * For example, "@03005" represents the collocation of stem
478 * number 3 and stem number 0, with collocation value 5.
480 * Returns NULL and errmsg on msglist if any problems.
/*
 * boolyac_COLLOC: turn a collocation of two words into a pseudo word.
 *
 * The body uses word1tt, word2tt and colloc_val; their declarations are
 * in the part of the parameter list not visible in this view.
 *
 * Both operands must be plain words (stemno >= 0), otherwise catalog
 * message 3 is posted; identical operands post message 12. The pseudo
 * word is formatted with COLLOC_STEM_FORMAT from the two stem numbers
 * and colloc_val and registered through get_stem_truthtab(), using the
 * same text as stem and as original word.
 * NOTE(review): the sprintf() into wordbuf is unbounded; confirm that
 * COLLOC_STEM_FORMAT with the largest accepted colloc_val fits in
 * DtSrMAXWIDTH_HWORD bytes.
 */
482 TRUTHTAB *boolyac_COLLOC (
488 char wordbuf [DtSrMAXWIDTH_HWORD];
490 if (word1tt->stemno < 0 || word2tt->stemno < 0) {
491 /* Message #3 is called in two places */
492 sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 3,
493 "%s COLLOCATION operator (@) may\n"
494 "only be positioned between two words."),
496 DtSearchAddMessage (msgbuf);
499 if (word1tt->stemno == word2tt->stemno) {
500 sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 12,
501 "%s Collocation operator is not\n"
502 "permitted between identical words."),
504 DtSearchAddMessage (msgbuf);
507 sprintf (wordbuf, COLLOC_STEM_FORMAT,
508 word1tt->stemno, word2tt->stemno, colloc_val);
509 if ((newtt = get_stem_truthtab (wordbuf, wordbuf)) == NULL)
513 if (debugging_boolpars) {
514 fprintf (aa_stderr, " COLLOC: exprs=%p,%p-->expr=%p pmsz=%d\n",
515 word1tt, word2tt, newtt, newtt->pmsz);
519 } /* boolyac_COLLOC() */
522 /****************************************/
526 /****************************************/
527 /* Replaces standard yacc error routine. */
/*
 * yyerror: error callback handed the message text of the parser.
 *
 * For the exact text "syntax error" a three-way choice is made:
 *   - messages are already queued: branch body not visible in this view;
 *   - otherwise, the language parser rejected at least one word:
 *     add_syntax_errmsg(6) is called;
 *   - otherwise catalog message 1 (generic invalid query) is posted.
 * A later statement forwards msg itself to DtSearchAddMessage();
 * presumably that is the branch for any other message text.
 */
528 void yyerror (char *msg) {
529 if (strcmp (msg, "syntax error") == 0) {
530 if (DtSearchHasMessages())
532 else if (parser_invalid_wordcount > 0)
533 add_syntax_errmsg(6);
535 sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 1,
536 "%s Your search string is an invalid\n"
537 "boolean query. Please reformulate and try again."),
539 DtSearchAddMessage (msgbuf);
543 DtSearchAddMessage (msg);
548 /****************************************/
552 /****************************************/
553 /* Subroutine of yylex(). Copies passed substring
554 * into a zero-terminated buffer of its own.
555 * Static buffer good until next call.
/*
 * copy_token: copy toklen bytes starting at tokenp into a private
 * buffer that is reused across calls.
 *
 * The buffer only grows: when toklen exceeds the recorded size, a new
 * one of 1.5 * toklen (+4 spare bytes) is obtained from austext_malloc().
 * NOTE(review): strncpy() with a count of toklen does not write a
 * terminator when the source holds at least toklen characters; the
 * terminating store, the release of the previous buffer and the return
 * statement are presumably on lines not visible in this view.
 * NOTE(review): with toklen == 0 on the very first call no buffer is
 * allocated at all; confirm callers never pass an empty token.
 */
557 static char *copy_token (UCHAR *tokenp, size_t toklen)
559 static char *buf = NULL;
560 static size_t bufsz = 0;
561 if (toklen > bufsz) {
564 bufsz = toklen + (toklen >> 1); /* 1.5 times size needed */
565 buf = austext_malloc (bufsz + 4, PROGNAME"182", NULL);
567 strncpy (buf, (char *) tokenp, toklen);
573 /****************************************/
577 /****************************************/
578 /* Delivers tokens to yyparse() from usrblk.query */
/*
 * Body of the lexer that feeds yyparse() (the signature line is not
 * visible in this view, and neither are the return statements or the
 * code that emits implied AND tokens).
 *
 * What the visible code does:
 *   - advances next_lex_char over characters flagged WHITESPACE in
 *     ascii_charmap;
 *   - handles the one-character operators | ~ & ( ) in a switch while
 *     maintaining last_token_was_boolop and paren_count; a closing
 *     parenthesis that would make paren_count negative is skipped;
 *   - for the collocation operator: takes the text up to the next
 *     WORD_ENDERS character, requires ORA_BLOB in
 *     usrblk.dblk->dbrec.or_dbaccess (else message 10), converts the
 *     digits with atoi() into yylval.int_val and requires a value
 *     greater than zero (else message 11), then yields COLLOC_TOKEN;
 *   - for anything else: takes the text up to the next WORD_ENDERS
 *     character, runs it through usrblk.dblk->parser with PA_MSGS, and
 *     for OE_SRCH_STEMS requests through usrblk.dblk->stemmer, storing
 *     the result behind a leading STEM_CH; the term is registered with
 *     get_stem_truthtab() and yields WORD_TOKEN, or ERROR_TOKEN on
 *     failure.
 * NOTE(review): atoi() cannot report malformed or out-of-range digits;
 * the only validation visible is the greater-than-zero test.
 */
584 char mystembuf [DtSrMAXWIDTH_HWORD + 4];
588 /* Skip white space */
589 while (ascii_charmap[*next_lex_char] & WHITESPACE)
592 /* Terminating zero indicates end of query and end of parse.
593 * Automatically close unbalanced parentheses.
595 if (*next_lex_char == 0) {
596 if (paren_count > 0) {
609 switch (*next_lex_char) {
610 case '|': /* OR operator */
611 last_token_was_boolop = TRUE;
618 case '~': /* NOT operator */
619 if (!last_token_was_boolop) {
620 /* Generate implied AND between words
621 * and parenthesized expressions.
622 * A NOT is not itself boolean; it must
623 * precede the next word or expression.
625 last_token_was_boolop = TRUE;
631 last_token_was_boolop = TRUE;
638 case '&': /* AND operator */
639 if (last_token_was_boolop && qry_is_all_ANDs) {
640 /* Ignore multiple AND operators.
641 * These might occur if we silently
642 * discarded some invalid words.
645 goto GET_ANOTHER_TOKEN;
647 last_token_was_boolop = TRUE;
654 case '(': /* OPEN parentheses */
655 if (!last_token_was_boolop) {
656 /* Generate implied AND between words
657 * and parenthesized expressions.
659 last_token_was_boolop = TRUE;
672 case ')': /* CLOSE parentheses */
673 /* Just discard excessive right parentheses */
674 if (--paren_count < 0) {
677 goto GET_ANOTHER_TOKEN;
679 last_token_was_boolop = FALSE;
686 case '@': /* COLLOCATION operator */
687 /* Collocation token:
688 * Token is defined as the collocation char followed
689 * by one or more numeric digits: "@#[#...]".
690 * Syntactically it's a kind of an AND operator.
691 * Semantically it's a pseudo word token
692 * (it will occupy a slot in the stems array).
693 * The yylval is the integer value following
694 * the collocation character.
696 yyleng = strcspn ((char *) next_lex_char + 1, WORD_ENDERS) + 1;
697 yytext = copy_token (next_lex_char, yyleng);
698 next_lex_char += yyleng;
700 if ((usrblk.dblk->dbrec.or_dbaccess & ORA_BLOB) == 0) {
701 retn_token = ERROR_TOKEN;
702 sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 10,
703 "%s Collocation searches not available for database '%s'."),
704 PROGNAME"2567", usrblk.dblk->label);
705 DtSearchAddMessage (msgbuf);
708 yylval.int_val = atoi (yytext + 1);
709 if (yylval.int_val <= 0) {
710 retn_token = ERROR_TOKEN;
711 sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 11,
712 "%s Collocation operator '%.*s' is invalid.\n"
713 "Correct format is '@n' where n is greater than zero.") ,
714 PROGNAME"294", DtSrMAXWIDTH_HWORD, yytext);
715 DtSearchAddMessage (msgbuf);
718 last_token_was_boolop = TRUE;
719 retn_token = COLLOC_TOKEN;
724 /* Presumed word token:
725 * Token is all text chars until next whitespace,
726 * next lex token, or end of string.
727 * Linguistically parse it and optionally stem it.
728 * The token value is the truth table for one
729 * word: all permutes with only that word's
730 * bits turned on. If the word is already
731 * in the stems array, then the permutes
732 * position is the word's index in the array.
733 * If the word is not in the array, it's added.
734 * If the array is full, then an error is reported.
736 if (!last_token_was_boolop) {
737 /* Generate implied AND between words
738 * and parenthesized expressions.
740 last_token_was_boolop = TRUE;
746 yyleng = strcspn ((char *) next_lex_char, WORD_ENDERS);
747 yytext = copy_token (next_lex_char, yyleng);
748 next_lex_char += yyleng;
750 * Linguistically parse the token.
751 * Failure can occur because word is too short
752 * or too long, it's on the stoplist, etc.
753 * Setting PA_MSGS causes parser to explain
754 * invalid words with a msg.
756 memset (&parg, 0, sizeof(PARG));
757 parg.dblk = usrblk.dblk;
758 parg.string = yytext;
759 /*****if (!qry_is_all_ANDs)********/
760 parg.flags = PA_MSGS;
761 stembufp = usrblk.dblk->parser (&parg);
762 if (debugging_boolpars) {
763 fprintf (aa_stderr, " lang: '%s' -> '%s'\n",
764 yytext, (stembufp)? stembufp : "<null>");
768 * If token is not a linguistically valid word,
769 * one of two things can happen. If the query
770 * is all_ANDs (most common type) we silently
772 * Otherwise report error and quit now.
774 if (stembufp == NULL) {
775 parser_invalid_wordcount++;
777 goto GET_ANOTHER_TOKEN;
778 retn_token = ERROR_TOKEN;
779 if (!DtSearchHasMessages()) {
780 sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 13,
781 "%s Word '%.*s' is invalid.") ,
782 PROGNAME"315", DtSrMAXWIDTH_HWORD, yytext);
783 DtSearchAddMessage (msgbuf);
/* A parser result whose length differs from the raw token is reported
 * with message 14 (not a single word). */
787 if (strlen(stembufp) != strlen(yytext)) {
788 retn_token = ERROR_TOKEN;
789 sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 14,
790 "%s String '%.*s' is not a single word.") ,
791 PROGNAME"634", DtSrMAXWIDTH_HWORD, yytext);
792 DtSearchAddMessage (msgbuf);
796 * If stemming, we must prefix term with
797 * special stem char in the stems array.
799 if (usrblk.request == OE_SRCH_STEMS) {
800 stembufp = usrblk.dblk->stemmer (stembufp, usrblk.dblk);
801 if (debugging_boolpars) {
802 fprintf (aa_stderr, " stemer: -> '%s'\n", stembufp);
/* Slot 0 holds STEM_CH, the stem follows from slot 1; the explicit
 * terminator covers the case where strncpy() filled its whole count. */
805 mystembuf[0] = STEM_CH;
806 strncpy (mystembuf + 1, stembufp, DtSrMAXWIDTH_HWORD);
807 mystembuf [DtSrMAXWIDTH_HWORD - 1] = 0;
808 stembufp = mystembuf;
811 /* Load stem into stems arrays and return its truth table. */
/* The assignment inside the condition is deliberate: a nonzero table
 * pointer selects WORD_TOKEN, anything else ends up as ERROR_TOKEN. */
812 if (yylval.truthtab = get_stem_truthtab (stembufp, yytext)) {
813 retn_token = WORD_TOKEN;
814 last_token_was_boolop = FALSE;
817 retn_token = ERROR_TOKEN;
820 } /* switch on *next_lex_char */
823 if (debugging_boolpars) {
825 " yylex: op?=%d parct=%d tok#=%d lval=%p%sYYTEXT='%s'\n",
826 last_token_was_boolop, paren_count,
827 retn_token, yylval.truthtab,
828 (retn_token == COLLOC_TOKEN)? "\t\t" : "\t",
837 /****************************************/
841 /****************************************/
842 /* Called from Opera_Engine for boolean searches.
843 * Driver for yyparse().
844 * Expects usrblk.request == OE_SRCH_STEMS or OE_SRCH_WORDS.
845 * If parse is completely successful (query is valid), outputs
847 * saveusr.stems (stemmed if necessary with STEM_CH as first char,
848 * and phony colloc words with '@' as first char),
849 * usrblk.stems (original unstemmed query terms for err msgs),
853 * and returns TRUE. Truthtab allocation good until next call.
854 * If parse fails, returns FALSE and err msg(s) on msglist.
/*
 * boolean_parse: driver that prepares the lexer state for one query and
 * evaluates the outcome of the parse.
 *
 * Visible steps:
 *   - load debugging_boolpars from usrblk.debug and allocate msgbuf;
 *   - reject a NULL usrblk.query with catalog message 2, then scan the
 *     query for a character that is not WHITESPACE in ascii_charmap;
 *   - point next_lex_char at the query and reset last_token_was_boolop,
 *     saveusr.stemcount and parser_invalid_wordcount;
 *   - derive qry_has_no_NOTs and qry_is_all_ANDs from the operator
 *     characters present in the query string;
 *   - after the parse, optionally trace up to 16 permutes of
 *     final_truthtab, and post message 15 when the final table is empty
 *     or message 16 when it holds 256 or more permutes.
 * The call into the parser, the release of ttlist and all return
 * statements are on lines not visible in this view.
 */
856 int boolean_parse (void)
860 TRUTHTAB *tt, *ttnext;
862 debugging_boolpars = (usrblk.debug & USRDBG_BOOL);
/* NOTE(review): any condition guarding this allocation is not visible
 * here; if it runs on every call the previous buffer is lost. Confirm. */
864 msgbuf = austext_malloc (300 + DtSrMAXWIDTH_HWORD,
865 PROGNAME"255", NULL);
867 /* Test for empty query */
868 if (usrblk.query == NULL) {
870 /* Message #2 is called in two places */
/* NOTE(review): the fallback text here differs from the fallback text
 * used for the same catalog message in add_syntax_errmsg(). */
871 sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 2,
872 "%s Query is empty."), PROGNAME"289");
873 DtSearchAddMessage (msgbuf);
/* Scan for the first character that is not whitespace. */
876 for (cptr = usrblk.query; *cptr; cptr++) {
877 if ((ascii_charmap[*cptr] & WHITESPACE) == 0)
883 /* Init globals for yylex and yyparse */
884 next_lex_char = (UCHAR *) usrblk.query;
887 last_token_was_boolop = TRUE;
888 saveusr.stemcount = 0;
889 parser_invalid_wordcount = 0;
891 /* Query "is all ANDS" if it has no ORs, NOTs, or COLLOCs.
892 * Missing or linguistically invalid words will be silently
893 * discarded for all_ANDs queries.
894 * Query "has no NOTs" if it has no NOTs.
895 * Results from queries without NOTs can be statistically sorted.
897 qry_has_no_NOTs = !strchr (usrblk.query, '~');
898 qry_is_all_ANDs = !strpbrk (usrblk.query, "|~@");
900 if (debugging_boolpars || (usrblk.debug & USRDBG_SRCHCMPL)) {
902 "start boolean_parse: stem?=%d allANDs?=%d noNOTs?=%d\n"
904 (usrblk.request == OE_SRCH_STEMS),
905 qry_is_all_ANDs, qry_has_no_NOTs, usrblk.query);
912 /* Free entire remaining ttlist. Only you
913 * can prevent forest fires and memory leaks.
923 if (debugging_boolpars || (usrblk.debug & USRDBG_SRCHCMPL)) {
924 print_stems (saveusr.stemcount, saveusr.stems,
925 PROGNAME"815 end boolean_parse, syntax ok,");
926 fprintf (aa_stderr, " permutes=%d:", final_truthtab.pmsz);
927 for (i=0; i<16; i++) {
928 if (i >= final_truthtab.pmsz)
930 fprintf (aa_stderr, " %02x", final_truthtab.permutes [i]);
932 fputc ('\n', aa_stderr);
936 if (final_truthtab.pmsz <= 0) {
937 sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 15,
938 "%s Your query cannot logically return\n"
939 "any records. Please reformulate and try again."),
941 DtSearchAddMessage (msgbuf);
944 if (final_truthtab.pmsz >= 256) {
945 sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 16,
946 "%s Your query will return entire database\n"
947 "'%s'. Please reformulate and try again.") ,
948 PROGNAME"341", usrblk.dblk->label);
949 DtSearchAddMessage (msgbuf);
953 } /* boolean_parse() */
/* Everything from here to the matching #endif is a stand-alone test
 * driver, compiled only when TESTBOOL is defined. */
956 #ifdef TESTBOOL /*-----------------------------------------------*/
/* The test build defines the usrblk and saveusr objects that the
 * parser code above reads and writes. */
958 USRBLK usrblk = { 0 };
960 SAVEUSR saveusr = { 0 };
/* Debug switches defined in other modules; set from the -d command
 * line option in process_user_args(). */
961 extern int debugging_teskey;
962 extern int debugging_paice;
963 extern int debugging_jpn;
965 /****************************************/
967 /* process_user_args */
969 /****************************************/
970 /* Subroutine of main(). Validates and loads global
971 * variables with values from command line arguments.
/*
 * process_user_args: apply the command line options of the test driver
 * to the global dblk and the debug switches.
 *
 * Options recognised in the visible code:
 *   third character x or n, number from the fourth: stored in
 *       dblk.dbrec.or_maxwordsz / or_minwordsz (the usage text calls
 *       these -mx# and -mn#);
 *   number from the third character: stored in dblk.dbrec.or_language
 *       (-l# in the usage text);
 *   letters t, p, j from the third character on: set debugging_teskey,
 *       debugging_paice, debugging_jpn (-d in the usage text).
 * Unknown debug letters and unknown arguments produce the messages
 * tagged 049 and 059. The option dispatch itself, the output calls and
 * any exit path are on lines not visible in this view.
 * NOTE(review): the numeric values are converted with atoi(), which
 * cannot signal malformed input.
 */
973 static void process_user_args (int argc, char *argv[])
981 /* Each pass grabs new parm of "-xxx" format */
989 if (argptr[2] == 'x')
990 dblk.dbrec.or_maxwordsz = atoi (argptr + 3);
991 else if (argptr[2] == 'n')
992 dblk.dbrec.or_minwordsz = atoi (argptr + 3);
998 dblk.dbrec.or_language = atoi (argptr + 2);
/* Each character after the option letter is one debug switch. */
1002 for (cptr = argptr+2; *cptr != 0; cptr++) {
1004 case 't': debugging_teskey = TRUE; break;
1005 case 'p': debugging_paice = TRUE; break;
1006 case 'j': debugging_jpn = TRUE; break;
1010 "%s Invalid debug option %c.\a\n",
1011 PROGNAME"049", *cptr);
1021 "%s Invalid command line argument '%s'.\a\n",
1022 PROGNAME"059", argptr);
1027 } /* main loop on each arg */
1032 "\nUSAGE: %s [options]\n"
1033 " -mx# maximum word size.\n"
1034 " -mn# minimum word size.\n"
1035 " -dtpj Debug: Teskey, Paice, Japanese.\n"
1036 " -l# language number. Default 0.\n",
1041 } /* process_user_args() */
1044 /****************************************/
1048 /****************************************/
/*
 * main: interactive test driver for boolean_parse().
 *
 * Sets up usrblk and a dummy database block named "testbool" with
 * collocations enabled and boolean-parser tracing on, applies the
 * command line options, loads the language via load_language(), then
 * reads queries from stdin one line at a time. A line consisting of a
 * single period or the letter q ends the loop (per the prompt text;
 * the loop exits themselves are not visible in this view). A leading
 * dollar sign selects OE_SRCH_STEMS, otherwise OE_SRCH_WORDS is used.
 * After each parse the queued messages are printed and freed.
 */
1049 int main (int argc, char *argv[])
1054 char linebuf [1024];
1056 /* Init global variables */
1059 memset (&usrblk, 0, sizeof(USRBLK));
1060 usrblk.dblk = &dblk;
1061 usrblk.debug |= USRDBG_BOOL; /* set debugging_boolpars */
1063 memset (&dblk, 0, sizeof(DBLK));
1064 strcpy (dblk.name, "testbool");
1065 dblk.label = dblk.name;
1066 dblk.dbrec.or_dbaccess |= ORA_BLOB; /* enable collocations */
1068 /* Read command line args */
1069 process_user_args (argc, argv);
1071 if (!load_language (&dblk, NULL)) {
1073 PROGNAME"140 load_language() failed. Msgs:\n%s\n",
1074 DtSearchGetMessages());
1077 fprintf (aa_stderr, " lang=%d minwdsz=%d maxwdsz=%d.\n",
1078 dblk.dbrec.or_language,
1079 dblk.dbrec.or_minwordsz,
1080 dblk.dbrec.or_maxwordsz);
1082 /* Main loop. Each line is a boolean query. */
1083 printf ("Enter an AusText boolean query. 'q' or '.' to quit.\n"
1084 "If first char is '$', words will be stemmed:\n> ");
1086 while (fgets (linebuf, sizeof(linebuf), stdin) != NULL) {
1088 linebuf [sizeof(linebuf) - 1] = 0;
1089 if (strcmp (linebuf, ".\n") == 0)
1091 if (strcmp (linebuf, "q\n") == 0)
1093 if (linebuf[0] == '\n')
/* NOTE(review): this assumes the line ends in a newline. For a final
 * line without one, or a line longer than the buffer, the last real
 * character is overwritten instead. */
1095 linebuf [strlen(linebuf) - 1] = 0; /* overlay \n */
1097 if (linebuf[0] == '$') {
1098 usrblk.query = linebuf + 1;
1099 usrblk.request = OE_SRCH_STEMS;
1102 usrblk.query = linebuf;
1103 usrblk.request = OE_SRCH_WORDS;
1106 if (!boolean_parse())
1107 puts (PROGNAME"707 boolean_parse() returned FALSE (OE_BAD_QUERY).");
1108 if (DtSearchHasMessages()) {
1109 printf ("mmmmm Messages returned to user mmmmmmmmmmmmmmmmmm\n"
1110 "%s\nmmmmm End of messages to user mmmmmmmmmmmmmmmmmmmm\n",
1111 DtSearchGetMessages());
1112 DtSearchFreeMessages();
1115 printf ("--------------------------------\n> ");
1117 } /* main read loop for each query line */
1121 #endif /* TESTBOOL */
1123 /********************* BOOLPARS.C ********************/