2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these libraries and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
23 /* $XConsortium: boolpars.c /main/5 1996/11/25 18:49:27 drk $
25 * (c) Copyright 1996 Digital Equipment Corporation.
26 * (c) Copyright 1996 Hewlett-Packard Company.
27 * (c) Copyright 1996 International Business Machines Corp.
28 * (c) Copyright 1996 Sun Microsystems, Inc.
29 * (c) Copyright 1996 Novell, Inc.
30 * (c) Copyright 1996 FUJITSU LIMITED.
31 * (c) Copyright 1996 Hitachi.
34 * COMPONENT_NAME: austext
36 * FUNCTIONS: add_syntax_errmsg
55 * (C) COPYRIGHT International Business Machines Corp. 1996
57 * Licensed Materials - Property of IBM
58 * US Government Users Restricted Rights - Use, duplication or
59 * disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
61 /********************* BOOLPARS.C ********************
62 * $Id: boolpars.c /main/5 1996/11/25 18:49:27 drk $
64 * AusText/DtSearch yacc-based boolean query parser.
65 * Converts boolean query into stems array and truth table
66 * for subsequent search. Boolyac.y is the yacc source.
67 * After processing by yacc, it becomes boolyac.c and boolyac.h.
68 * This module contains all the related C source code: yylex,
69 * yacc action functions, and the main AusText driver function, boolean_parse.
70 * Additional information (format of TRUTHTAB) in header file boolpars.h.
73 * Revision 1.4 1996/03/22 23:12:50 miker
74 * Added string.h header and correctly cast strcspn() calls.
76 * Revision 1.3 1996/03/20 19:14:30 miker
77 * Enable collocation expressions in stem (type 'S') searches.
79 * Revision 1.2 1996/03/13 22:35:59 miker
80 * Changed char to UCHAR several places; similar typecasts.
82 * Revision 1.1 1996/03/05 15:52:06 miker
/* File-scope configuration and parser state.
 * The #if/#error pair refuses to build unless DtSrMAX_STEMCOUNT is 8.
 * NOTE(review): presumably because get_stem_truthtab builds each term's
 * mask in an unsigned char and the truth tables hold 8-bit permutes --
 * confirm against boolpars.h.
 * WORD_ENDERS is the strcspn() stop set used by the lexer to find the
 * end of a word or collocation token.
 * MS_boolpars is the message set number passed to every catgets() call
 * in this file.
 */
91 #if (DtSrMAX_STEMCOUNT != 8)
92 #error DtSrMAX_STEMCOUNT is not defined to be 8.
95 #define PROGNAME "BOOLPARS"
96 #define WORD_ENDERS " \t\n\f()|@~&"
97 #define MAX_YYERRORS 4
98 #define MS_boolpars 28
101 /****************************************/
105 /****************************************/
106 int qry_has_no_NOTs = FALSE; /* boolean_parse: set from a strchr() test for the NOT character */
107 int qry_is_all_ANDs = FALSE; /* boolean_parse: set when the query contains none of | ~ @ */
108 TRUTHTAB final_truthtab = { 0 }; /* filled in by copy_final_truthtab() */
109 int parser_invalid_wordcount = 0; /* reset in boolean_parse; incremented when the language parser returns NULL for a word */
111 static int debugging_boolpars = FALSE; /* boolean_parse: usrblk.debug & USRDBG_BOOL */
113 *final_permutes = NULL;
114 static int last_token_was_boolop = TRUE; /* lexer state used to decide when an implied AND is needed */
115 static char *msgbuf = NULL; /* scratch buffer for formatted messages; allocated in boolean_parse */
116 static UCHAR *next_lex_char = NULL; /* lexer cursor; boolean_parse points it at usrblk.query */
117 static int paren_count = 0;
118 static TRUTHTAB *ttlist = NULL; /* head of the linked list that creatett() links new tables into */
119 static int yyerror_count = 0;
120 static size_t yyleng; /* same as in lex API */
121 static char *yytext; /* same as in lex API */
124 /****************************************/
126 /* add_syntax_errmsg */
128 /****************************************/
129 /* Action function called for yacc rules used to trap syntax errors.
130 * Adds error message identified by msgno to user's msglist.
/* add_syntax_errmsg: formats one parser error message into the
 * file-scope msgbuf and queues it with DtSearchAddMessage().
 *
 * msgno  selector chosen by the caller (yacc error rules, and
 *        yyerror which passes 6). The visible code can emit catalog
 *        messages 2, 5, 6, 7, 3, 4 and 8 of set MS_boolpars; the
 *        last form prints msgno itself as the syntax error number.
 *
 * Every sprintf() takes its format from catgets(), with the English
 * text given here as the fallback when the catalog lookup fails.
 * msgbuf is written without any visible NULL check, so it must have
 * been allocated first (boolean_parse does that).
 *
 * NOTE(review): the fallback text for message 2 here reads
 * "Query field is empty." but boolean_parse supplies "Query is empty."
 * for the same message number -- confirm which wording is intended.
 */
132 void add_syntax_errmsg (int msgno)
136 /* Message #2 is called in two places (here and in boolean_parse) */
137 sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 2,
138 "%s Query field is empty."),
140 DtSearchAddMessage (msgbuf);
144 sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 5,
145 "%s Boolean operators must be positioned\n"
146 "between words or expressions. Two sequential words\n"
147 "without an operator are interpreted as being separated\n"
148 "by the AND operator (&)."),
150 DtSearchAddMessage (msgbuf);
154 sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 6,
155 "%s Expression in parentheses is missing."),
157 DtSearchAddMessage (msgbuf);
161 sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 7,
162 "%s NOT operator (~) must be positioned to\n"
163 "the left of the word or expression it qualifies."),
165 DtSearchAddMessage (msgbuf);
169 /* Message #3 is called in two places (here and in boolyac_COLLOC) */
170 sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 3,
171 "%s COLLOCATION operator (@) may\n"
172 "only be positioned between two words."),
174 DtSearchAddMessage (msgbuf);
/* This message also names the current database via usrblk.dblk->label. */
178 sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 4,
179 "%s One or more words in your\n"
180 "query are not stored in database '%s'.") ,
181 PROGNAME"089", usrblk.dblk->label);
182 DtSearchAddMessage (msgbuf);
/* Generic form: reports the raw msgno as the syntax error number. */
186 sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 8,
187 "%s Invalid boolean query. Syntax Error #%d.") ,
188 PROGNAME"100", msgno);
189 DtSearchAddMessage (msgbuf);
193 } /* add_syntax_errmsg() */
196 /****************************************/
200 /****************************************/
201 /* Constructor for new truth table.
202 * Allocates it, inits it, and links it into ttlist.
/* creatett: builds a new truth table node.
 *
 * stemno    stored in the node's stemno field. The boolyac_AND/OR/NOT
 *           callers in this file pass -1; get_stem_truthtab passes the
 *           index of the term in saveusr.stems.
 * pmsz      number of bytes copied from permutes.
 * permutes  source bytes; they are copied, the pointer is not kept.
 *
 * The struct and its permutes array come from one austext_malloc()
 * request of sizeof(TRUTHTAB) + pmsz + 4 bytes. Only the struct part
 * is zeroed before the fields are filled in.
 */
204 static TRUTHTAB *creatett (int stemno, int pmsz, unsigned char *permutes)
206 TRUTHTAB *newtt = austext_malloc (sizeof(TRUTHTAB) + pmsz + 4,
207 PROGNAME"140", NULL);
208 memset (newtt, 0, sizeof(TRUTHTAB));
209 newtt->stemno = stemno;
/* The permutes array starts immediately after the struct, inside the same allocation. */
211 newtt->permutes = (unsigned char *) (newtt + 1);
212 memcpy (newtt->permutes, permutes, pmsz);
/* The new node goes in front of the current list head. */
213 newtt->next = ttlist;
219 /****************************************/
223 /****************************************/
224 /* Destructor of passed truth table.
225 * Unlinks it from ttlist and frees it.
/* freett: removes one truth table from ttlist.
 *
 * argtt  the table to remove.
 *
 * lastlink always holds the address of the pointer that leads to the
 * node being visited: it starts at &ttlist and advances to &tt->next.
 * Storing tt->next through it splices a node out without treating the
 * list head as a special case.
 */
227 static void freett (TRUTHTAB *argtt)
230 TRUTHTAB **lastlink = &ttlist;
231 for (tt = ttlist; tt; tt = tt->next) {
233 *lastlink = tt->next;
237 lastlink = &tt->next;
243 /****************************************/
245 /* copy_final_truthtab */
247 /****************************************/
248 /* Copies passed truth table into global final_truthtab.
249 * Returns final_truthtab.
/* copy_final_truthtab: publishes a parse result in the global
 * final_truthtab.
 *
 * tt  source table; its pmsz and permutes bytes are copied.
 *
 * The global is zeroed first, then pointed at the file-scope
 * final_permutes buffer, which receives tt->pmsz bytes.
 * Returns the address of final_truthtab.
 *
 * NOTE(review): the buffer request is a fixed 300 bytes. The tables
 * built in this file are assembled in arrays of at most 256 bytes, so
 * that appears to leave slack -- confirm no larger pmsz can arrive.
 */
251 TRUTHTAB *copy_final_truthtab (TRUTHTAB *tt)
253 memset (&final_truthtab, 0, sizeof(TRUTHTAB));
255 final_permutes = austext_malloc (300, PROGNAME"788", NULL);
256 final_truthtab.pmsz = tt->pmsz;
257 final_truthtab.permutes = final_permutes;
258 memcpy (final_permutes, tt->permutes, final_truthtab.pmsz);
259 return &final_truthtab;
260 } /* copy_final_truthtab() */
263 /****************************************/
265 /* get_stem_truthtab */
267 /****************************************/
268 /* Subroutine of yylex. Also used in yacc action functions.
269 * Creates and returns truth table for passed stem.
270 * If stem is new, adds it to saveusr.stems array, and adds
271 * the original query word string to usrblk.stems for msgs.
272 * Returns NULL and posts err msg if array is full
273 * or has other error.
/* get_stem_truthtab: returns the truth table for a single term.
 *
 * newstem   term text as compared against, and stored in, saveusr.stems.
 * origword  text stored in the matching usrblk.stems slot when a new
 *           slot is taken.
 *
 * The term is looked up in saveusr.stems with strcmp(). If the search
 * runs off the end (stemno == saveusr.stemcount) a new slot is used;
 * when the incremented count exceeds DtSrMAX_STEMCOUNT the
 * "Too many terms" message is queued instead.
 * The table is then created with creatett (stemno, 128, ...), built
 * from the values 0..255 that have the term's bit set.
 */
275 static TRUTHTAB *get_stem_truthtab (char *newstem, char *origword)
278 unsigned char bitmask;
280 unsigned char new_permutes [128];
283 /* Check if stem is already in array */
284 for (stemno = 0; stemno < saveusr.stemcount; stemno++)
285 if (strcmp (newstem, saveusr.stems[stemno]) == 0)
288 /* Add new stem to array */
289 if (stemno == saveusr.stemcount) {
290 if (++saveusr.stemcount > DtSrMAX_STEMCOUNT) {
291 sprintf (msgbuf, catgets (dtsearch_catd, MS_boolpars, 9,
292 "%s Too many terms in boolean query."),
294 DtSearchAddMessage (msgbuf);
/* strncpy() leaves the destination unterminated when the source is
 * too long, hence the explicit 0 stored in the last element; longer
 * strings are silently truncated.
 */
298 strncpy (saveusr.stems[stemno], newstem, DtSrMAXWIDTH_HWORD);
299 saveusr.stems [stemno] [DtSrMAXWIDTH_HWORD - 1] = 0;
/* Same slot index in the parallel usrblk.stems array keeps the original word. */
301 strncpy (usrblk.stems[stemno], origword, DtSrMAXWIDTH_HWORD);
302 usrblk.stems [stemno] [DtSrMAXWIDTH_HWORD - 1] = 0;
306 /* Stemno now indicates correct term in saveusr.stems.
307 * Truth table for a single term has 128 8-bit permutes,
308 * the 1/2 of all 256 possible permutations that have
309 * that term's bit switched on.
311 bitmask = 1 << stemno; /* mask with only newstem's bit on */
313 for (i=0; i<256; i++)
314 if ((i & bitmask) != 0) {
318 newtt = creatett (stemno, 128, new_permutes);
319 if (debugging_boolpars) {
/* Debug trace: a leading STEM_CH is printed as a tilde. */
320 fprintf (aa_stderr, " WORD: stem[%d]='%c%s' expr=%p pmsz=%d\n",
322 (saveusr.stems[stemno][0] == STEM_CH) ?
323 '~' : saveusr.stems[stemno][0],
324 &saveusr.stems[stemno][1],
325 (void *) newtt, newtt->pmsz);
329 } /* get_stem_truthtab() */
332 /****************************************/
336 /****************************************/
337 /* Action function for AND expression rule.
338 * Returns set INTERSECTION of passed truth tables,
339 * ie only the permutes they have in common.
340 * Any truth table, input or output, can be the empty or
341 * the universal set. For example: "(A & B) & ~A" is empty.
/* boolyac_AND: intersection of two truth tables by a merge walk.
 *
 * tt1, tt2  operand tables.
 *
 * pm1 and pm2 index the two permutes arrays. The loop stops as soon as
 * either array is exhausted, and a value is written to new_permutes
 * only on the branch where neither side is the smaller one.
 * This is only a correct intersection if both arrays are in ascending
 * order; the generators visible in this file (loops counting 0..255)
 * produce them that way.
 * The result is handed to creatett() with stemno -1, which marks it as
 * an expression rather than a single word (boolyac_COLLOC tests for
 * negative stemno).
 */
343 TRUTHTAB *boolyac_AND (TRUTHTAB *tt1, TRUTHTAB *tt2) {
345 unsigned char new_permutes [256];
348 pm1 = pm2 = newpm = 0;
349 while (pm1 < tt1->pmsz && pm2 < tt2->pmsz) {
350 if (tt1->permutes[pm1] < tt2->permutes[pm2])
352 else if (tt1->permutes[pm1] > tt2->permutes[pm2])
355 new_permutes [newpm++] = tt1->permutes [pm1];
361 /* Free old truthtabs, create new one. */
364 newtt = creatett (-1, newpm, new_permutes);
365 if (debugging_boolpars) {
366 fprintf (aa_stderr, " AND: exprs=%p,%p-->expr=%p pmsz=%d\n",
367 (void *) tt1, (void *) tt2, (void *) newtt, newtt->pmsz);
371 } /* boolyac_AND() */
374 /****************************************/
378 /****************************************/
379 /* Action function for OR expression rule.
380 * Returns set UNION of passed truth tables.
381 * Any truth table, input or output, can be the empty or
382 * the universal set. For example: "A | ~A" is universal.
/* boolyac_OR: union of two truth tables by a merge walk.
 *
 * tt1, tt2  operand tables.
 *
 * While both arrays have entries left, the smaller current value is
 * copied to new_permutes; when neither is smaller the value is copied
 * from the first array. Whatever remains in either array afterwards
 * is appended by the two trailing loops.
 * As with boolyac_AND, the output is ordered and duplicate-free only
 * if both inputs are in ascending order.
 * The result is handed to creatett() with stemno -1.
 */
384 TRUTHTAB *boolyac_OR (TRUTHTAB *tt1, TRUTHTAB *tt2) {
386 unsigned char new_permutes [256];
387 unsigned char *permutes1 = tt1->permutes;
388 unsigned char *permutes2 = tt2->permutes;
391 pm1 = pm2 = newpm = 0;
393 /* While neither permutes array is exhausted... */
394 while (pm1 < tt1->pmsz && pm2 < tt2->pmsz) {
395 if (permutes1[pm1] < permutes2[pm2])
396 new_permutes [newpm++] = permutes1[pm1++];
397 else if (permutes2[pm2] < permutes1[pm1])
398 new_permutes [newpm++] = permutes2[pm2++];
400 new_permutes [newpm++] = permutes1[pm1++];
404 /* After one or both permutes arrays are exhausted... */
405 while (pm1 < tt1->pmsz)
406 new_permutes [newpm++] = permutes1[pm1++];
407 while (pm2 < tt2->pmsz)
408 new_permutes [newpm++] = permutes2[pm2++];
410 /* Free old truthtabs, create new one. */
413 newtt = creatett (-1, newpm, new_permutes);
414 if (debugging_boolpars) {
415 fprintf (aa_stderr, " OR: exprs=%p,%p-->expr=%p pmsz=%d\n",
416 (void *) tt1, (void *) tt2, (void *) newtt, newtt->pmsz);
423 /****************************************/
427 /****************************************/
428 /* Action function for NOT expression rule.
429 * Returns set COMPLEMENT of passed truth table,
430 * ie the universal set minus the passed set,
431 * ie all possible permutes except those passed.
432 * Either the old or the new truth table can be
433 * the empty or the universal set.
/* boolyac_NOT: complement of a truth table.
 *
 * oldtt  table to complement.
 *
 * candidate runs over every value 0..255. It is written to
 * new_permutes when the old array is already exhausted
 * (oldpm >= oldtt->pmsz) or when candidate is below the old array's
 * current entry, i.e. when the old table does not contain it.
 * The result is handed to creatett() with stemno -1.
 */
435 TRUTHTAB *boolyac_NOT (TRUTHTAB *oldtt) {
437 unsigned char new_permutes [256];
442 for (candidate = 0; candidate < 256; candidate++) {
443 if (oldpm >= oldtt->pmsz || candidate < oldtt->permutes [oldpm]) {
444 new_permutes [newpm++] = candidate;
447 * oldtt not done && candidate == oldtt.
448 * (candidate > oldtt not possible).
455 newtt = creatett (-1, newpm, new_permutes);
456 if (debugging_boolpars) {
457 fprintf (aa_stderr, " NOT: expr=%p-->expr=%p pmsz=%d\n",
458 (void *) oldtt, (void *) newtt, newtt->pmsz);
462 } /* boolyac_NOT() */
465 /****************************************/
469 /****************************************/
470 /* Action function for COLLOCATION expression rule.
471 * The record set satisfying a collocation expression is
472 * generated dynamically. At the parse level it is equivalent
473 * to a separate 'word' with its own (undetermined) record set.
474 * So it's given its own slot in saveusr.stems. The word
475 * in saveusr.stems is formatted "@ssttv[v...]" where ss and tt are
476 * ascii numbers that index the original collocated words
477 * in saveusr.stems, and v... is the collocation value integer.
478 * For example, "@03005" represents the collocation of stem
479 * number 3 and stem number 0, with collocation value 5.
481 * Returns NULL and errmsg on msglist if any problems.
/* boolyac_COLLOC: turns a collocation of two words into a pseudo word
 * with its own slot and truth table.
 *
 * Checks made before anything is created:
 *   - either operand has a negative stemno (an expression, not a
 *     single word): message 3 is queued;
 *   - both operands have the same stemno: message 12 is queued.
 * Otherwise the pseudo word is formatted into wordbuf with
 * COLLOC_STEM_FORMAT from the two stem numbers and colloc_val, and
 * passed to get_stem_truthtab() as both the stem and the original word.
 */
483 TRUTHTAB *boolyac_COLLOC (
489 char wordbuf [DtSrMAXWIDTH_HWORD];
491 if (word1tt->stemno < 0 || word2tt->stemno < 0) {
492 /* Message #3 is called in two places (here and in add_syntax_errmsg) */
493 sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 3,
494 "%s COLLOCATION operator (@) may\n"
495 "only be positioned between two words."),
497 DtSearchAddMessage (msgbuf);
500 if (word1tt->stemno == word2tt->stemno) {
501 sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 12,
502 "%s Collocation operator is not\n"
503 "permitted between identical words."),
505 DtSearchAddMessage (msgbuf);
/* NOTE(review): sprintf() is unbounded; whether the output always fits
 * in wordbuf depends on COLLOC_STEM_FORMAT and the size of colloc_val,
 * neither of which is defined here -- confirm.
 */
508 sprintf (wordbuf, COLLOC_STEM_FORMAT,
509 word1tt->stemno, word2tt->stemno, colloc_val);
510 if ((newtt = get_stem_truthtab (wordbuf, wordbuf)) == NULL)
514 if (debugging_boolpars) {
515 fprintf (aa_stderr, " COLLOC: exprs=%p,%p-->expr=%p pmsz=%d\n",
516 (void *) word1tt, (void *) word2tt, (void *) newtt, newtt->pmsz);
520 } /* boolyac_COLLOC() */
523 /****************************************/
527 /****************************************/
528 /* Replaces standard yacc error routine.
 *
 * msg  text supplied by the generated parser.
 *
 * When msg is exactly the generic syntax error text, it is replaced by
 * something more helpful, chosen in this order:
 *   1. messages are already queued (DtSearchHasMessages) -- the branch
 *      taken here adds nothing visible in this listing;
 *   2. the lexer discarded at least one invalid word
 *      (parser_invalid_wordcount > 0): add_syntax_errmsg(6);
 *   3. otherwise catalog message 1, the general invalid-query text.
 * NOTE(review): any other msg appears to be queued verbatim by the
 * final DtSearchAddMessage (msg) -- confirm against the full source.
 */
529 void yyerror (char *msg) {
530 if (strcmp (msg, "syntax error") == 0) {
531 if (DtSearchHasMessages())
533 else if (parser_invalid_wordcount > 0)
534 add_syntax_errmsg(6);
536 sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 1,
537 "%s Your search string is an invalid\n"
538 "boolean query. Please reformulate and try again."),
540 DtSearchAddMessage (msgbuf);
544 DtSearchAddMessage (msg);
549 /****************************************/
553 /****************************************/
554 /* Subroutine of yylex(). Copies passed substring
555 * into a zero-terminated buffer of its own.
556 * Static buffer good until next call.
/* copy_token: copies toklen bytes starting at tokenp into a buffer
 * owned by this function.
 *
 * tokenp  start of the token inside the query string.
 * toklen  number of bytes to copy.
 *
 * buf and bufsz are function-static, so the storage is shared by all
 * calls. The buffer is replaced only when toklen exceeds the recorded
 * size; the new size is 1.5 times toklen, and the allocation request
 * adds 4 more bytes on top of that.
 */
558 static char *copy_token (UCHAR *tokenp, size_t toklen)
560 static char *buf = NULL;
561 static size_t bufsz = 0;
562 if (toklen > bufsz) {
565 bufsz = toklen + (toklen >> 1); /* 1.5 times size needed */
566 buf = austext_malloc (bufsz + 4, PROGNAME"182", NULL);
568 strncpy (buf, (char *) tokenp, toklen);
574 /****************************************/
578 /****************************************/
579 /* Delivers tokens to yyparse() from usrblk.query */
/* yylex body: scans the query from next_lex_char and returns one token
 * per call to yyparse().
 *
 * Leading whitespace (ascii_charmap flag WHITESPACE) is skipped, then
 * the current character selects the token:
 *   |   OR operator.
 *   ~   NOT operator; if the previous token was not an operator an
 *       implied AND is generated first.
 *   &   AND operator; in an all-ANDs query a repeated AND is dropped
 *       and scanning restarts at GET_ANOTHER_TOKEN.
 *   (   open parenthesis; an implied AND is generated first when the
 *       previous token was not an operator.
 *   )   close parenthesis; one that would take paren_count below zero
 *       is dropped.
 *   @   collocation operator: the token runs to the next WORD_ENDERS
 *       character. It is an error (message 10) unless the database has
 *       ORA_BLOB set in dbrec.or_dbaccess, and an error (message 11)
 *       unless atoi() of the text after the @ is greater than zero.
 *   anything else is treated as a word: the token runs to the next
 *       WORD_ENDERS character and is passed to the database's language
 *       parser with PA_MSGS set. A NULL result counts as an invalid
 *       word. A result whose length differs from the token is rejected
 *       with message 14. For OE_SRCH_STEMS requests the word is
 *       stemmed and prefixed with STEM_CH. The final text goes to
 *       get_stem_truthtab(), whose result becomes yylval.truthtab.
 * last_token_was_boolop is updated on every path so that the next call
 * knows whether an implied AND is needed.
 */
585 char mystembuf [DtSrMAXWIDTH_HWORD + 4];
589 /* Skip white space */
590 while (ascii_charmap[*next_lex_char] & WHITESPACE)
593 /* Terminating zero indicates end of query and end of parse.
594 * Automatically close unbalanced parentheses.
596 if (*next_lex_char == 0) {
597 if (paren_count > 0) {
610 switch (*next_lex_char) {
611 case '|': /* OR operator */
612 last_token_was_boolop = TRUE;
619 case '~': /* NOT operator */
620 if (!last_token_was_boolop) {
621 /* Generate implied AND between words
622 * and parenthesized expressions.
623 * A NOT is not itself boolean; it must
624 * precede the next word or expression.
626 last_token_was_boolop = TRUE;
632 last_token_was_boolop = TRUE;
639 case '&': /* AND operator */
640 if (last_token_was_boolop && qry_is_all_ANDs) {
641 /* Ignore multiple AND operators.
642 * These might occur if we silently
643 * discarded some invalid words.
646 goto GET_ANOTHER_TOKEN;
648 last_token_was_boolop = TRUE;
655 case '(': /* OPEN parentheses */
656 if (!last_token_was_boolop) {
657 /* Generate implied AND between words
658 * and parenthesized expressions.
660 last_token_was_boolop = TRUE;
673 case ')': /* CLOSE parentheses */
674 /* Just discard excessive right parentheses */
675 if (--paren_count < 0) {
678 goto GET_ANOTHER_TOKEN;
680 last_token_was_boolop = FALSE;
687 case '@': /* COLLOCATION operator */
688 /* Collocation token:
689 * Token is defined as the collocation char followed
690 * by one or more numeric digits: "@#[#...]".
691 * Syntactically it's a kind of an AND operator.
692 * Semantically it's a pseudo word token
693 * (it will occupy a slot in the stems array).
694 * The yylval is the integer value following
695 * the collocation character.
697 yyleng = strcspn ((char *) next_lex_char + 1, WORD_ENDERS) + 1;
698 yytext = copy_token (next_lex_char, yyleng);
699 next_lex_char += yyleng;
701 if ((usrblk.dblk->dbrec.or_dbaccess & ORA_BLOB) == 0) {
702 retn_token = ERROR_TOKEN;
703 sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 10,
704 "%s Collocation searches not available for database '%s'."),
705 PROGNAME"2567", usrblk.dblk->label);
706 DtSearchAddMessage (msgbuf);
709 yylval.int_val = atoi (yytext + 1);
710 if (yylval.int_val <= 0) {
711 retn_token = ERROR_TOKEN;
712 sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 11,
713 "%s Collocation operator '%.*s' is invalid.\n"
714 "Correct format is '@n' where n is greater than zero.") ,
715 PROGNAME"294", DtSrMAXWIDTH_HWORD, yytext);
716 DtSearchAddMessage (msgbuf);
719 last_token_was_boolop = TRUE;
720 retn_token = COLLOC_TOKEN;
725 /* Presumed word token:
726 * Token is all text chars until next whitespace,
727 * next lex token, or end of string.
728 * Linguistically parse it and optionally stem it.
729 * The token value is the truth table for one
730 * word: all permutes with only that word's
731 * bits turned on. If the word is already
732 * in the stems array, then the permutes
733 * position is the word's index in the array.
734 * If the word is not in the array, it's added.
735 * If the array is full, then an error is reported.
737 if (!last_token_was_boolop) {
738 /* Generate implied AND between words
739 * and parenthesized expressions.
741 last_token_was_boolop = TRUE;
747 yyleng = strcspn ((char *) next_lex_char, WORD_ENDERS);
748 yytext = copy_token (next_lex_char, yyleng);
749 next_lex_char += yyleng;
751 * Linguistically parse the token.
752 * Failure can occur because word is too short
753 * or too long, it's on the stoplist, etc.
754 * Setting PA_MSGS causes parser to explain
755 * invalid words with a msg.
757 memset (&parg, 0, sizeof(PARG));
758 parg.dblk = usrblk.dblk;
759 parg.string = yytext;
760 /*****if (!qry_is_all_ANDs)********/
761 parg.flags = PA_MSGS;
762 stembufp = usrblk.dblk->parser (&parg);
763 if (debugging_boolpars) {
764 fprintf (aa_stderr, " lang: '%s' -> '%s'\n",
765 yytext, (stembufp)? stembufp : "<null>");
769 * If token is not a linguistically valid word,
770 * one of two things can happen. If the query
771 * is all_ANDs (most common type) we silently
773 * Otherwise report error and quit now.
775 if (stembufp == NULL) {
776 parser_invalid_wordcount++;
778 goto GET_ANOTHER_TOKEN;
779 retn_token = ERROR_TOKEN;
780 if (!DtSearchHasMessages()) {
781 sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 13,
782 "%s Word '%.*s' is invalid.") ,
783 PROGNAME"315", DtSrMAXWIDTH_HWORD, yytext);
784 DtSearchAddMessage (msgbuf);
788 if (strlen(stembufp) != strlen(yytext)) {
789 retn_token = ERROR_TOKEN;
790 sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 14,
791 "%s String '%.*s' is not a single word.") ,
792 PROGNAME"634", DtSrMAXWIDTH_HWORD, yytext);
793 DtSearchAddMessage (msgbuf);
797 * If stemming, we must prefix term with
798 * special stem char in the stems array.
800 if (usrblk.request == OE_SRCH_STEMS) {
801 stembufp = usrblk.dblk->stemmer (stembufp, usrblk.dblk);
802 if (debugging_boolpars) {
803 fprintf (aa_stderr, " stemer: -> '%s'\n", stembufp);
806 mystembuf[0] = STEM_CH;
807 strncpy (mystembuf + 1, stembufp, DtSrMAXWIDTH_HWORD);
808 mystembuf [DtSrMAXWIDTH_HWORD - 1] = 0;
809 stembufp = mystembuf;
812 /* Load stem into stems arrays and return its truth table. */
813 if ((yylval.truthtab = get_stem_truthtab (stembufp, yytext))) {
814 retn_token = WORD_TOKEN;
815 last_token_was_boolop = FALSE;
818 retn_token = ERROR_TOKEN;
821 } /* switch on *next_lex_char */
824 if (debugging_boolpars) {
826 " yylex: op?=%d parct=%d tok#=%d lval=%p%sYYTEXT='%s'\n",
827 last_token_was_boolop, paren_count,
828 retn_token, (void *) yylval.truthtab,
829 (retn_token == COLLOC_TOKEN)? "\t\t" : "\t",
838 /****************************************/
842 /****************************************/
843 /* Called from Opera_Engine for boolean searches.
844 * Driver for yyparse().
845 * Expects usrblk.request == OE_SRCH_STEMS or OE_SRCH_WORDS.
846 * If parse is completely successful (query is valid), outputs
848 * saveusr.stems (stemmed if necessary with STEM_CH as first char,
849 * and phony colloc words with '@' as first char),
850 * usrblk.stems (original unstemmed query terms for err msgs),
854 * and returns TRUE. Truthtab allocation good until next call.
855 * If parse fails, returns FALSE and err msg(s) on msglist.
/* boolean_parse: entry point that prepares the lexer state for
 * usrblk.query and validates the resulting truth table.
 *
 * Steps visible here:
 *   - debugging_boolpars is taken from usrblk.debug & USRDBG_BOOL;
 *   - msgbuf is allocated with room for 300 bytes plus one word;
 *   - a NULL query queues message 2, and the query is then scanned
 *     for at least one non-whitespace character;
 *   - the lexer globals are reset and qry_has_no_NOTs and
 *     qry_is_all_ANDs are computed from the raw query text;
 *   - after the parse, truth tables still on ttlist are freed;
 *   - a final table with no permutes is rejected with message 15 and
 *     one with 256 or more with message 16.
 */
857 int boolean_parse (void)
861 TRUTHTAB *tt, *ttnext;
863 debugging_boolpars = (usrblk.debug & USRDBG_BOOL);
865 msgbuf = austext_malloc (300 + DtSrMAXWIDTH_HWORD,
866 PROGNAME"255", NULL);
868 /* Test for empty query */
869 if (usrblk.query == NULL) {
871 /* Message #2 is called in two places (here and in add_syntax_errmsg, whose fallback text differs) */
872 sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 2,
873 "%s Query is empty."), PROGNAME"289");
874 DtSearchAddMessage (msgbuf);
/* Look for at least one character that is not whitespace. */
877 for (cptr = usrblk.query; *cptr; cptr++) {
878 if ((ascii_charmap[*cptr] & WHITESPACE) == 0)
884 /* Init globals for yylex and yyparse */
885 next_lex_char = (UCHAR *) usrblk.query;
888 last_token_was_boolop = TRUE;
889 saveusr.stemcount = 0;
890 parser_invalid_wordcount = 0;
892 /* Query "is all ANDS" if it has no ORs, NOTs, or COLLOCs.
893 * Missing or linguistically invalid words will be silently
894 * discarded for all_ANDs queries.
895 * Query "has no NOTs" if it has no NOTs.
896 * Results from queries without NOTs can be statistically sorted.
898 qry_has_no_NOTs = !strchr (usrblk.query, '~');
899 qry_is_all_ANDs = !strpbrk (usrblk.query, "|~@");
901 if (debugging_boolpars || (usrblk.debug & USRDBG_SRCHCMPL)) {
903 "start boolean_parse: stem?=%d allANDs?=%d noNOTs?=%d\n"
905 (usrblk.request == OE_SRCH_STEMS),
906 qry_is_all_ANDs, qry_has_no_NOTs, usrblk.query);
913 /* Free entire remaining ttlist. Only you
914 * can prevent forest fires and memory leaks.
924 if (debugging_boolpars || (usrblk.debug & USRDBG_SRCHCMPL)) {
925 print_stems (saveusr.stemcount, saveusr.stems,
926 PROGNAME"815 end boolean_parse, syntax ok,");
927 fprintf (aa_stderr, " permutes=%d:", final_truthtab.pmsz);
928 for (i=0; i<16; i++) {
929 if (i >= final_truthtab.pmsz)
931 fprintf (aa_stderr, " %02x", final_truthtab.permutes [i]);
933 fputc ('\n', aa_stderr);
937 if (final_truthtab.pmsz <= 0) {
938 sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 15,
939 "%s Your query cannot logically return\n"
940 "any records. Please reformulate and try again."),
942 DtSearchAddMessage (msgbuf);
945 if (final_truthtab.pmsz >= 256) {
946 sprintf (msgbuf, catgets(dtsearch_catd, MS_boolpars, 16,
947 "%s Your query will return entire database\n"
948 "'%s'. Please reformulate and try again.") ,
949 PROGNAME"341", usrblk.dblk->label);
950 DtSearchAddMessage (msgbuf);
954 } /* boolean_parse() */
/* Standalone test harness, compiled only when TESTBOOL is defined.
 * It supplies its own zero-initialized usrblk and saveusr objects and
 * refers to three debug switches defined in other modules.
 */
957 #ifdef TESTBOOL /*-----------------------------------------------*/
959 USRBLK usrblk = { 0 };
961 SAVEUSR saveusr = { 0 };
962 extern int debugging_teskey;
963 extern int debugging_paice;
964 extern int debugging_jpn;
966 /****************************************/
968 /* process_user_args */
970 /****************************************/
971 /* Subroutine of main(). Validates and loads global
972 * variables with values from command line arguments.
/* process_user_args: applies the test harness command line options to
 * the dblk object and the debug switches.
 *
 * argc, argv  as received by main().
 *
 * Options handled in the visible code:
 *   third character x  -> dblk.dbrec.or_maxwordsz = atoi() of the rest;
 *   third character n  -> dblk.dbrec.or_minwordsz = atoi() of the rest;
 *   language number    -> dblk.dbrec.or_language = atoi() of the text
 *                         after the option letter;
 *   debug letters t, p, j switch on debugging_teskey, debugging_paice
 *                         and debugging_jpn; any other letter produces
 *                         the invalid debug option message.
 * An unrecognized argument produces the invalid command line argument
 * message, and the usage text lists the accepted options.
 * atoi() reports no errors, so non-numeric values are read as 0.
 */
974 static void process_user_args (int argc, char *argv[])
982 /* Each pass grabs new parm of "-xxx" format */
990 if (argptr[2] == 'x')
991 dblk.dbrec.or_maxwordsz = atoi (argptr + 3);
992 else if (argptr[2] == 'n')
993 dblk.dbrec.or_minwordsz = atoi (argptr + 3);
999 dblk.dbrec.or_language = atoi (argptr + 2);
1003 for (cptr = argptr+2; *cptr != 0; cptr++) {
1005 case 't': debugging_teskey = TRUE; break;
1006 case 'p': debugging_paice = TRUE; break;
1007 case 'j': debugging_jpn = TRUE; break;
1011 "%s Invalid debug option %c.\a\n",
1012 PROGNAME"049", *cptr);
1022 "%s Invalid command line argument '%s'.\a\n",
1023 PROGNAME"059", argptr);
1028 } /* main loop on each arg */
1033 "\nUSAGE: %s [options]\n"
1034 " -mx# maximum word size.\n"
1035 " -mn# minimum word size.\n"
1036 " -dtpj Debug: Teskey, Paice, Japanese.\n"
1037 " -l# language number. Default 0.\n",
1042 } /* process_user_args() */
1045 /****************************************/
1049 /****************************************/
/* main (TESTBOOL harness): interactive driver for boolean_parse().
 *
 * Setup: usrblk and dblk are zeroed, usrblk.dblk is pointed at dblk,
 * USRDBG_BOOL is switched on, the database is named "testbool" with
 * its label aliasing the name, and ORA_BLOB is set so collocation
 * tokens are accepted. Command line options are then applied and
 * load_language() is called; its failure path prints the queued
 * messages.
 *
 * Loop: each line read from stdin is one query. A line holding only
 * a period or only q is tested for explicitly (the prompt says these
 * quit). A leading $ selects OE_SRCH_STEMS and is skipped; anything
 * else is parsed as OE_SRCH_WORDS. After boolean_parse() any queued
 * messages are printed and freed.
 */
1050 int main (int argc, char *argv[])
1055 char linebuf [1024];
1057 /* Init global variables */
1060 memset (&usrblk, 0, sizeof(USRBLK));
1061 usrblk.dblk = &dblk;
1062 usrblk.debug |= USRDBG_BOOL; /* set debugging_boolpars */
1064 memset (&dblk, 0, sizeof(DBLK));
1065 strcpy (dblk.name, "testbool");
1066 dblk.label = dblk.name;
1067 dblk.dbrec.or_dbaccess |= ORA_BLOB; /* enable collocations */
1069 /* Read command line args */
1070 process_user_args (argc, argv);
1072 if (!load_language (&dblk, NULL)) {
1074 PROGNAME"140 load_language() failed. Msgs:\n%s\n",
1075 DtSearchGetMessages());
1078 fprintf (aa_stderr, " lang=%d minwdsz=%d maxwdsz=%d.\n",
1079 dblk.dbrec.or_language,
1080 dblk.dbrec.or_minwordsz,
1081 dblk.dbrec.or_maxwordsz);
1083 /* Main loop. Each line is a boolean query. */
1084 printf ("Enter an AusText boolean query. 'q' or '.' to quit.\n"
1085 "If first char is '$', words will be stemmed:\n> ");
1087 while (fgets (linebuf, sizeof(linebuf), stdin) != NULL) {
1089 linebuf [sizeof(linebuf) - 1] = 0;
1090 if (strcmp (linebuf, ".\n") == 0)
1092 if (strcmp (linebuf, "q\n") == 0)
1094 if (linebuf[0] == '\n')
/* NOTE(review): this assumes the line ends in a newline. fgets() omits
 * it for an over-long line or a final line without one, in which case
 * the last real character is overwritten instead -- confirm acceptable
 * for a test tool.
 */
1096 linebuf [strlen(linebuf) - 1] = 0; /* overlay \n */
1098 if (linebuf[0] == '$') {
1099 usrblk.query = linebuf + 1;
1100 usrblk.request = OE_SRCH_STEMS;
1103 usrblk.query = linebuf;
1104 usrblk.request = OE_SRCH_WORDS;
1107 if (!boolean_parse())
1108 puts (PROGNAME"707 boolean_parse() returned FALSE (OE_BAD_QUERY).");
1109 if (DtSearchHasMessages()) {
1110 printf ("mmmmm Messages returned to user mmmmmmmmmmmmmmmmmm\n"
1111 "%s\nmmmmm End of messages to user mmmmmmmmmmmmmmmmmmmm\n",
1112 DtSearchGetMessages());
1113 DtSearchFreeMessages();
1116 printf ("--------------------------------\n> ");
1118 } /* main read loop for each query line */
1122 #endif /* TESTBOOL */
1124 /********************* BOOLPARS.C ********************/