2 * COMPONENT_NAME: austext
4 * FUNCTIONS: hilite_cleartext
9 * (C) COPYRIGHT International Business Machines Corp. 1992,1996
11 * Licensed Materials - Property of IBM
12 * US Government Users Restricted Rights - Use, duplication or
13 * disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
15 /******************************* HILITE.C ********************************
16 * $XConsortium: hilite.c /main/5 1996/05/07 13:36:46 drk $
18 * Opera Engine (OE) functions that create the usrblk.hitwords
19 * array in response to several user requests.
20 * The hitwords array identifies the offset and
21 * length of words or substrings in cleartext to be hilited
22 * by the User Interface (UI).
23 * Formerly the hilite.c module was named oehitw.c
26 * Revision 2.4 1996/03/20 19:33:36 miker
27 * Changed hilite_cleartext() usage to pass in stems array
28 * so code could be reused for collocations searches.
30 * Revision 2.3 1996/02/01 19:06:53 miker
31 * AusText 2.1.11, DtSearch 0.3: Major rewrite for new parsers.
32 * Replaced oe_stems_to_hitwords with hilite_cleartext.
34 * Revision 2.2 1995/10/24 22:03:53 miker
37 * Revision 2.1 1995/09/22 20:53:52 miker
38 * Freeze DtSearch 0.1, AusText 2.1.8
40 * Revision 1.1 1995/08/31 21:02:32 miker
47 #define PROGNAME "HILITE"
51 /****************************************/
53 /* hilite_cleartext */
55 /****************************************/
56 /* Builds usrblk.hitwords array from usrblk.cleartext
57 * and passed stems array. Arg parse_type must
58 * contain 'W' or 'S' to correctly indicate whether
59 * the stems array is loaded with parsed words or stems.
60 * Stems array must be same format as usrblk.stems (double
61 * indexed array with max DtSrMAXWIDTH_HWORD columns per row).
62 * (Formerly this function was named 'build_hitwords',
63 * then 'oe_stems_to_hitwords').
64 * Stores the new array in usrblk.hitwords and the hit count in usrblk.hitwcount.
65 * If zero hitwords, stores NULL in usrblk.hitwords.
67 long hilite_cleartext (int parse_type, char *stems, int stemcount)
71 DBLK *dblk = usrblk.dblk;
73 char * (*parser)() = usrblk.dblk->parser;
74 char * (*stemmer)() = usrblk.dblk->stemmer;
75 DtSrHitword *hitwords;
76 size_t hitwords_size = 0;
80 int debugging_hilite = (usrblk.debug & USRDBG_HILITE);
82 /* Ensure current usrblk.hitwords values are clear */
/* Zero the parser argument block, then point it at the cleartext to be
 * scanned. offsetp is aimed at the local offset variable, whose value is
 * later copied into each recorded hit; presumably the parser writes each
 * word position through it -- TODO confirm against the parser.
 * PA_HILITING is OR-ed into the flags for this parse.
 */
85 memset (&parg, 0, sizeof(PARG));
87 parg.string = usrblk.cleartext;
88 parg.offsetp = &offset;
89 parg.flags |= PA_HILITING;
91 /* OUTER LOOP: Parse/stem each word in cleartext */
/* The first parser call receives the PARG block; every later call
 * passes NULL. NOTE(review): the loop continuation test is not
 * visible in this listing.
 */
92 for ( textp = parser (&parg);
94 textp = parser (NULL)) {
/* Measure the word before any stemming below replaces textp; this is
 * the length stored with a hit.
 */
95 wordlen = strlen (textp);
/* When parse_type is S the stems array holds stems, so the parsed
 * word is stemmed before it is compared.
 */
96 if (parse_type == 'S')
97 textp = stemmer (textp, dblk);
98 if (debugging_hilite) {
100 PROGNAME"127 offs:%5ld '%s' %n",
103 fputc (' ', aa_stderr);
106 /* INNER LOOP: Search for word in stems array */
/* i counts down from stemcount while stemp advances by
 * DtSrMAXWIDTH_HWORD per pass (one fixed-width entry of the stems
 * array). A strcmp() result of zero means the text word matches
 * the current entry.
 */
108 for (i=stemcount; i; i--) {
109 if (strcmp (textp, stemp) == 0)
111 stemp += DtSrMAXWIDTH_HWORD;
114 /* Miss. If text word is not in
115 * stems array, go parse next word.
118 if (debugging_hilite)
119 fprintf (aa_stderr, "miss.\n");
123 /* HIT! Add to hitwords table. */
/* Grow the table only when it is full. A zero hitwords_size means
 * nothing has been allocated yet; otherwise the capacity is enlarged
 * by half and the table is resized. NOTE(review): the reason for the
 * extra 16 bytes in each size computation is not evident here.
 */
124 if (hitwcount >= hitwords_size) {
125 if (hitwords_size == 0) {
128 hitwords_size * sizeof(DtSrHitword) + 16);
131 hitwords_size += hitwords_size >> 1; /* 1.5 times */
132 if (debugging_hilite || (usrblk.debug & USRDBG_RETRVL))
134 PROGNAME"098 realloc for %ld hitwords.\n",
/* NOTE(review): the realloc() result overwrites hitwords directly,
 * so the old pointer is unavailable if the call fails; the
 * out-of-memory message below suggests failure is handled next --
 * confirm how.
 */
136 hitwords = realloc (hitwords,
137 hitwords_size * sizeof(DtSrHitword) + 16);
140 fputs (PROGNAME"091 Out of Memory!\n", aa_stderr);
/* Record where the matched word sits in cleartext and how long it is. */
144 hitwords[hitwcount].offset = offset;
145 hitwords[hitwcount].length = wordlen;
147 if (debugging_hilite)
148 fprintf (aa_stderr, "HIT! hwct=%ld.\n", hitwcount);
149 } /* end OUTER parse loop */
/* Publish the results in usrblk. hitwords is declared without an
 * initializer; it is read here only when hitwcount is nonzero,
 * otherwise NULL is stored.
 */
151 usrblk.hitwcount = hitwcount;
152 usrblk.hitwords = (hitwcount)? hitwords : NULL;
153 if (debugging_hilite || (usrblk.debug & USRDBG_RETRVL))
155 PROGNAME"138 parstyp='%c' stemct=%d hitwcount=%ld\n",
156 parse_type, stemcount, usrblk.hitwcount);
158 } /* hilite_cleartext() */
160 /******************************* HILITE.C ********************************/