2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these libraries and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
24 * COMPONENT_NAME: austext
26 * FUNCTIONS: findstr_workproc
31 * restore_findstr_hitl
35 * IBM CONFIDENTIAL -- (IBM Confidential Restricted when
36 * combined with the aggregated modules for this product)
37 * OBJECT CODE ONLY SOURCE MATERIALS
39 * (C) COPYRIGHT International Business Machines Corp. 1992,1995
41 * US Government Users Restricted Rights - Use, duplication or
42 * disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
44 /******************************* OEKWIC.C ********************************
45 * $XConsortium: oekwic.c /main/4 1996/05/07 13:42:36 drk $
47 * Opera Engine (OE) functions that create KeyWord In Context (KWIC)
48 * abstracts to replace database abstract when requested by user.
49 * KWIC abstract is a string extracted
50 * from the cleartext where the first hitword appears.
51 * Also includes find-string functions which use similar logic.
52 * Also includes a few generic OE utilities.
55 * Revision 1.6 1995/09/05 19:02:02 miker
56 * Made usrblk universal global. Deleted refs to socblk.
57 * Other minor name and function changes for DtSearch.
63 /******#define DEBUG_KWIC*********/
64 /********#define DEBUG_FINDSTR_ITER***********/
65 /******#define DEBUG_FINDSTR********/
/* Module tag that prefixes the numbered messages built in this file. */
67 #define PROGNAME "OEKWIC"
/* Starting values for saveusr.iterations, i.e. the loop count a work
 * procedure uses on its first call. */
68 #define START_KWIC_ITERATIONS 10
69 #define START_FINDSTR_ITERATIONS 10
/* Floors applied when a work procedure rescales saveusr.iterations. */
70 #define MIN_KWIC_ITERATIONS 2
71 #define MIN_FINDSTR_ITERATIONS 2
/* kwic_workproc compares the or_abstrsz of each database against this
 * minimum abstract buffer size. */
72 #define MIN_KWIC_BUFSZ 20
/* Hit indicator for the find-string search: cleared by oe_findstr_hitl,
 * set by findstr_workproc when a record contains the string, and used to
 * choose between OE_OK and OE_USER_STOP when the user stops the search. */
74 static int found_one_substring = FALSE;
76 /************************************************/
80 /************************************************/
81 /* Converts the abstract for the record in usrblk.cleartext to a
82 * KWIC string in which the first word/substring in the
83 * usrblk.hitwords array is in the center of the abstract string.
84 * Returns the new abstract in the passed buffer (in a ditto list).
85 * General format of new abstract is: "...text <<word>> text...".
 *
 * abstract: destination buffer; callers in this file pass dit->abstractp.
 * The usable length is usrblk.dblk->dbrec.or_abstrsz, reduced to
 * usrblk.kwiclen when that value is positive and smaller.
 * Only usrblk.hitwords[0] is consulted. Control characters in the text
 * copied around the word are replaced by a single space.
87 static void make_one_kwic (char *abstract)
91 long from; /* offset from beginning of cleartext */
/* Start from the abstract size of the database; a positive, smaller
 * usrblk.kwiclen overrides it. */
95 abstrsz = usrblk.dblk->dbrec.or_abstrsz;
96 if (usrblk.kwiclen > 0 && usrblk.kwiclen < abstrsz)
97 abstrsz = usrblk.kwiclen;
/* lastto marks the end of the usable output; the copy loop after the
 * word stops at lastto - 4, presumably leaving room for the closing
 * ellipsis (TODO confirm). */
99 lastto = to + abstrsz - 2;
101 /* No hitwords to abstract */
102 if (usrblk.hitwcount <= 0L)
105 /* Find beginning of 'from' string: back up from the hit word by half
 * of what is left of abstrsz after the word and 14 further bytes
 * (presumably the ellipses and angle brackets - TODO confirm).
 * NOTE(review): if the subtraction goes negative, the result of >> on a
 * negative signed value is implementation-defined. */
106 from = usrblk.hitwords[0].offset -
107 ((abstrsz - usrblk.hitwords[0].length - 14) >> 1L);
108 if (from < 0L) from = 0L;
110 /* If abstract does not begin at start of cleartext, print ellipsis */
111 if (from != 0L) for (i=3; i>0; i--)
114 /* Move text up to the start of the word.
115 * Replace control characters with a single space.
117 while (from < usrblk.hitwords[0].offset)
119 c = usrblk.cleartext [from++];
120 *to++ = (iscntrl(c))? ' ' : c;
123 /* Move the word itself, highlighted with angle brackets */
126 for (i=0; i<usrblk.hitwords[0].length; i++)
127 *to++ = usrblk.cleartext [from++];
131 /* Move text beyond the word until end of input or end of abstract buffer */
/* NOTE(review): if c is a plain char, iscntrl(c) is undefined for
 * negative values; confirm the declaration of c or cast to
 * unsigned char. */
132 while (from < usrblk.clearlen && to < lastto - 4)
134 c = usrblk.cleartext [from++];
135 *to++ = (iscntrl(c))? ' ' : c;
138 /* If not end of input, print another ellipsis */
139 if (from < usrblk.clearlen) for (i=3; i>0; i--) *to++ = '.';
144 printf("%s\n", abstract);
148 } /* make_one_kwic() */
151 /************************************************/
155 /************************************************/
156 /* Converts all abstracts in dittolist to KWIC strings.
 * Sets usrblk.retncode to OE_BAD_QUERY when usrblk.dittocount or
 * usrblk.stemcount is not positive. After validation it selects the
 * kind of stems from usrblk.search_type, primes saveusr with the head of
 * usrblk.dittolist and START_KWIC_ITERATIONS, clears USR_STOPSRCH, and
 * makes the first call to kwic_workproc(), which sets usrblk.retncode. */
157 void oe_ditto2kwic (void)
/* Local prototype for the work procedure called at the end. */
159 void kwic_workproc (void);
160 int search_type = usrblk.search_type;
162 /* First validate the input fields in usrblk */
163 if (usrblk.dittocount <= 0L || usrblk.stemcount <= 0)
165 usrblk.retncode = OE_BAD_QUERY;
169 /* Set up various global variables for calls to oe_stems_to_hitwords().
170 * Only an exact words search (search_type == 'W') does not require
171 * stemming. Statistical, semantic and exact stems searches
172 * do require stemming.
174 OE_kind_of_stems = (search_type == 'W') ? WORD_KIND : STEM_KIND;
176 /* Initialize the loop status that saveusr keeps between workproc calls */
177 saveusr.dittolist = usrblk.dittolist;
178 saveusr.iterations = START_KWIC_ITERATIONS;
180 /* Call the work procedure that traverses the entire dittolist.
181 * If NO_ITERATE is specified, it will run to completion.
182 * Otherwise it will run just its first set of iterations,
183 * move its own address into usrblk.workproc, then return.
184 * Until workproc/mainloop is done, the static variable 'saveusr.dittolist'
185 * will always indicate where the last execution of the loop ended.
187 usrblk.flags &= ~USR_STOPSRCH; /* init stop button to OFF */
188 kwic_workproc(); /* work proc will set retncode */
190 } /* oe_ditto2kwic() */
193 /************************************************/
197 /************************************************/
198 /* Called repeatedly to convert abstracts in dittolist to KWIC strings.
 * Each call resumes at saveusr.dittolist and starts its loop counter at
 * saveusr.iterations; the counter is only decremented when
 * USR_NO_ITERATE is clear. If the list is not finished, the function
 * rescales saveusr.iterations from the elapsed time, saves its position,
 * stores itself in usrblk.workproc and reports OE_SEARCHING.
 * A pending USR_STOPSRCH flag yields OE_USER_STOP. */
199 void kwic_workproc (void)
202 int entire_list_done;
209 /* test whether user has pushed STOP button since last call */
210 if (usrblk.flags & USR_STOPSRCH)
212 usrblk.retncode = OE_USER_STOP;
217 printf ("\nSTART ITERATIONS = %d\n", saveusr.iterations);
220 /* initialize the loop */
221 dit = saveusr.dittolist;
222 entire_list_done = FALSE;
225 /* Traverse entire dittolist. Unblob each record,
226 * create a hitword list, then use it to convert abstract.
228 for (iter = saveusr.iterations; iter > 0; )
230 /* Set usrblk.dblk ptr to correct database for curr ditto node */
231 if (dbn_to_dblk (dit->dbn))
232 saveusr.vistano = usrblk.dblk->vistano;
235 usrblk.retncode = OE_NOTAVAIL;
239 /* If this record's database has too small an
240 * abstract buffer, just skip the conversion.
242 if (usrblk.dblk->dbrec.or_abstrsz < MIN_KWIC_BUFSZ)
245 /* Skip any records which do not have blobs. */
246 if (usrblk.dblk->dbrec.or_dbaccess != ORA_BLOB)
/* Read the blobs of the record; a NULL list is reported as a corrupted
 * database address and OE_NOTAVAIL. */
249 if ((bloblist = ve_getblobs (dit->dba, saveusr.vistano)) == NULL)
/* NOTE(review): sprintf() into sprintbuf is unbounded and the size of
 * sprintbuf is not declared in this part of the file; confirm it can
 * hold the record key and the database label. */
251 sprintf (sprintbuf, PROGNAME"149 "
252 "Corrupted database address on hitlist. "
253 "Erroneous record: '%s' in database '%s'.",
254 dit->reckey, usrblk.dblk->label);
255 DtSearchAddMessage (sprintbuf);
256 usrblk.retncode = OE_NOTAVAIL;
/* oe_unblob() supplies the return code; anything other than OE_OK is
 * reported as an erroneous record. */
259 usrblk.retncode = oe_unblob (bloblist, FALSE);
260 if (usrblk.retncode != OE_OK)
262 sprintf (sprintbuf, PROGNAME"213 "
263 "Erroneous record: '%s' in database '%s'.",
264 dit->reckey, usrblk.dblk->label);
265 DtSearchAddMessage (sprintbuf);
/* Convert the stems to hitwords (presumably filling usrblk.hitwords -
 * TODO confirm), then rewrite the abstract of this node in place. */
268 if (!oe_stems_to_hitwords (1))
270 make_one_kwic (dit->abstractp);
272 /* Increment ditto pointer. Check for end of dittolist. */
274 if ((dit = dit->link) == NULL)
276 entire_list_done = TRUE;
280 /* Decrement iteration counter unless user said not to */
281 if (!(usrblk.flags & USR_NO_ITERATE))
283 } /* end iteration loop */
/* Optional debug dump of the list, starting at the saved position. */
285 if (usrblk.debug & (USRDBG_SRCHCMPL | USRDBG_HITLIST))
286 print_dittolist (saveusr.dittolist, PROGNAME"888");
288 /* End of current set of iterations.
289 * If main loop is not completed,
290 * adjust number of iterations to about 1 second,
291 * save current status, and return to caller.
293 if (!entire_list_done)
295 time_dif = difftime (time(NULL), start_time);
297 saveusr.iterations = (float) saveusr.iterations * 1.5;
298 else if (time_dif > 1.)
299 saveusr.iterations = (double) saveusr.iterations / time_dif;
300 if (saveusr.iterations < MIN_KWIC_ITERATIONS)
301 saveusr.iterations = MIN_KWIC_ITERATIONS;
304 printf ("\nEND ITERATIONS = %d, time_dif = %lf\n",
305 saveusr.iterations, time_dif);
308 saveusr.dittolist = dit; /* temp save curr loc in usrblk.dittolist */
/* Register this function as the pending work procedure and report that
 * the search is still running. */
309 usrblk.workproc = kwic_workproc;
310 usrblk.retncode = OE_SEARCHING;
312 } /* endif where we are still searching */
314 /* Unless user specified no_iterate, calling program should always
315 * reset workproc to NULL. Dummy_workproc just appends an
316 * error message to notify programmer of his problem.
318 if (!(usrblk.flags & USR_NO_ITERATE))
319 usrblk.workproc = dummy_workproc;
321 usrblk.retncode = OE_OK;
323 } /* kwic_workproc() */
326 /************************************************/
328 /* oe_findstr_hitl */
330 /************************************************/
331 /* Reduces dittolist to the subset of records that
332 * contain string in query, and converts their
333 * abstracts to a KWIC-like string.
 *
 * Validates usrblk.query and the hitlist, saves the current stems in
 * saveusr, installs the uppercased query as the only stem, primes the
 * saveusr loop status and makes the first call to findstr_workproc().
 * Return codes set here: OE_BAD_QUERY for a missing, empty or overlong
 * query, OE_BAD_HITLIST for an empty hitlist.
335 void oe_findstr_hitl (void)
337 void findstr_workproc (void);
341 /* Validate input fields */
342 if (usrblk.query == NULL || usrblk.query[0] == '\0')
344 usrblk.retncode = OE_BAD_QUERY;
/* The query is later copied into usrblk.stems[0], so it is limited to
 * DtSrMAXWIDTH_HWORD - 1 characters. */
347 if (strlen(usrblk.query) >= DtSrMAXWIDTH_HWORD)
349 sprintf (msgbuf, PROGNAME"516 "
350 "No more than %d characters are allowed in the search string.",
351 DtSrMAXWIDTH_HWORD - 1);
352 DtSearchAddMessage (msgbuf);
353 usrblk.retncode = OE_BAD_QUERY;
356 if (usrblk.dittocount <= 0 || usrblk.dittolist == NULL)
358 DtSearchAddMessage (PROGNAME"317 Hitlist empty. Nothing to search.");
359 usrblk.retncode = OE_BAD_HITLIST;
363 /* Save stems array so we can restore it later,
364 * then copy the query to it for use by oe_stems_to_hitwords call.
365 * Convert to uppercase as we copy.
367 saveusr.stemcount = usrblk.stemcount;
368 memcpy (saveusr.stems, usrblk.stems,
369 (size_t) (usrblk.stemcount * DtSrMAXWIDTH_HWORD));
370 usrblk.stemcount = 1;
371 ptr = usrblk.stems[0]; /* target */
372 ptr2 = usrblk.query; /* source */
/* NOTE(review): if the query is a plain char string, toupper() is
 * undefined for negative values; confirm the pointer types or cast to
 * unsigned char. */
374 *ptr++ = toupper(*ptr2++);
377 OE_kind_of_stems = STRING_KIND;
379 /* initialize saveusr loop status stuff maintained between workproc calls */
380 saveusr.vistano = usrblk.dblk->vistano;
381 saveusr.iterations = START_FINDSTR_ITERATIONS;
382 saveusr.dittolist = usrblk.dittolist; /* curr start of each iteration */
383 saveusr.dittocount = 0; /* number of records containing substring */
385 /* Call the work procedure that traverses the entire dittolist.
386 * If NO_ITERATE is specified, it will run to completion.
387 * Otherwise it will run just its first set of iterations,
388 * move its own address into usrblk.workproc, then return.
389 * Until workproc/mainloop is done, the static variable 'saveusr.dittolist'
390 * will always indicate where the last execution of the loop ended.
391 * The ditto node for each record which is found to contain
392 * the string will be removed from dittolist and added to newditlist.
394 usrblk.flags &= ~USR_STOPSRCH; /* init stop button to OFF */
395 found_one_substring = FALSE; /* init HIT indicator */
396 findstr_workproc(); /* work proc will set retncode */
398 } /* oe_findstr_hitl() */
401 /************************************************/
403 /* restore_findstr_hitl */
405 /************************************************/
406 /* Each time the find-string workproc detected the search
407 * string in a ditto node, it converted its abstract.
408 * Other nodes were just marked for deletion.
409 * When the entire dittolist has been traversed,
410 * or if the user pushes the stop button to stop traversal,
411 * this cleanup function is called to delete those nodes that
412 * were marked for deletion. However if NO records were
413 * ever found containing the string, nothing is deleted and
414 * the hitlist is restored as it was prior to the beginning of the search.
415 * The hitwords array is always blown away but the stems
416 * array is always restored no matter what.
 *
 * Hits are the nodes carrying DIT_FINDSTR; the node carrying DIT_STOP
 * marks where a user stop occurred. When saveusr.dittocount is positive
 * the nodes that remain are recounted and the recount is stored in
 * usrblk.dittocount.
418 static void restore_findstr_hitl (void)
420 DtSrResult *dit, *nextdit;
421 DtSrResult **lastlink;
422 long newdittocount = 0L;
424 /* If the string was ever found in any record,
425 * delete all nonhits up to the last successful find.
428 printf ("\nRESTORE FINDSTR: totnumhits = %ld\n", saveusr.dittocount);
430 if (saveusr.dittocount > 0)
432 dit = usrblk.dittolist;
433 lastlink = &usrblk.dittolist;
436 /* On a hit, advance the pointers, advance new dittocount.
437 * Break the loop on the last hit.
439 if (dit->flags & DIT_FINDSTR)
442 printf ("#%ld HIT %s, \"%s\"\n", newdittocount+1,
443 dit->reckey, dit->abstract);
445 lastlink = &dit->link;
447 if (++newdittocount >= saveusr.dittocount)
450 /* If this was NOT a hit, delete node,
451 * and link up loose ends.
456 printf ("RESTORE DELETING %s\n", dit->reckey);
465 /* At this point, we've cleaned up the list
466 * down to the last hit, or to its end.
467 * Keep deleting until end of list, or user's stop point.
471 if (!(usrblk.flags & USR_STOPSRCH) /* never stopped */
472 || !(dit->flags & DIT_STOP)) /* stopped somewhere ahead */
475 printf ("PAST LAST HIT, DELETING %s\n", dit->reckey);
484 break; /* user stopped at this exact node */
488 /* If there's anything left on the list,
489 * it is because the user stopped the search here.
490 * Just count the remaining records for the final tally.
496 printf ("#%ld SAVING AFTER STOP %s\n", newdittocount, dit->reckey);
500 usrblk.dittocount = newdittocount;
501 } /* endif where at least one record had a string hit */
503 /* restore the original stems array (saved by oe_findstr_hitl) */
504 usrblk.stemcount = saveusr.stemcount;
505 memcpy (usrblk.stems, saveusr.stems,
506 (size_t) (saveusr.stemcount * DtSrMAXWIDTH_HWORD));
508 printf ("LEAVING RESTORE now real dittocount = %ld, stemcount = %d\n"
509 " first stem = '%s'\n", usrblk.dittocount,
510 usrblk.stemcount, usrblk.stems[0]);
513 } /* restore_findstr_hitl() */
516 /************************************************/
518 /* findstr_workproc */
520 /************************************************/
521 /* Called repeatedly to search for character substrings in records
522 * on the hitlist, and convert their abstracts to KWIC strings.
 *
 * Each call resumes at saveusr.dittolist. A record whose hitword count
 * is positive after oe_stems_to_hitwords() gets a KWIC abstract and the
 * DIT_FINDSTR flag, and saveusr.dittocount is incremented; any other
 * record has its flags cleared.
 * While nodes remain the function stores itself in usrblk.workproc and
 * reports OE_SEARCHING. When the list is exhausted the return code
 * becomes OE_OK or OE_NOTAVAIL depending on saveusr.dittocount, and
 * restore_findstr_hitl() is called.
524 void findstr_workproc (void)
527 int entire_list_done;
530 DtSrResult *dit, *cutdit;
533 /* Test whether user has pushed STOP button since last call.
534 * The DIT_STOP marks where the search ended for restore_findstr_hitl().
536 if (usrblk.flags & USR_STOPSRCH)
538 saveusr.dittolist->flags |= DIT_STOP;
539 usrblk.retncode = (found_one_substring)? OE_OK : OE_USER_STOP;
540 restore_findstr_hitl();
544 #ifdef DEBUG_FINDSTR_ITER
545 printf ("\nSTART ITERATIONS = %d\n", saveusr.iterations);
548 /* initialize the loop */
549 dit = saveusr.dittolist;
550 entire_list_done = FALSE;
553 /* Traverse entire dittolist, starting where we last left off.
554 * Unblob each record and search for the query string.
555 * If found, mark it and convert its abstract.
556 * If not found, make sure it's not marked and then continue.
558 for (iter = saveusr.iterations; iter > 0; )
560 /*****usrblk.dba = dit->dba;**** unnecessary?******/
/* NOTE(review): every node is read with the saveusr.vistano captured by
 * oe_findstr_hitl(); unlike kwic_workproc() there is no dbn_to_dblk()
 * call per node here - confirm this is intended for hitlists that span
 * several databases. */
561 if ((bloblist = ve_getblobs (dit->dba, saveusr.vistano)) == NULL)
564 PROGNAME"390 Corrupted database address on hitlist.");
565 restore_findstr_hitl();
566 usrblk.retncode = OE_BAD_HITLIST;
569 usrblk.retncode = oe_unblob (bloblist, FALSE);
570 if (usrblk.retncode != OE_OK)
572 if (!oe_stems_to_hitwords (1))
575 if (usrblk.hitwcount > 0) /* string FOUND */
577 found_one_substring = TRUE;
578 saveusr.dittocount++;
579 #ifdef DEBUG_FINDSTR_ITER
580 printf ("<<--->> HIT numhits=%ld, reckey = %s\n",
581 saveusr.dittocount, dit->reckey);
583 make_one_kwic (dit->abstractp);
584 dit->flags |= DIT_FINDSTR;
586 else dit->flags = 0; /* string NOT found */
588 /* advance to next ditto node */
589 if ((dit = dit->link) == NULL)
591 entire_list_done = TRUE;
595 /* decrement iteration counter unless user said not to */
596 if (!(usrblk.flags & USR_NO_ITERATE)) iter--;
597 } /* end iteration loop */
599 /* End of current set of iterations.
600 * If main loop is not completed,
601 * adjust number of iterations to about 1 second,
602 * save current status, and return to caller.
604 if (!entire_list_done)
606 time_dif = difftime (time(NULL), start_time);
608 saveusr.iterations = (float) saveusr.iterations * 1.5;
609 else if (time_dif > 1.)
610 saveusr.iterations = (double) saveusr.iterations / time_dif;
611 if (saveusr.iterations < MIN_FINDSTR_ITERATIONS)
612 saveusr.iterations = MIN_FINDSTR_ITERATIONS;
614 #ifdef DEBUG_FINDSTR_ITER
615 printf ("\nEND ITERATIONS = %d, time_dif = %lf\n",
616 saveusr.iterations, time_dif);
619 saveusr.dittolist = dit; /* temp save curr loc in usrblk.dittolist */
/* Register this function as the pending work procedure and report that
 * the search is still running. */
620 usrblk.workproc = findstr_workproc;
621 usrblk.retncode = OE_SEARCHING;
623 } /* endif where we are still searching */
625 /* Completely done! Unless user specified no_iterate,
626 * calling program should always reset workproc to NULL.
627 * In case he forgets, dummy_workproc just appends an
628 * error message to notify programmer of his problem.
630 if (!(usrblk.flags & USR_NO_ITERATE))
631 usrblk.workproc = dummy_workproc;
633 if (saveusr.dittocount > 0)
634 usrblk.retncode = OE_OK;
636 usrblk.retncode = OE_NOTAVAIL;
637 restore_findstr_hitl();
639 } /* findstr_workproc() */
641 /******************************* OEKWIC.C ********************************/