2 * COMPONENT_NAME: austext
9 * (C) COPYRIGHT International Business Machines Corp. 1991,1995
11 * Licensed Materials - Property of IBM
12 * US Government Users Restricted Rights - Use, duplication or
13 * disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
17 /***************************** SearchE.h ******************************
18 * $XConsortium: SearchE.h /main/5 1996/08/12 13:16:47 cde-ibm $
20 * Header file for online DtSearch Engine (OE.C etc).
21 * SearchE.h was formerly called oe.h (Opera Engine).
22 * The old OE engine is pretty much invisible now, having
23 * been surrounded by the ausapi/DtSearch interface.
25 * The main data structure for passing and receiving arguments
26 * between the callers and the engine is USRBLK which is
27 * the only argument in the Opera_Engine() call.
28 * The actual input and output arguments in the USRBLK for each
29 * function are described below with the function description.
31 * All OE_functions pass a return code
32 * to the caller. OE_OK = successful completion,
33 * anything else may be partial completion/success or failure.
34 * Return code mnemonics are also defined below.
37 * Revision 2.7 1996/03/20 19:11:21 miker
38 * Changed prototype of hilite_cleartext().
40 * Revision 2.6 1996/03/13 22:34:53 miker
41 * Changed char to UCHAR several places.
43 * Revision 2.5 1996/03/05 19:17:50 miker
44 * oe_unblob no longer converts to uppercase.
46 * Revision 2.4 1996/03/05 15:54:06 miker
47 * Minor changes to support yacc-based boolean search.
49 * Revision 2.3 1996/02/01 16:14:38 miker
50 * Obsoleted requests OE_DITTO2KWIC, OE_VALIDATE_PWD, OE_CHANGE_PWD,
51 * OE_FINDSTR_REC, OE_FINDSTR_HITL, OE_ASSIST, OE_KILL.
52 * Restored OE_PING as the official "null" function.
53 * Added USRDBG_HILITE and USRDBG_PARSE.
54 * Replaced STRKIND, OE_kind_of_stems, and oe_stems_to_hitwords
55 * with hilite_cleartext. Deleted oe_ditto2kwic, oe_findstr_hitl.
56 * Deleted usrblk.kwiclen.
58 * Revision 2.2 1995/10/25 22:35:45 miker
61 * Revision 2.1 1995/09/22 22:39:39 miker
62 * Freeze DtSearch 0.1, AusText 2.1.8
64 * Revision 1.1 1995/08/31 20:42:33 miker
67 * The DtSearch return codes wrap around the OE return codes.
/* printf-style layout of an audit line; the literal labels its fields
 * UID, TIME, ELAP, DB and HITS. */
71 #define AUDIT_FORMAT "UID=%-8s TIME=%s ELAP=%-3ld DB=%-8s HITS=%-5ld "
/* Shorter printf-style layout carrying only the UID and TIME fields. */
72 #define AUDIT_WHOWHEN "UID=%-8s TIME=%s"
/* printf-style layout of four tab-separated strings (the second one
 * double-quoted), terminated by a newline. */
73 #define DISCARD_FORMAT "%s\t\"%s\"\t%s\t%s\n"
/* Script file name; presumably handed to call_output_script() -- TODO confirm. */
74 #define HARDCOPY_SCRIPT "opprt.bat"
75 #define MAX_HITWCOUNT 200 /* max number of hitwords that can be hilited */
/* NOTE(review): presumably the default for OE_words_hitlimit -- confirm. */
76 #define WORDS_HITLIMIT 300000L
79 /*-------------------- Request Codes ------------------------
80 * All Engine requests, in addition to input below, require valid
83 * All Engine requests, in addition to output below,
84 * may return ausapi_msglist.
85 * There is no zero request code.
86 * See list of return codes for the ones that are marked 'common retncodes'.
89 #define OE_INITIALIZE 1
90 /* does lotsa stuff--see the function.
92 * .query = AUSAPI_VERSION of UI code
93 * .dblist = data from user config file
94 * .dblk = data from user config file
96 * .dblist = modified after load of site config file
97 * .dblk = modified after load of site config file
98 * .retncode = OE_OK, OE_NOTAVAIL, OE_ABORT
101 #define OE_TEXT2FZKEY 2
102 /* converts problem description text to fzkey.
104 * .query = problem description text
105 * .dblk = desired database and db parameters
107 * .fzkey = inference from query text analysis
108 * .retncode = common retncodes + OE_BAD_QUERY
111 #define OE_SRCH_FZKEY 3
112 /* Converts fzkey to hitlist of dba's.
113 * Not available for databases without semantic dictionaries.
115 * .fzkey = database search vector
116 * .dblk = desired database and db parameters
118 * .workproc = function to be called until !OE_SEARCHING
119 * .dittolist = hitlist sorted by proximity
120 * .dittocount = # of nodes on dittolist
121 * .retncode = common retncodes + OE_SEARCHING, OE_NOTAVAIL,
122 * OE_USER_STOP, OE_SYSTEM_STOP, OE_BAD_QUERY
125 #define OE_SRCH_STEMS 4
126 /* converts string of wordstems and booleans to
127 * hitlist of dba's and stems array.
129 * .query = text string of wordstems and booleans
130 * .dblk = desired database and db parameters
132 * .fzkey = (value undefined)
133 * .workproc = function to be called until !OE_SEARCHING
134 * .dittolist = hitlist
135 * .dittocount = # of nodes on dittolist
136 * .stems = array of stems for hiliting in text of hits
137 * .stemcount = size of 'stems' array
138 * .retncode = common retncodes + OE_BAD_QUERY, OE_SEARCHING,
139 * OE_NOTAVAIL, OE_USER_STOP
142 #define OE_SRCH_WORDS 5
143 /* converts string of exact words and booleans to
144 * hitlist of dba's and stems array.
146 * .query = text string of exact words and booleans
147 * .dblk = desired database and db parameters
149 * .fzkey = (value undefined)
150 * .workproc = function to be called until !OE_SEARCHING
151 * .dittolist = hitlist
152 * .dittocount = # of nodes on dittolist
153 * .stems = array of words for hiliting in text of hits
154 * .stemcount = size of 'stems' array
155 * .retncode = common retncodes + OE_BAD_QUERY, OE_SEARCHING,
156 * OE_NOTAVAIL, OE_USER_STOP
159 #define OE_STOP_SRCH 6
160 /* sets global switch to cancel search work procedure.
161 * workproc actually cancels itself after reading switch.
162 * input: .request = OE_STOP_SRCH
163 * output: .retncode = 'common retncodes' only
166 #define OE_APPEND_NOTES 7
167 /* appends user's notes to record at current dba.
169 * .query = freeform text of append
170 * .dba = address of record to append
171 * .dblk = desired database and db parameters
173 * .retncode = common retncodes + OE_TIMEOUT, OE_DISABLED
177 /* retrieves record, cleartext, and notes for specified dba.
178 * Clears hitwords array, does not use or change stems array.
180 * .dba = address of record to retrieve
181 * .dblk = desired database and db parameters
183 * .objrec = austext record itself, as is from vista
184 * .cleartext = NULL or uncompressed text string
185 * .clearlen = 0 or size of cleartext
186 * .hitwords = array pointer cleared to NULL
187 * .hitwcount = size of hitwords array set to 0
188 * .notes = NULL, or list of notes records, as is
189 * .retncode = common retncodes + OE_NOTAVAIL
192 #define OE_GETREC_STEMS 9
193 /* retrieves record, cleartext, notes, and hitwords
194 * array for specified dba and stems (from OE_SRCH_STEMS).
195 * If no text in repository, equivalent to OE_GETREC.
197 * .dba = address of record to retrieve
198 * .stems = array of word stems to hilite
199 * .stemcount = size of stems array
200 * .dblk = desired database and db parameters
202 * .objrec = austext record itself, as is from vista
203 * .cleartext = NULL or uncompressed text string
204 * .clearlen = 0 or size of cleartext
205 * .hitwords = NULL or array of words in cleartext to hilite
206 * .hitwcount = 0 or size of hitwords array
207 * .notes = NULL, or list of notes records, as is
208 * .retncode = common retncodes + OE_NOTAVAIL
211 #define OE_GETREC_WORDS 10
212 /* retrieves record, cleartext, notes, and hitwords
213 * array for specified dba and stems array (from OE_SRCH_WORDS).
214 * If no text in repository, equivalent to OE_GETREC.
216 * .dba = address of record to retrieve
217 * .stems = array of exact words to hilite
218 * .stemcount = size of stems array
219 * .dblk = desired database and db parameters
221 * .objrec = austext record itself, as is from vista
222 * .cleartext = NULL or uncompressed text string
223 * .clearlen = 0 or size of cleartext
224 * .hitwords = NULL or array of words in cleartext to hilite
225 * .hitwcount = 0 or size of hitwords array
226 * .notes = NULL, or list of notes records, as is
227 * .retncode = common retncodes + OE_NOTAVAIL
230 #define OE_NEXT_DBA 11
231 /* advances dba to next valid b-tree address. Wraps if necessary.
233 * .dba = current address of record
234 * .dblk = desired database and db parameters
236 * .dba = address of next record in b-tree
237 * .retncode = common retncodes + OE_WRAPPED
240 #define OE_PREV_DBA 12
241 /* retreats dba to previous valid b-tree address.
242 * Wraps if necessary.
244 * .dba = current address of record
245 * .dblk = desired database and db parameters
247 * .dba = address of previous record in b-tree
248 * .retncode = common retncodes + OE_WRAPPED
251 #define OE_RECKEY2DBA 13
252 /* converts an austext record key into a dba. Wraps if not found.
254 * .query = desired record key
255 * .dblk = desired database and db parameters
257 * .dba = address of record found,
258 * or next avail address if not found
259 * .retncode = common retncodes + OE_WRAPPED
262 #define OE_MARK_DELETION 14
263 /* writes record id to an external file for
264 * possible later deletion by external program.
266 * .query = record key to be marked for deletion
267 * .dblk = database where record is located
269 * .retncode = common retncodes + OE_NOTAVAIL, OE_DISABLED
272 #define OE_GETREC_DIC 15
273 /* THIS FUNCTION IS NO LONGER SUPPORTED.
274 * IF RECEIVED BY ENGINE, IT IS TREATED EXACTLY AS OE_GETREC.
275 * Retrieves record, cleartext, notes, and hitwords
276 * array for specified dba. Hitwords are not derived from the
277 * stems array; they are all the cleartext words that are in
278 * the dictionary. (Compare to OE_GETREC_WORDS and OE_GETREC_STEMS).
279 * If no text in repository, equivalent to OE_GETREC.
281 * .dba = address of record to retrieve
282 * .dblk = desired database and db parameters
284 * .objrec = austext record itself, as is from vista
285 * .cleartext = NULL or uncompressed text string
286 * .clearlen = 0 or size of cleartext
287 * .hitwords = NULL or array of words in cleartext to hilite
288 * .hitwcount = 0 or size of hitwords array
289 * .notes = NULL, or list of notes records, as is
290 * .retncode = common retncodes + OE_NOTAVAIL
293 #define OE_DITTO2KWIC 16
294 #define OE_VALIDATE_PWD 17
295 #define OE_CHANGE_PWD 18
296 /* (These functions are obsolete) */
298 #define OE_DELETE_RECID 19
299 /* Deletes header record, all text, user notes,
300 * and word/stems references for specified record.
301 * Currently can only be called from offline program
302 * when all online austext users have been logged off.
303 * THIS FUNCTION IS <<<VERY>>> SLOW (about 15 min on large dbase)!
305 * .query = desired record key
306 * .dblk = desired database and db parameters
308 * .dba = address of record deleted
309 * THIS ADDRESS IS NO LONGER VALID!
310 * .dbatab = (undefined)
312 * .retncode = common retncodes + OE_NOTAVAIL
315 #define OE_DELETE_BATCH 20
316 /* Deletes header records, all text, user notes,
317 * and word/stems references for all records in a
318 * database address table. Currently can only be called
319 * from offline program when all online austext users
320 * have been logged off. This function is the preferred
321 * deletion method because it is faster than deleting
322 * single records at a time. Addresses not found are ignored.
324 * .dblk = desired database and db parameters
325 * .dbatab = table of valid addresses to be deleted
326 * .dbacount = number of addresses on the table
328 * .retncode = common retncodes only
332 #define OE_FINDSTR_REC 22
333 #define OE_FINDSTR_HITL 23
334 /* (These functions are obsolete) */
336 #define OE_SRCH_STATISTICAL 24
337 /* Converts string of natural language text to
338 * hitlist of dba's and stems array. Uses stems only,
339 * no booleans, all words are ORed together. Hitlist sorted
340 * based on statistics of included word stems.
342 * .query = natural language text string
343 * .dblk = desired database and db parameters
345 * .stems = array of stems for hiliting in text of hits
346 * .stemcount = size of 'stems' array (up to max allowed)
347 * .workproc = function to be called until !OE_SEARCHING
348 * .dittolist = hitlist
349 * .dittocount = # of nodes on dittolist
350 * .retncode = common retncodes + OE_BAD_QUERY, OE_SEARCHING,
351 * OE_NOTAVAIL, OE_USER_STOP
354 #define OE_HILITE_STEMS 25
355 /* Creates a hitwords array for hiliting using the text
356 * in cleartext (however it may have been obtained),
357 * and the stems array from the last search.
359 * .cleartext = text to be hilited
360 * .clearlen = size of cleartext
361 * .stems = array of words or stems to hilite
362 * .stemcount = size of stems array
363 * .search_type = 'W', 'P' or 'S', indicating type of
364 * search that generated stems array.
366 * .hitwords = array of words in cleartext to hilite
367 * .hitwcount = size of hitwords array
368 * .retncode = common retncodes + OE_NOTAVAIL, OE_BAD_QUERY
371 #define OE_GET_EXPIRE 26
372 /* Returns in 'dba' field the expiration date of OE as a timestamp.
373 * Zero means no expiration date. Overlays previous value in dba.
374 * input: .request = OE_GET_EXPIRE
375 * output: .dba = unix timestamp of expiration date or 0
376 * .retncode = common retncodes only
381 #define OE_SHUTDOWN 9999
382 /* (These functions are obsolete) */
384 /*-------------------- Return Codes ------------------------
385 * The return codes marked 'common retncodes' are OE_OK, OE_NOOP, OE_REINIT,
386 * and OE_ABORT, and can be returned by almost all functions.
387 * OE_BAD_QUERY will be returned for any unknown function request.
388 * There is no zero return code.
390 #define OE_OK 1 /* normal successful completion */
391 #define OE_REINIT 2 /* request canceled: OE reinitialized
392 * databases so UI's dba's may be bad */
393 #define OE_SEARCHING 3 /* keep calling workproc */
394 #define OE_BAD_DBLK 4 /* presumably invalid dblk field -- TODO confirm */
395 #define OE_BAD_REQUEST 5 /* invalid request field */
396 #define OE_BAD_QUERY 6 /* invalid query or other input fld */
397 #define OE_NOTAVAIL 7 /* no record, hits, function disabled */
399 #define OE_WRAPPED 9 /* got next item instead of reqstd item */
400 #define OE_SYSTEM_STOP 10 /* error: search canceled by OE */
401 #define OE_BAD_PASSWD 11 /* invalid password */
402 #define OE_BAD_HITLIST 12 /* invalid hitlist */
403 #define OE_DISABLED 13 /* requested function disabled at this site */
404 #define OE_USER_STOP 14 /* search canceled by user */
405 #define OE_BAD_COMM 15 /* request canceled by comm layer */
406 #define OE_NOOP 888 /* No Operation, nothing done */
407 #define OE_ABORT 999 /* fatal OE error, OE permanently disabled */
410 /****************************************/
414 /****************************************/
415 /* Table used in load_ocf() and oe_uninitialize() to allow overriding default
416 * locations of various files. Complete discussion in .ocf documentation.
420 char *id; /* keyword identifier */
421 char **OEFptr; /* addr of variable to change */
422 char previously_specified; /* bool ensures only one spec */
425 /****************************************/
429 /****************************************/
432 char userid [10]; /* 1 - 8 alphanumeric char */
433 int search_type; /* single char = curr search type.
434 * 'T' = Semantic Text search
435 * 'W' = Exact Words search
438 * 'N' = Navigator string (unpacked fzk) srch
439 * 'P' = Statistical (Probabilistic) search
442 long flags; /* bit switches... */
443 #define USR_BIT_1 0x0001L /* (reserved) */
444 #define USR_NO_ITERATE 0x0002L /* override iterations in workprocs */
445 #define USR_STOPSRCH 0x0004L /* the "stop" button, cancels workproc */
446 #define USR_MAXMIN 0x0008L /* symdif() algorithm = fuzzy max min */
447 #define USR_OBJDATES 0x0010L /* restrict hitlists to objdate ranges */
448 #define USR_KWIC_ABSTR 0x0020L /* retn KeyWord In Context for abstract */
449 #define USR_NO_INFOMSGS 0x0040L /* do not retn information-only msgs to UI */
450 #define USR_MAXHITS_MSG 0x0080L /* show # hits each keytype if sum > maxhits */
451 #define USR_SORT_WHITL 0x0100L /* sort word/stem hitlists by semantics */
453 long debug; /* Nonproduction bit switches */
454 #define USRDBG_RARE 0x0001L /* 1 Misc initialzatn trace msgs */
455 #define USRDBG_SRCHCMPL 0x0002L /* 2 trace ui search_completed functions */
456 #define USRDBG_RETRVL 0x0004L /* 4 trace record retrieval funcs */
457 #define USRDBG_ITERATE 0x0008L /* 8 forces iteration on all iterable cmds */
458 #define USRDBG_UTIL 0x0010L /* 16 trace misc utility functions */
459 #define USRDBG_MEDPRMPT 0x0020L /* 32 Prints prompt of medley sockets cmds */
460 #define USRDBG_HITLIST 0x0040L /* 64 print hitlists after searches */
461 #define USRDBG_SYMP 0x0080L /* 128 trace symptom search funcs */
462 #define USRDBG_DELETE 0x0100L /* 256 trace record deletion functions */
463 #define USRDBG_RPC 0x0200L /* 512 trace RPC communications funcs */
464 #define USRDBG_VERBOSE 0x0400L /* 1024 verbose debugging: iterative details */
465 #define USRDBG_HILITE 0x0800L /* 2048 trace hiliting functions */
466 #define USRDBG_PARSE 0x1000L /* 4096 trace linguistic parse/stem funcs */
467 #define USRDBG_BOOL 0x2000L /* 8192 trace boolean parse funcs */
471 char *query; /* input data for text searches */
472 DtSrObjdate objdate1; /* only retn hit objects >= (after) date1 */
473 DtSrObjdate objdate2; /* only retn hit objects <= (before) date2 */
474 DB_ADDR dba; /* for direct dba reads */
475 DB_ADDR *dbatab; /* array of dba's for batch deletes */
476 int dbacount; /* # of dba's in dbatab */
477 void (*workproc) (void);
478 /* (1) If single tasking (iterative), OE places ptr to work
479 procedure. (2) If multitasking (no iterations), UI places
480 ptr of func to call when OE's spawned subtask is done. */
482 DBLK *dblist; /* linked list of all databases */
483 DBLK *dblk; /* users curr database selection */
485 DtSrResult *dittolist; /* hitlist retnd from various searches */
486 long dittocount; /* # of items on hitlist */
487 int stemcount; /* # of wordstems in 'stems' array */
488 char stems [DtSrMAX_STEMCOUNT] [DtSrMAXWIDTH_HWORD];
489 /* for hiliting words in text records */
491 struct or_objrec objrec; /* austext record buffer */
492 char *abstrbuf; /* buf to hold abstracts */
493 int abstrbufsz; /* maximum abstract size all databases */
494 char *cleartext; /* decompressed austext record text */
495 long clearlen; /* size of cleartext in bytes */
496 LLIST *notes; /* uncompressed, right out of vista */
497 DtSrHitword *hitwords; /* array of hit words inside cleartext */
498 long hitwcount; /* number of elements in hitwords array */
504 /****************************************/
508 /****************************************/
509 /* The following data is saved between calls of the workprocs.
510 * In a future msg passing protocol where there may be multiple UIs
511 * per OE, this data would be maintained in a list of structures,
512 * one for each currently active search (UI only passes user id name
513 * in iterative calls, OE searches list to match curr status).
514 * But for now, this will have to do.
521 DtSrResult *dittolist;
524 char stems [DtSrMAX_STEMCOUNT] [DtSrMAXWIDTH_HWORD];
525 char ktchars [MAX_KTCOUNT + 2];
527 long ktsum [MAX_KTCOUNT + 1];
531 /*--------------- GLOBALS in oe.c, loadocf.c -------------------*/
532 extern char **ausapi_dbnamesv;
533 extern int ausapi_dbnamesc;
/* USRBLK is the structure used to pass arguments between callers and the engine. */
534 extern USRBLK usrblk;
536 extern char *global_memory_ptr;
537 extern OEFTAB oef_table[];
538 extern SAVEUSR saveusr; /* (only one for now) */
541 extern int OE_bmhtab_strlen [DtSrMAX_STEMCOUNT];
542 extern size_t OE_bmhtables [DtSrMAX_STEMCOUNT] [MAX_BMHTAB];
544 extern int OE_enable_markdel;
545 extern int OE_enable_usernotes;
546 extern time_t *OE_expiration;
547 extern int OE_fastdecode;
548 extern char *OE_fileio;
/* Bit switches; the OE_AUDIT .. OE_NO_ITERATE masks that follow are its bits. */
549 extern long OE_flags;
550 #define OE_AUDIT 1L /* enables audit file logging */
551 #define OE_INITOK (1L<<1) /* ensures first reqst was INITIALIZE */
552 #define OE_PERMERR (1L<<2) /* disables engine on fatal errors */
553 #define OE_NO_ITERATE (1L<<3) /* override iterations in workprocs */
554 extern char *OE_inittab_dir; /* local dir of server daemon -- NOTE(review): same text as OE_server_dir's comment; confirm */
555 extern long OE_objsize;
556 extern char *OE_prodname;
557 extern int OE_search_type;
558 extern char *OE_server_dir; /* local dir of server daemon */
559 extern char *OE_sitecnfg_fname;
560 extern time_t OE_sitecnfg_mtime;
561 extern int OE_uppercase_keys;
562 extern long OE_words_hitlimit;
564 /* Global pointers to formerly hardcoded path/file names.
565 * The comment names the #define constant under which the
566 * filename is/was specified in either fuzzy.h or oe.h.
568 extern char *OEF_audit; /* FNAME_AUDIT */
569 extern char *OEF_discard; /* FNAME_DISCARD_DATA */
570 extern char *OEF_news; /* FNAME_SITENEWS */
571 extern char *OEF_notesnot; /* FNAME_NOTES_BAC */
572 extern char *OEF_notessem; /* FNAME_NOTES_SEM */
573 extern char *OEF_readme; /* FNAME_README */
575 /*---------------- FUNCTION PROTOTYPES ----------------------*/
576 extern char *calloe_getrec (char *dbname, DB_ADDR dba,
577 LLIST **global_msglist);
578 extern long calloe_hilite (char *cleartext, DtSrHitword *hitwords,
579 LLIST **global_msglist);
581 *calloe_search (char *qry, char *dbname,
582 int search_type, LLIST **global_msglist);
583 extern int call_output_script (char *shellcmd, char *text);
584 extern void clear_hitwords (void);
585 extern void clear_usrblk_record (void);
/* Same signature as the USRBLK workproc pointer: void (*)(void). */
586 extern void dummy_workproc (void);
587 extern char *ensure_end_slash (char *charbuf);
588 extern void fasthuf (UCHAR *input_bitstring, UCHAR *output_charbuf,
589 int outbuf_size, time_t encode_id);
590 extern char *get_hitlist_text (int maxlen);
/* Per the revision log, replaced STRKIND, OE_kind_of_stems and
 * oe_stems_to_hitwords (rev 2.3); prototype changed in rev 2.7. */
591 extern long hilite_cleartext (int parse_type, char *stems, int stemcount);
592 extern int load_ocf (void);
593 extern char *nowstring (time_t *now);
594 extern void oe_initialize (void);
/* Per the revision log (rev 2.5), no longer converts to uppercase. */
595 extern int oe_unblob (LLIST *bloblist);
596 extern void oe_write_audit_rec (long numhits);
/* NOTE(review): the file header calls USRBLK the only argument of the
 * Opera_Engine() call, yet this prototype takes none; presumably the
 * global usrblk carries the request and results -- confirm. */
597 extern void Opera_Engine (void);
598 extern void print_dittolist (DtSrResult *dittolist, char *label);
599 extern void print_stems (int stemcount, void *stems, char *locstr);
600 extern void print_usrblk_record (char *label);
601 extern void release_shm_mem (void);
602 extern char *retncode_str (int num);
603 extern void symptom_search (void);
604 extern int ve_append_notes (void);
605 extern void ve_browse_dba (int direction);
606 extern LLIST *ve_getblobs (DtSrINT32 dba, int vistano);
607 extern int ve_getrec_dba (LLIST **bloblist);
608 extern int ve_initialize (void);
609 extern void ve_ditto (void);
610 extern DtSrINT32 ve_reckey2dba (void);
611 extern void ve_statistical (void);
612 extern void ve_stem_search (void);
613 extern void ve_word_search (void);
614 extern void ve_shutdown (void);
616 /***************************** SearchE.h ******************************/
617 #endif /* _SearchE_h */