2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these libraries and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
23 // $TOG: DtSR_SearchEngine.C /main/19 1998/04/17 11:41:48 mgreess $
24 /* Copyright (c) 1995,1996 FUJITSU LIMITED */
25 /* All Rights Reserved */
34 #include "Registration.hh"
35 #include "Managers/CatMgr.hh"
37 #include "Util_Classes/Dict.hh"
38 #include "Util_Classes/DictIter.hh"
40 #include "UAS_Factory.hh"
41 #include "UAS_Exceptions.hh"
42 #include "UAS_Collection.hh"
43 #include "UAS_ObjList.hh"
44 #include "DtSR_SearchEngine.hh"
45 #include "DtSR_BookcaseEntry.hh"
46 #include "DtSR_SearchResultsEntry.hh"
// Granularity selector handed to apply_scope() when raw DtSearch hits are
// narrowed down (section level vs. book level).
48 enum fine_scopes { scope_section, scope_book };
// The single shared engine instance; created on demand by search_engine().
57 DtSR_SearchEngine *DtSR_SearchEngine::f_search_engine = NULL;
// Count of successful DtSearch (re)initializations. init() uses a zero value
// to choose DtSearchInit over DtSearchReinit.
59 int DtSR_SearchEngine::f_init_count = 0;
// Accessor for the shared DtSR_SearchEngine instance.
//   bcases - list of bookcase paths; may be NULL or empty, in which case the
//            engine is returned without being (re)initialized.
// Returns a reference to the shared instance, constructing it on first use.
61 // interface to DtSearchEngine constructor
63 DtSR_SearchEngine::search_engine(UAS_PtrList<const char> *bcases)
65 if (f_search_engine == (DtSR_SearchEngine *)NULL) {
66 // constructor does only what it really needs to do.
67 // let init do the rest of initialization.
68 f_search_engine = new DtSR_SearchEngine();
71 // init was originally introduced to prevent circular instantiation
72 // But now we need init anyway to implement re-initialization
// Only a non-empty bookcase list triggers init(); init() may throw.
73 if (bcases != NULL && bcases->numItems() > 0)
74 f_search_engine->init(bcases);
76 return *f_search_engine;
81 // build UAS_Common object given an mmdb section id
//   id - mmdb section identifier.
// Starts from the "mmdb:LOCATOR=" prefix and asks UAS_Common::create() for
// the matching object.
// NOTE(review): the statement that appends `id` to the prefix and the return
// of `doc` are not visible in this listing -- confirm against the full file.
82 static UAS_Pointer<UAS_Common>
83 uas_common(const UAS_String id)
85 UAS_String url = "mmdb:LOCATOR=";
89 UAS_Pointer<UAS_Common> doc = UAS_Common::create(url);
94 // build url given mmdb section id
//   id - mmdb section identifier, resolved through uas_common().
// The URL is the locator() of the resolved object.
// NOTE(review): `doc` is dereferenced without a null check; the return
// statement is not visible here -- presumably `url` is returned.
96 build_url(const UAS_String id)
98 UAS_Pointer<UAS_Common> doc = uas_common(id);
99 UAS_String url(doc->locator());
// Diagnostic trace of the computed URL on stderr.
101 fprintf(stderr, "(DEBUG) URL=\"%s\"\n", (char*)url);
// Build a bookcase locator of the form
//   "mmdb:INFOLIB=<dir>&BOOKCASE=<name>"
// from a bookcase path.
//   bookcase_path - path whose last '/' separates the infolib directory from
//                   the bookcase name.
// Returns an empty UAS_String when the path is NULL/empty or when no '/' is
// found after the first character (so "/name" and "name" both yield "").
108 build_bc_url(UAS_String bookcase_path)
110 const char* str = (char*)bookcase_path;
111 if (str == NULL || *str == '\0')
112 return UAS_String("");
// Work on a private copy so the path can be scanned (and presumably split)
// without touching the caller's string.
// NOTE(review): the strdup result is not checked for NULL, and the matching
// free() on each exit path is not visible in this listing -- confirm.
114 char* bc_path = strdup((char*)bookcase_path);
// Scan backwards from the last character for the final '/'.
115 char* sep = bc_path + strlen(bc_path) - 1;
116 for (; sep > bc_path && *sep != '/'; --sep);
// Reaching the start of the buffer means there is no usable separator.
118 if (sep == bc_path) {
120 return UAS_String("");
// NOTE(review): presumably the copy is terminated at `sep` and `sep` is
// advanced to the bookcase name on a line not shown -- confirm.
124 UAS_String bc_locator = "mmdb:INFOLIB=";
125 bc_locator = bc_locator + bc_path + "&BOOKCASE=" + sep;
127 fprintf(stderr, "(DEBUG) URL=\"%s\"\n", (char*)bc_locator);
// Check that `path` exists, has the requested file type and is readable by
// the calling process.
//   path - file system path; NULL or empty is rejected up front.
//   type - S_IFDIR to require a directory; otherwise S_IFREG (the default) is
//          asserted and a regular file is required.
// Each failure is reported on stderr. The result is compared against True by
// the caller (UpdateConfigFile).
139 IsAccessible(const char* path, mode_t type = S_IFREG)
141 if (path == NULL || *path == '\0')
144 struct stat stat_buf;
146 if (stat(path, &stat_buf) < 0) {
148 fprintf(stderr, "(ERROR) cannot stat %s\n", (char*)path);
152 else if (type == S_IFDIR) {
// NOTE(review): this tests for a shared bit, not for equality with the file
// type. Any type whose S_IFMT bits overlap S_IFDIR would pass; the usual
// form is (st_mode & S_IFMT) == S_IFDIR (or S_ISDIR) -- confirm intent.
153 if ((stat_buf.st_mode & S_IFMT & S_IFDIR) == 0) {
155 fprintf(stderr, "(ERROR) %s not a directory\n",
162 assert( type == S_IFREG );
// NOTE(review): same bit-overlap concern as the directory test above.
163 if ((stat_buf.st_mode & S_IFMT & S_IFREG) == 0) {
165 fprintf(stderr, "(ERROR) %s not a ordinary file\n",
// Finally require read permission.
172 if (access((char*)path, R_OK) < 0) {
174 fprintf(stderr, "(ERROR) you do not have right to read %s\n",
// Regenerate dtinfo's DtSearch configuration (ocf) file from a list of
// bookcase paths.
//   bcases   - bookcase paths; NULL or empty is rejected up front.
//   ocf_path - path of the configuration file to truncate and rewrite.
// For each bookcase whose "<path>/dtsearch" directory and
// "<path>/dtsearch/dtsearch.ocf" file are accessible, and whose name has not
// been seen before, a "PATH <name> = <dir>" line is written followed by the
// contents of that bookcase's dtsearch.ocf. A bit map is built with bit i set
// when bookcase i was accepted; init() stores the function's result as its
// valid-bookcase map and treats zero as "no valid bookcases".
183 UpdateConfigFile(UAS_PtrList<const char> *bcases, UAS_String ocf_path)
185 if (bcases == NULL || bcases->numItems() == 0)
188 if ((char*)ocf_path == NULL || *(char*)ocf_path == '\0')
// The open mode asks for truncation of an existing file without creating
// one; the constructor of DtSR_SearchEngine creates the file beforehand.
191 ofstream dtiocf((char*)ocf_path,
192 ios::out | ios::trunc | ios::nocreate);
193 if (! dtiocf) { // could not open ocf_path in specified mode
195 fprintf(stderr, "(ERROR) could not open %s\n", (char*)ocf_path);
// Bit i records whether bookcase i passed validation.
// NOTE(review): the map is an unsigned int and the shifts below operate on
// int, so indices at or beyond the bit width cannot be represented --
// confirm that callers never pass that many bookcases.
200 unsigned int bitfield = 0;
202 // dbnames_dict is used to ensure uniqueness of dbnames
203 // NOTE: dbnames should really be ensured to be unique
204 // at infolib build-time.
205 Dict<UAS_String, int> dbnames_dict(NULL, False);
207 for (int i = 0; i < bcases->numItems(); i++) {
208 UAS_String bcase_path = (*bcases)[i];
209 UAS_String dtsrpath = bcase_path + "/dtsearch";
210 UAS_String ocfpath = dtsrpath + "/dtsearch.ocf";
// Scan backwards from the terminator for the last '/' to isolate the
// bookcase name.
// NOTE(review): presumably `bcase` is stepped past the '/' on a line not
// shown here -- confirm.
212 char* bcase = (char*)bcase_path + bcase_path.length();
213 for (; *bcase != '/' && bcase > (char*)bcase_path; bcase--);
216 // check if austext files are in place
217 if (IsAccessible((char*)bcase_path, S_IFDIR) != True ||
218 IsAccessible((char*)dtsrpath, S_IFDIR) != True ||
219 IsAccessible((char*)ocfpath) != True ||
220 dbnames_dict[bcase] == True) // name has been used already
// Rejected bookcase: make sure its bit is clear.
222 bitfield &= ~(0x01 << i);
224 fprintf(stderr, "(ERROR) search files are not installed "
225 "correctly in %s\n", (char*)bcase_path);
// Accepted bookcase: remember the name and set its bit.
230 dbnames_dict[bcase] = True;
231 bitfield |= 0x01 << i;
234 // bcase : book case name; e.g. "OLIASDOC"
235 dtiocf << "PATH " << bcase << " = " << (char*)dtsrpath << '\n';
237 // append the contents of "dtsearch.ocf" to dtinfo's ocf
// NOTE(review): get() with a delimiter leaves the delimiter unread;
// presumably a line not shown consumes it. Lines longer than the buffer
// would be split across iterations -- confirm this is acceptable.
238 ifstream dtsrocf((char*)ocfpath);
239 char buf[256]; // buffer to get a line from "dtsearch.ocf"
240 while (dtsrocf.get(buf, 256, '\n')) {
242 dtiocf << buf << '\n';
// Destructor: removes the temporary configuration file created by the
// constructor (when a path is recorded), clears the stored path, and then
// handles the OQL parser when one exists.
// NOTE(review): the body of the parser branch is not visible in this
// listing -- presumably it deletes f_oql_parser; confirm.
252 DtSR_SearchEngine::~DtSR_SearchEngine()
254 if (*(char*)f_config_path != '\0') {
255 unlink((char*)f_config_path);
257 fprintf(stderr, "(DEBUG) %s removed\n", (char*)f_config_path);
259 f_config_path = NULL;
261 if (f_oql_parser != NULL) {
// Minimal construction: picks a temporary configuration file name, creates
// that file, and instantiates the OQL parser. The DtSearch library itself is
// initialized later by init().
// Throws Exception when the configuration file cannot be created.
267 // DtSearchEngine constructor
268 DtSR_SearchEngine::DtSR_SearchEngine()
269 : f_dbnames(NULL), f_dbcount(0), f_valid_bc_map(0)
// NOTE(review): tempnam() returns an allocated buffer and may return NULL;
// neither a NULL check nor the release of `ocf_path` is visible in this
// listing -- confirm.
271 char* ocf_path = tempnam(NULL, ".DtSR_");
272 f_config_path = ocf_path;
// ios::noreplace: the open is requested to fail if the file already exists.
277 ofstream dtiocf((char*)f_config_path, ios::out | ios::noreplace);
278 if (! dtiocf) // could not open ocf_path in specified mode
279 throw(CASTEXCEPT Exception());
281 f_oql_parser = new DtSR_Parser();
// (Re)initialize DtSearch for the given set of bookcases.
//   bcases - bookcase paths; dereferenced without a NULL check, so callers
//            must pass a valid list (search_engine() checks before calling).
// Steps visible here: regenerate the configuration file, call DtSearchInit on
// the first successful pass or DtSearchReinit afterwards, empty the global
// DtSR_BookcaseSearchEntry list, then create one search entry per bookcase
// whose bit is set in the valid-bookcase map.
// Throws UAS_Exception carrying the DtSearch messages when the valid map is
// zero after a failed (re)initialization.
285 DtSR_SearchEngine::init(UAS_PtrList<const char> *bcases)
287 if (bcases->numItems() == 0 || *(char*)f_config_path == '\0')
// NOTE(review): only the counts are compared here, not the bookcases
// themselves -- a different set of equal size would be treated as the same.
290 if (bcases->numItems() == DtSR_BookcaseSearchEntry::bcases().length()) {
291 // if sets of bookcases are same, you do not have to (re)init
294 if ((f_valid_bc_map = UpdateConfigFile(bcases, f_config_path)) == 0) {
295 // no valid bookcases available
// First successful initialization uses DtSearchInit; later ones Reinit.
301 if (f_init_count == 0) {
303 fprintf(stderr, "(DEBUG) DtSearch is being initialized.\n");
305 status = DtSearchInit("DtSearch", NULL, 0, (char*)f_config_path,
306 NULL, &f_dbnames, &f_dbcount);
310 fprintf(stderr, "(DEBUG) DtSearch is being re-initialized.\n");
312 status = DtSearchReinit(&f_dbnames, &f_dbcount);
316 if (status == DtSrOK || status == DtSrREINIT) {
// Lift the result cap once, right after the very first initialization.
317 if (f_init_count++ == 0) {
318 DtSearchSetMaxResults(INT_MAX);
319 assert( DtSearchGetMaxResults() == INT_MAX );
322 else { // DtSearch (re)initialization failed
324 if (f_init_count == 0) {
325 msg = "(ERROR) DtSearchInit:\n";
328 msg = "(ERROR) DtSearchReinit:\n";
// Append the library's own diagnostics, then release them.
331 msg = msg + DtSearchGetMessages();
332 DtSearchFreeMessages();
334 // zero f_valid_bc_map should represent bad condition
338 // make DtSR_BookcaseSearchEntry::bcases() empty
339 while (DtSR_BookcaseSearchEntry::bcases().length() > 0)
340 DtSR_BookcaseSearchEntry::bcases().remove_item(0);
342 if (f_valid_bc_map == 0) {
344 throw(CASTUASEXCEPT UAS_Exception(msg));
352 fprintf(stderr, "(DEBUG) valid_bc_map = 0x%x\n", f_valid_bc_map);
353 fprintf(stderr, "\tdbcount = %d\n", f_dbcount);
354 for (i = 0; i < f_dbcount; i++)
355 fprintf(stderr, "\tdb[%d]=%s\n", i, f_dbnames[i]);
// Walk the caller's list; only bookcases flagged valid get a search entry.
359 for (i = 0; i < bcases->numItems(); i++) {
360 if ((f_valid_bc_map & (0x01 << i)) == 0)
363 UAS_String bookcase_path((*bcases)[i]);
364 UAS_String bookcase_url = build_bc_url(bookcase_path);
365 if ((char*)bookcase_url == NULL || *(char*)bookcase_url == '\0')
368 UAS_Pointer<UAS_Common> obj;
369 if ((obj = UAS_Factory::create(bookcase_url)) == (int)NULL)
371 UAS_Pointer<UAS_Common> bookcase;
373 ((UAS_Collection*)(UAS_Common *)obj)->root()) == (int)NULL)
// `dbn` is a running database number assigned to each created entry; the
// entry is not stored here.
// NOTE(review): presumably the DtSR_BookcaseSearchEntry constructor
// registers the entry in bcases() -- confirm.
376 new DtSR_BookcaseSearchEntry(dbn++, bookcase, True);
383 // parse url (mmdb) out into infolib,infobase,id fields
//   locator  - mutable URL text; NULL or empty is rejected. The caller's
//              comment states that this function tampers with the buffer.
//   infolib, infobase - out parameters for the first two fields.
// Each field is located as the text between the next '=' and the following
// '&'. The caller compares the result against NULL to detect a parse failure.
// NOTE(review): the assignments to the out parameters and the handling of a
// missing '&' are on lines not visible in this listing -- confirm.
385 parse_url(char* locator, const char* &infolib, const char* &infobase,
388 if (locator == NULL || *locator == '\0')
// First field.
393 if ((head = strchr(locator, '=')) == NULL)
395 if ((tail = strchr(++head, '&')) == NULL)
397 assert( head < tail );
// Second field.
402 if ((head = strchr(head, '=')) == NULL)
404 if ((tail = strchr(++head, '&')) == NULL)
406 assert( head < tail );
// Third field.
411 if ((head = strchr(head, '=')) == NULL)
413 if ((tail = strchr(++head, '&')) == NULL)
415 assert( head < tail );
// Predicate signature used to test a result's abstract against an id.
424 typedef int (*Scan_Abstract)(char*, const char*);
// Filter a DtSearch result list down to the entries whose abstract matches
// `id`, building a new, separately allocated list.
//   res   - head of the raw result list (linked through ->link).
//   scope - filtering granularity.
//   id    - identifier to look for; NULL/empty input is rejected.
//   count - reset to 0 at the start of the walk; presumably incremented per
//           kept item on a line not shown -- confirm.
// Matching nodes are shallow-copied and their abstract string is duplicated,
// so the caller can free the original list and keep the filtered one.
427 apply_scope(DtSrResult* res, fine_scopes scope, const char* id, long &count)
429 if (res == NULL || id == NULL || *id == '\0') // garbage input
432 Scan_Abstract scan_abstract = DtSR_SearchResultsEntry::section_in_abstract;
434 DtSrResult *rval = NULL, *tail = NULL, *iter;
436 for (iter = res, count = 0; iter; iter = iter->link) {
437 if (scan_abstract(iter->abstractp, id)) {
439 fprintf(stderr, "(DEBUG) match found in scan_abstract\n");
// NOTE(review): neither malloc result is checked before use.
441 DtSrResult *item = (DtSrResult *)malloc(sizeof(DtSrResult));
442 *item = *iter; // copy as it is
443 item->abstractp = (char*)malloc(strlen(iter->abstractp) + 1);
444 strcpy(item->abstractp, iter->abstractp);
453 fprintf(stderr, "(DEBUG) abstract=%s\n", item->abstractp);
458 if (scope == scope_section)
466 // resolve bookid (NOTE: serial is 1-based)
//   bcase  - bookcase object; must be non-null and of type UAS_BOOKCASE.
//   serial - 1-based position of the book among the bookcase's children.
// Yields the id() of the selected child.
// NOTE(review): only the lower bound of `serial` is checked here; no upper
// bound check against the number of children is visible -- confirm.
468 resolve_bookid(UAS_Pointer<UAS_Common> &bcase, int serial)
472 if (bcase == (int)NULL || bcase->type() != UAS_BOOKCASE)
475 if (serial < 1) // apparently wrong serial number
478 UAS_Pointer<UAS_Common> book = bcase->children()[serial - 1];
480 fprintf(stderr, "(DEBUG) resolved bookid=%s, type=%s\n",
481 (char*)book->id(), (char*)book->content_type());
484 return rval = book->id();
// Run a query against every bookcase named in `scope` and merge the results.
//   oql   - query text, translated by the OQL parser before being handed to
//           DtSearch.
//   scope - bookcases, optional per-bookcase book lists and search zones.
//           NOTE: this function temporarily modifies `scope` (it may append
//           a bookcase entry and it changes the zones) -- see the "hack"
//           comments below.
//   (third parameter, maxdocs, is unused.)
// Returns a UAS_SearchResults pointer; an empty result object is produced
// when no bookcases are given or nothing was found.
// Throws Exception when the current-section URL cannot be parsed, and
// UAS_Exception when DtSearch rejects the query.
487 UAS_Pointer<UAS_SearchResults>
488 DtSR_SearchEngine::search(UAS_String oql, UAS_SearchScope& scope,
489 unsigned int /* maxdocs */)
491 UAS_Pointer<DtSR_SearchResults> DtSR_result = NULL ;
492 UAS_Pointer<UAS_SearchResults> UAS_result = NULL;
494 if (f_valid_bc_map == 0) // DtSearch initialization failed
497 UAS_PtrList<UAS_BookcaseEntry>& targets = scope.bookcases();
498 if (scope.search_zones().section() == False && targets.numItems() == 0) {
499 // no bookcases to search against
500 UAS_result = new UAS_SearchResults(new UAS_String(oql),
501 new UAS_String(scope.name()));
503 fprintf(stderr, "(WARNING) no bookcases specified to search against, "
504 "returning an empty search result...\n");
// Set only when a temporary bookcase entry is added for a current-section
// search; it is removed from `targets` again at the end.
509 UAS_Pointer<UAS_BookcaseEntry> current_bc = NULL;
510 if (scope.search_zones().section()) { // search for current section
512 if (targets.numItems() == 0) {
// Derive the owning bookcase from the section's URL.
514 UAS_String url = build_url(scope.search_zones().search_section());
516 const char *infolib, *infobase, *section_id;
517 if (parse_url((char*)url, infolib, infobase, section_id) == NULL) {
519 fprintf(stderr, "(ERROR) could not parse url\n");
522 // NOTE: parse_url is tampering url,
523 // that's why it's cast to (char*)
524 throw(CASTEXCEPT Exception());
527 UAS_String bc_path = infolib;
528 bc_path = bc_path + "/" + infobase;
530 UAS_String bookcase_url = build_bc_url(bc_path);
531 assert( (char*)bookcase_url && *(char*)bookcase_url );
533 UAS_Pointer<UAS_Common> obj = UAS_Factory::create(bookcase_url);
534 UAS_Pointer<UAS_Common> bookcase =
535 ((UAS_Collection*)(UAS_Common *)obj)->root();
537 // NOTE: hack! tampering bookcases
538 current_bc = new UAS_BookcaseEntry(bookcase, True);
539 targets.append(current_bc);
544 assert( targets.numItems() == 1 );
547 scope.search_zones().all(True); // hack! tampering zones
// Translate the OQL text into the query form passed to DtSearchQuery.
550 UAS_String aus_query;
552 aus_query = f_oql_parser->parse((char*)oql);
554 catch_any() { // OQL parse failed
559 DtSR_BookcaseSearchEntry::search_zones(scope.search_zones());
561 // do search for each bookcase
562 UAS_List<DtSR_BookcaseSearchEntry>& bookcases =
563 DtSR_BookcaseSearchEntry::bcases();
565 DtSrResult* DtSr_res = NULL;
567 // for each bookcase specified in scope
// The raw result pointer and its count are reset after every iteration.
568 for (n = 0; n < targets.numItems(); n++, DtSr_res = NULL, rescount = 0) {
570 // look for the correspondent index
// A target matches a registered bookcase when both bid() and lid() agree.
571 for (index = 0; index < f_dbcount; index++) {
572 if (bookcases[index]->bid() == targets[n]->bid() &&
573 bookcases[index]->lid() == targets[n]->lid())
576 if (index == f_dbcount) {
578 fprintf(stderr, "(ERROR) cannot not find bookcase, bid=\"%s\", "
579 "just ignore\n", (char*)targets[n]->bid());
584 bookcases[index]->stems()->clear();
// NOTE(review): two alternative definitions of `stype` follow; presumably
// they are selected by preprocessor conditionals not visible in this
// listing -- confirm.
587 // switch austext search option based on completion being specified
588 int stype = ((DtSR_Parser*)f_oql_parser)->stemming_suggested() ? 'S' : 'W';
590 // switch austext search option with regards to languages
591 int stype = (bookcases[index]->language() == DtSrLaJPN)? 'W' : 'S';
594 UAS_String eff_query = aus_query;
596 fprintf(stderr, "(DEBUG) effective query=\"%s\"\n", (char*)eff_query);
599 int status = DtSearchQuery(
600 (char*)eff_query, f_dbnames[index], stype, NULL, NULL,
601 &DtSr_res, &rescount,
602 (char*)(bookcases[index]->stems()->stems()),
603 &(bookcases[index]->stems()->count())
// DtSrNOTAVAIL is tolerated alongside DtSrOK; anything else is an error.
606 if (status != DtSrOK && status != DtSrNOTAVAIL) { // error
609 DtSearchFreeResults(&DtSr_res);
611 UAS_String msg(CATGETS(Set_DtSR_SearchEngine, 1,
612 "DtSearch does not support the query."));
613 DtSearchFreeMessages();
614 throw(CASTUASEXCEPT UAS_Exception(msg));
// Current-section search: keep only hits belonging to that section.
619 if (scope.search_zones().section()) {
621 DtSrResult* res = apply_scope(DtSr_res, scope_section,
622 (char*) scope.search_zones().search_section(),
// The filtered list is an independent copy, so the raw list can go.
626 DtSearchFreeResults(&DtSr_res);
628 DtSr_res = res; // replace the results with an artifact
631 if (DtSr_res == NULL)
634 UAS_Pointer<UAS_List<UAS_SearchResultsEntry> > res;
// Collapse raw hits into one entry per document id.
636 if ((res = compress_DtSrResult(DtSr_res, rescount)) == (int)NULL)
639 // book#s specified, apply book-level scope here
640 if (targets[n]->book_list().numItems() > 0 && res->length() > 0) {
641 Dict<UAS_String, int> bookid_dict(NULL, False);
642 UAS_ObjList<int> &books = targets[n]->book_list();
643 // register bookids in Dict<UAS_String, int>
644 for (int i = 0; i < books.numItems(); i++) {
645 UAS_Pointer<UAS_Common> bcase(bookcases[index]->bcase());
646 UAS_String bookid = resolve_bookid(bcase, books[i]);
647 if ((char*)bookid == (int)NULL || *(char*)bookid == '\0')
650 bookid_dict[bookid] = True;
// Pass 1: null out entries whose enclosing book is not selected.
652 for (i = 0; i < res->length(); i++) {
654 // all these temporary variables are needed to get this
655 // code to compile on novell
657 UAS_List<UAS_SearchResultsEntry> * temp_lst = res;
659 UAS_Pointer<UAS_SearchResultsEntry> tmp_sre = temp_lst->item(i);
661 UAS_String temp_id = tmp_sre->id();
663 UAS_Pointer<UAS_Common> uas_book =
664 UAS_Common::create(temp_id);
// Climb the parent chain until the enclosing book is reached.
// NOTE(review): no null check on parent() is visible -- confirm every
// result is guaranteed to live under a UAS_BOOK.
666 while (uas_book->type() != UAS_BOOK)
667 uas_book = uas_book->parent();
669 UAS_String uas_book_id = uas_book->id();
671 if (bookid_dict[uas_book_id] == False)
672 res->set_item(NULL, i);
// Pass 2: drop the nulled slots; i-- re-examines the index that the
// following element shifts into after a removal.
675 for (i = 0; i < res->length(); i++) {
676 if (res->item(i) == (int)NULL)
677 res->remove_item(i--);
681 // take over stems from DtSR_BookcaseSearchEntry
682 UAS_Pointer<DtSR_Stems> stems = bookcases[index]->takeover_stems();
// NOTE(review): `n` below reuses the name of the bookcase loop index used
// above -- confirm the scoping in the full file.
684 UAS_Pointer<UAS_String> q = new UAS_String(oql);
685 UAS_Pointer<UAS_String> n = new UAS_String(scope.name());
687 UAS_Pointer<DtSR_SearchResults> DtSR_res =
688 new DtSR_SearchResults(q, n, res, res->length(),
689 stems, scope.search_zones(), stype);
691 if (DtSR_result == (int)NULL)
692 DtSR_result = DtSR_res;
693 else // merge DtSR_res into DtSR_result
694 DtSR_result->merge(DtSR_res);
// Nothing accumulated: hand back an empty result object instead.
697 if (DtSR_result == (int)NULL)
698 UAS_result = new UAS_SearchResults(new UAS_String(oql),
699 new UAS_String(scope.name()));
702 if (UAS_result == (int)NULL) {
703 assert( DtSR_result != (int)NULL );
704 UAS_result = (UAS_SearchResults*)(DtSR_SearchResults*)DtSR_result;
707 assert( UAS_result != (int)NULL );
// Undo the temporary bookcase entry added for a current-section search.
709 if (current_bc != 0) {
710 targets.remove(current_bc);
// Collapse a raw DtSearch result list into one entry per document id.
//   res   - head of the raw result list; freed with DtSearchFreeResults
//           before returning on the paths visible here.
//   count - number of raw results reported by DtSearch.
// Raw hits sharing the same id are folded into a single
// DtSR_SearchResultsEntry whose per-zone proximity is set (or overlaid) from
// each hit. The resulting entries are collected into a list and sorted.
// Returns an empty list when there is nothing to process.
717 UAS_Pointer<UAS_List<UAS_SearchResultsEntry> >
718 DtSR_SearchEngine::compress_DtSrResult(DtSrResult*& res, long& count)
720 UAS_Pointer<UAS_List<UAS_SearchResultsEntry> >
721 result_list = new UAS_List<UAS_SearchResultsEntry>;
// NOTE(review): the condition guarding this first branch is not visible in
// this listing -- presumably `res == NULL`; confirm.
725 assert( count == 0 );
726 return result_list; // return empty list
728 else if (! count) { // should never enter here
729 DtSearchFreeResults(&res);
730 assert( res == NULL );
731 return result_list; // return empty list
// id -> entry map used to merge multiple hits on the same document.
734 Dict<UAS_String, UAS_Pointer<DtSR_SearchResultsEntry> >
737 DtSrResult* iter = res;
738 for (iter = res; iter; iter = iter->link) {
739 // may we change abstract in DtSrResult before free it?
740 char* abstract = iter->abstractp;
741 UAS_Pointer<UAS_String> id, book, section;
742 if (DtSR_SearchResultsEntry::
743 parse_abstract(abstract, id, book, section) == (int)NULL) {
745 fprintf(stderr, "parse_abstract failed\n");
750 UAS_String Id(*(UAS_String*)id);
// Reference into the map: assigning to `sre` stores the new entry.
751 UAS_Pointer<DtSR_SearchResultsEntry>& sre = map[Id];
752 if (sre == (int)NULL) { // not found in map, create one
754 cerr << "(DEBUG) " << (char*)Id << ' ' <<
755 "not found in map" << '\n' << flush;
757 sre = new DtSR_SearchResultsEntry(
758 (char*)*(UAS_String*)id,
759 (char*)*(UAS_String*)book,
760 (char*)*(UAS_String*)section,
761 iter->dbn, iter->language, NULL);
// The first character of the record key selects the zone. Try a plain set
// first, then an overlay; a negative result from both is an error.
764 if (sre->set_proximity(DtSR_SearchZones::keytype2zone(*iter->reckey),
765 iter->proximity) >= 0) {
767 fprintf(stderr, "(DEBUG) keytype=%c, proximity=%d\n", *iter->reckey,
768 sre->get_proximity(DtSR_SearchZones::
769 keytype2zone(*iter->reckey)));
772 else if (sre->overlay_proximity
773 (DtSR_SearchZones::keytype2zone(*iter->reckey),
774 iter->proximity) >= 0) {
776 fprintf(stderr, "(DEBUG) keytype=%c, overlayed proximity=%d\n", *iter->reckey,
777 sre->get_proximity(DtSR_SearchZones::
778 keytype2zone(*iter->reckey)));
783 fprintf(stderr, "(ERROR) set_proximity failed\n");
786 // NOTE: need to delete sre here
791 assert( map.size() > 0 );
// Move every merged entry from the map into the output list.
796 DictIter<UAS_String, UAS_Pointer<DtSR_SearchResultsEntry> > mapiter;
797 for (mapiter = map.first(); mapiter(); mapiter++) {
798 mapiter.value()->relevance(); // initialize relevance
799 // NASTY code to convert UAS_Pointer<DtSR_SearchResultsEntry>
800 // to UAS_Pointer<UAS_SearchResultsEntry>
801 DtSR_SearchResultsEntry *Cptr_DtSR_sre = mapiter.value();
802 UAS_SearchResultsEntry *Cptr_UAS_sre = Cptr_DtSR_sre;
803 UAS_Pointer<UAS_SearchResultsEntry> uas_res(Cptr_UAS_sre);
804 result_list->insert_item(uas_res);
807 DtSR_SearchResults::sort(result_list);
// The raw list is no longer needed once the entries have been built.
809 DtSearchFreeResults(&res);
813 fprintf(stderr, "(DEBUG) # of results = %d\n", result_list->length());
// Look up the DtSearch database name at position `n`.
//   n - zero-based index; values outside [0, f_dbcount) are rejected.
// For a valid index a newly allocated UAS_String copy of f_dbnames[n] is
// wrapped in a UAS_Pointer.
// NOTE(review): the value returned for an out-of-range index is not visible
// in this listing -- confirm.
820 UAS_Pointer<UAS_String>
821 DtSR_SearchEngine::db_name(int n)
823 if (n < 0 || n >= f_dbcount)
826 UAS_Pointer<UAS_String> rval = new UAS_String(f_dbnames[n]);
833 DtSR_SearchEngine::char_db_name(int n)
835 if (n < 0 || n >= f_dbcount) {