2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these libraries and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
23 // $TOG: DtSR_SearchResultsEntry.C /main/13 1998/04/17 11:42:06 mgreess $
24 /* Copyright (c) 1995,1996 FUJITSU LIMITED */
25 /* All Rights Reserved */
33 #include <StyleSheet/DocParser.h>
34 #include <StyleSheet/Resolver.h>
35 #include <StyleSheet/StyleSheet.h>
36 #include "Tml_TextRenderer.hh"
39 #include "Basic/Error.hh"
40 #include "Basic/FolioObject.hh"
41 #include "Basic/List.hh"
42 #include "Basic/Long_Lived.hh"
43 #include "Managers/MessageMgr.hh"
44 #include "Managers/StyleSheetMgr.hh"
46 #include "DtSR_SearchResultsEntry.hh"
47 #include "DtSR_SearchEngine.hh"
49 #include "TextParser.hh"
51 #include "Other/XmStringLocalized.hh"
52 #include "Managers/CatMgr.hh"
// Constructor. Forwards id, book and section to the UAS_SearchResultsEntry
// base together with Inv_Relevance, records the database number and language,
// and starts with an empty zone bitmask (f_zone = 0).
// NOTE(review): several parameter and body lines are not visible in this
// listing; only the visible statements are described here.
61 DtSR_SearchResultsEntry::DtSR_SearchResultsEntry(const char* id,
64 int dbn, short language,
65 UAS_Pointer<DtSR_SearchResults>)
66 : UAS_SearchResultsEntry(id, book, section, Inv_Relevance),
67 f_dbn(dbn), f_language(language), f_zone(0)
// Visits every zone index from 0 through uas_all inclusive (loop body not
// visible; presumably it initializes the per-zone proximity slots).
70 for (i=0; i<=UAS_SearchZones::uas_all; i++)
// Builds an "mmdb:LOCATOR=" URL and creates the UAS_Common object for it.
73 UAS_String url("mmdb:LOCATOR=");
75 UAS_Pointer<UAS_Common> sec = UAS_Common::create(url);
// f_id is replaced by the locator reported by the created object.
// NOTE(review): sec is dereferenced here; confirm create() cannot yield a
// null pointer or that a check exists on a line not shown.
77 f_id = sec->locator();
// Destructor. Its body is not visible in this listing.
80 DtSR_SearchResultsEntry::~DtSR_SearchResultsEntry()
// Accessor for the list of matching text runs of this entry.
// NOTE(review): the condition guarding the assignment below and the return
// statement are not visible; presumably f_matches is built on first use and
// then returned.
84 UAS_Pointer<UAS_List<UAS_TextRun> >
85 DtSR_SearchResultsEntry::matches() const
88 // preserve logical constness
// const is cast away from `this` so that this const accessor can store the
// result of create_matches() in f_matches.
89 ((DtSR_SearchResultsEntry*)this)->f_matches
90 = ((DtSR_SearchResultsEntry*)this)->create_matches();
// Clears the proximity recorded for uas_zone.
// Zone values above UAS_SearchZones::uas_all are treated as a range error
// (the error result is on a line not visible in this listing).
// Otherwise the zone's bit in f_zone is cleared, f_proximity[uas_zone] is
// reset to 0, and that 0 is returned.
97 DtSR_SearchResultsEntry::unset_proximity(DtSR_SearchZones::uas_zones uas_zone)
99 if (uas_zone > UAS_SearchZones::uas_all) { // range error
// Bit N of f_zone flags whether zone N currently holds a proximity.
102 f_zone &= ~(0x01<<uas_zone);
103 return f_proximity[uas_zone] = 0;
// Records a proximity for uas_zone if none has been recorded yet.
// Zone values above UAS_SearchZones::uas_all are a range error, and a zone
// whose bit is already set in f_zone is rejected (both error results are on
// lines not visible in this listing).
// On success the zone's bit is set and the stored proximity is returned.
107 DtSR_SearchResultsEntry::set_proximity(DtSR_SearchZones::uas_zones uas_zone,
110 if (uas_zone > UAS_SearchZones::uas_all) { // range error
// `<<` binds tighter than `&`, so this tests the zone's own bit.
113 else if (f_zone & 0x01<<uas_zone) { // already set, reject
117 f_zone |= 0x01<<uas_zone;
119 return f_proximity[uas_zone] = proximity;
// Merges a new proximity value into the slot for uas_zone.
//   uas_zone  - zone index; values above UAS_SearchZones::uas_all are treated
//               as a range error (the error result is on a line not visible
//               in this listing).
//   proximity - value to merge.
// If the zone's bit in f_zone is clear, the value is stored through
// set_proximity(), which also sets the bit. Otherwise the slot becomes the
// mean of the stored value and proximity. Returns the value left in
// f_proximity[uas_zone], or whatever set_proximity() returns.
123 DtSR_SearchResultsEntry::overlay_proximity(DtSR_SearchZones::uas_zones uas_zone,
126 if (uas_zone > UAS_SearchZones::uas_all) // range error
// Parenthesized explicitly: `==` binds tighter than `&`, so the unparenthesized
// form evaluated f_zone & ((0x01<<uas_zone) == 0), which is always 0 and left
// the set_proximity() branch unreachable.
129 if ((f_zone & (0x01<<uas_zone)) == 0)
130 return set_proximity(uas_zone, proximity);
132 return f_proximity[uas_zone] = (f_proximity[uas_zone] + proximity) / 2;
// Returns the proximity recorded for uas_zone.
// Zone values above UAS_SearchZones::uas_all are a range error, and a zone
// whose bit is clear in f_zone has no proximity yet (both results are on
// lines not visible in this listing).
136 DtSR_SearchResultsEntry::get_proximity(DtSR_SearchZones::uas_zones uas_zone) const
138 if (uas_zone > UAS_SearchZones::uas_all) { // range error
141 else if ((f_zone & 0x01<<uas_zone) == 0) { // proximity not set yet
145 return f_proximity[uas_zone];
// Splits a tab-separated abstract into its id, book and section fields.
//   abs     - abstract text; fields are separated by tab characters.
//   id      - receives a new UAS_String holding the text before the first tab.
//   book    - receives a new UAS_String holding the text up to the second tab.
//   section - receives a new UAS_String holding the remainder.
// The return value and the failure results are on lines not visible in this
// listing.
// NOTE(review): the buffer behind abs is written to while parsing (the tabs
// are put back afterwards), so callers must pass writable memory even though
// the parameter is declared const.
148 UAS_Pointer<UAS_String>
149 DtSR_SearchResultsEntry::parse_abstract(const char* abs,
150 UAS_Pointer<UAS_String> &id, UAS_Pointer<UAS_String> &book,
// The third output parameter had been corrupted to a section-sign character
// by an HTML-entity decode of "&sect"; it is the reference `&section`.
151 UAS_Pointer<UAS_String> &section)
154 char* abstract = (char *)abs; // logical constness
// The assert catches a null or empty abstract in debug builds; the explicit
// test after it covers builds where assert is compiled out.
156 assert( abstract && *abstract );
158 if (abstract == NULL || *abstract == '\0')
// head marks the start of the current field, p scans for the next tab.
161 char *p = abstract, *head = p;
162 if ((p = strchr(p, '\t')) == NULL) {
167 id = new UAS_String(head);
// Puts the separator back (presumably it was overwritten with a terminator on
// a line not shown) and steps past it to the next field.
168 *p++ = '\t'; // essential to maintain logical constness
// An abstract that ends after the id, or has no second tab, is malformed.
171 if (*p == '\0' || (p = strchr(p, '\t')) == NULL) {
176 book = new UAS_String(head);
177 *p++ = '\t'; // essential to maintain logical constness
// Everything after the second tab is the section field.
179 section = new UAS_String(p);
// Diagnostic dump of the three parsed fields.
184 fprintf(stderr, "id=%s, book=%s, section=%s\n", (char*)*(UAS_String*)id,
185 (char*)*(UAS_String*)book, (char*)*(UAS_String*)section);
// Checks an abstract against a section id: the abstract is parsed with
// parse_abstract() and its id field is compared with `id` using strcmp.
// The return type and all result values are on lines not visible in this
// listing.
192 DtSR_SearchResultsEntry::section_in_abstract(char* abstract, const char* id)
// A null or empty abstract or id is rejected up front.
194 if (abstract == NULL || *abstract == '\0' || id == NULL || *id == '\0')
197 UAS_Pointer<UAS_String> section_id, book, section;
// NOTE(review): the UAS_Pointer returned by parse_abstract() is compared with
// an integer zero here; confirm UAS_Pointer supports this comparison as a
// null test.
199 if (parse_abstract(abstract, section_id, book, section) == (const int)NULL) {
201 fprintf(stderr, "(ERROR) parse_abstract failed\n");
// strcmp is non-zero when the requested id differs from the parsed one.
206 if (strcmp(id, (char*)*(UAS_String*)section_id))
// File-local helper that converts a proximity into a relevance score by
// scaling the reciprocal of the proximity with Utmost_Relevance.
// The conditions guarding the early return and the INT_MAX result are on
// lines not visible in this listing.
212 static unsigned int proximity2relevance(int prox)
// Early exit with the highest relevance.
// NOTE(review): presumably taken for prox == 0, which would otherwise divide
// by zero below; confirm against the full source.
215 return DtSR_SearchResultsEntry::Utmost_Relevance;
219 if (prox == (int)INT_MAX)
222 inv_prox = 1 / (float)prox; // 0 to 1;
// The product is converted to the unsigned int return type on return.
224 return inv_prox * DtSR_SearchResultsEntry::Utmost_Relevance;
// Computes the relevance of this entry from its zone proximities and stores
// it in f_relevance.
228 DtSR_SearchResultsEntry::relevance()
// A value other than Inv_Relevance means a relevance is already stored
// (the statement under this test is not visible; presumably it returns it).
230 if (f_relevance != Inv_Relevance)
// A non-negative proximity for the uas_all zone decides the relevance alone.
234 if ((prox = get_proximity(DtSR_SearchZones::uas_all)) >= 0) {
235 return f_relevance = proximity2relevance(prox);
// Otherwise accumulate over the zones below uas_all that have a non-negative
// proximity and divide by nzones (its declaration and update are not visible).
239 unsigned int relevance = 0;
240 for (int i=0; i<DtSR_SearchZones::uas_all; i++) {
241 if ((prox = get_proximity((DtSR_SearchZones::uas_zones) i)) >= 0) {
242 relevance += proximity2relevance(prox);
// NOTE(review): when assert is compiled out, nzones == 0 reaches the division
// below; confirm callers guarantee at least one zone with a proximity.
246 assert( nzones > 0 );
247 return f_relevance = relevance / nzones;
// Builds the list of text runs (matches) for this entry.
// Visible steps: create the document object from f_id, render its data to
// plain text through the style sheet machinery, locate the search stems in
// that text, then convert each hit into a UAS_TextRun appended to the
// returned list.
// NOTE(review): many lines of this function (braces, declarations, error
// paths, the return statement) are not visible in this listing; the comments
// below describe only the visible statements.
250 UAS_Pointer<UAS_List<UAS_TextRun> >
251 DtSR_SearchResultsEntry::create_matches()
255 fprintf(stderr, "(DEBUG) UAS_Common is being created from id=\"%s\"\n",
258 UAS_Pointer<UAS_Common> doc = UAS_Common::create(f_id);
263 "book_name=%s title=%s\n",
265 (char*)(doc->book_name()), (char*)(doc->title()));
// Dumps the raw document data to a file named "OriginalText".
270 ofstream out("OriginalText");
271 out << (char *) doc->data();
277 style_sheet_mgr().initOnlineStyleSheet(doc);
279 // catch_noarg (StyleSheetSyntaxError)
282 #ifdef JOE_HATES_THIS
283 message_mgr().error_dialog(
284 (char*)UAS_String(CATGETS(Set_Messages, 39, "File a Bug")));
286 throw(CASTEXCEPT Exception());
// Render pipeline: the document data is parsed by DocParser, resolved against
// gPathTab, and written by Tml_TextRenderer into `output`.
291 istringstream input((char *)doc->data());
292 ostringstream output;
296 Tml_TextRenderer renderer(output, f_search_res->search_zones());
297 Resolver resolver(*gPathTab, renderer);
298 DocParser docparser(resolver);
300 docparser.parse(input);
304 ON_DEBUG(cerr << "DtSR_SearchResultsEntry::create_matches...exception thrown" << '\n' << flush);
// NOTE(review): if ostringstream is std::ostringstream, str() returns a
// temporary string, so `text` dangles as soon as this statement ends and the
// write on the next line goes through a dangling pointer. The rendered text
// should be copied into storage that outlives every use of `text`.
309 char* text = (char*)output.str().c_str();
310 *(text + output.str().size()) = '\0';
314 ofstream out("ParsedText");
320 fprintf(stderr, "(DEBUG) stems=0x%p, count=%d\n",
321 (char*)f_search_res->stems(f_dbn)->stems(),
322 f_search_res->stems(f_dbn)->count());
325 for (; n_of_stems < f_search_res->stems(f_dbn)->count(); n_of_stems++) {
326 fprintf(stderr, "(DEBUG) %dth stem = %s\n", n_of_stems,
327 (f_search_res->stems(f_dbn)->stems())[n_of_stems]);
331 int stype = f_search_res->search_type();
333 DtSrHitword* kwics = NULL;
// parseout holds the hits as text lines of "offset<TAB>length".
336 char* parseout = NULL;
338 // hack! overwrite f_language, since austext's value is wrong
339 // In future, the next lines should be removed.
// The language is taken from the LANG environment variable: a value starting
// with "ja" selects Japanese.
340 const char* lang = getenv("LANG");
341 if (lang && !strncmp(lang, "ja", strlen("ja")))
342 f_language = DtSrLaJPN;
344 f_language = DtSrLaENG;
// Japanese text is highlighted by StringParser::hilite with the stems joined
// one per line, bypassing DtSearchHighlight.
346 if (f_language == DtSrLaJPN) { // do not trust DtSearchHighlight!
347 int count = f_search_res->stems(f_dbn)->count();
349 ostringstream stemsbuf;
350 for (int i = 0; i < count; i++) {
351 stemsbuf << (f_search_res->stems(f_dbn)->stems())[i] << '\n';
// NOTE(review): same temporary-lifetime problem as `text` above; `stems`
// points into a string that no longer exists when hilite() is called.
353 char* stems = (char*)stemsbuf.str().c_str();
354 *(stems + stemsbuf.str().size()) = '\0';
356 parseout = StringParser::hilite(text, count, stems);
358 assert( parseout != NULL );
// Other languages go through DtSearchHighlight, which fills kwics/n_kwics.
364 static DtSR_SearchEngine& search_engine = DtSR_SearchEngine::search_engine();
365 if (DtSearchHighlight(
366 search_engine.char_db_name(f_dbn),
367 text, &kwics, &n_kwics, stype,
368 (char*)f_search_res->stems(f_dbn)->stems(),
369 f_search_res->stems(f_dbn)->count()) != DtSrOK) {
371 fprintf(stderr, "(ERROR) DtSearchHighlight failed\n");
378 fprintf(stderr, "(DEBUG) %ld hit found in %s\n", n_kwics, (char*)f_id);
382 UAS_Pointer<UAS_List<UAS_TextRun> >
383 matches = new UAS_List<UAS_TextRun>;
385 // convert kwics to textrun
// The DtSearchHighlight hits are serialized into the same
// "offset<TAB>length" line format that the hilite() path produces.
386 if (parseout == NULL && kwics) {
387 ostringstream textrunbuf;
388 for (int i = 0; i < n_kwics; i++)
389 textrunbuf << kwics[i].offset << '\t' << kwics[i].length << '\n';
// NOTE(review): parseout is taken from a temporary str() result of a stream
// local to this branch, yet it is read after the branch when `textruns` is
// constructed; the serialized text needs storage declared outside this branch.
390 parseout = (char*)textrunbuf.str().c_str();
391 *(parseout + textrunbuf.str().size()) = '\0';
393 else if (parseout == NULL)
401 fprintf(stderr, "(DEBUG) byte offset and length\n%s", parseout);
// Each line of parseout is read back and split at the tab into the offset
// string and the length string.
404 istringstream textruns(parseout);
407 while (textruns.get(linebuf, 128, '\n')) {
409 textruns.get(newline);
410 assert( newline == '\n');
412 char* off_str = linebuf;
413 char* len_str = strchr(linebuf, '\t');
414 assert( len_str && *len_str == '\t' );
// The rendered text is walked from its start; it is expected to begin with a
// ShiftIn or ShiftOut marker.
419 const char* cursor = (const char*)text;
420 assert( *cursor == ShiftIn || *cursor == ShiftOut );
422 int off = atoi(off_str);
// While advancing to the hit offset, white space (including byte 0xA0) and
// the ShiftIn/ShiftOut markers are handled separately from ordinary
// characters, which are measured with mblen (the branch bodies are not
// visible).
428 if (*cursor == '\n' || *cursor == '\t' || *cursor == ' ' ||
429 *cursor == 0x0D || (unsigned char)*cursor == 0xA0) {
432 else if (*cursor == ShiftIn || *cursor == ShiftOut) {
433 if (*cursor == ShiftIn)
// NOTE(review): mblen returns -1 for an invalid sequence; that is only caught
// by the assert, so confirm behaviour in builds with assert compiled out.
440 scanned = mblen(cursor, MB_CUR_MAX);
441 assert( scanned >= 0 );
454 int len = atoi(len_str);
455 // remove leading white-spaces
456 for (; len && (*cursor == ' ' || *cursor == '\t' ||
457 *cursor == '\n'|| *cursor == 0x0D); cursor++, len--);
459 // remove trailing white-spaces
// NOTE(review): this loop has no lower bound on p or len in its own
// condition; confirm a guard on a line not shown prevents it from running
// before cursor when the hit is all white space.
461 for (const char* p = cursor + len - 1;
462 *p==' ' || *p=='\t' || *p=='\n' || *p==0x0D; p--, len--);
// Counts the hit length in characters (vlen) while consuming len bytes.
469 for (; len > 0; vlen++) {
470 int scanned = mblen(cursor, MB_CUR_MAX);
471 assert( scanned >= 0 );
// One text run per hit, built from vcc and vlen, is appended to the result.
476 UAS_Pointer<UAS_TextRun> textrun = new UAS_TextRun(vcc, vlen);
477 matches->insert_item(textrun);
// Takes a search-results pointer by reference and captures the current
// f_search_res in rval first.
// NOTE(review): the rest of the body is not visible; presumably f_search_res
// is replaced by res and the previous value is returned.
488 UAS_Pointer<DtSR_SearchResults>
489 DtSR_SearchResultsEntry::search_result(UAS_Pointer<DtSR_SearchResults>& res)
491 UAS_Pointer<DtSR_SearchResults> rval = f_search_res;
// Reference-count release hook; the visible statement delegates to
// UAS_Base::unreference(). Any statements around that call are not visible in
// this listing.
499 DtSR_SearchResultsEntry::unreference()
503 UAS_Base::unreference();