2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these libraries and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
23 // $XConsortium: TextParser.cc /main/4 1996/06/11 17:41:43 cde-hal $
24 /* Copyright (c) 1995,1996 FUJITSU LIMITED */
25 /* All Rights Reserved */
32 #include <strstream.h>
34 #include "TextParser.hh"
// Scans text_in for occurrences of every pattern listed in `patterns`,
// testing all patterns at each character position, and writes one
// "<offset>\t<length>\n" record per hit into the text_run stream.
//
//   text_in    text to scan; NULL or empty input is caught by the first guard
//   n_of_pats  number of patterns the caller claims `patterns` contains
//   patterns   pattern strings separated by '\n'; NULL or empty input is
//              caught by the second guard
//   sensitive  when equal to False, ASCII a-z in the text is upper-cased
//              before matching (case-insensitive search)
//
// The result is taken from text_run.str() and checked for NULL / empty
// before returning.
44 StringParser::brute_force(const char* text_in, int n_of_pats,
45 const char* patterns, int sensitive)
// Guard clauses: nothing to search in, or nothing to search for.
47 if (text_in == NULL || *text_in == '\0')
49 else if (patterns == NULL || *patterns == '\0')
// Pointer table with one extra slot for the NULL terminator; pat_tbl_end
// marks the slot just past the last caller-declared pattern.
55 char** pat_tbl = new char*[n_of_pats + 1];
56 char** pat_tbl_end = pat_tbl + n_of_pats;
// Split `patterns` on '\n', copying each piece into its own heap string.
// Stops at the end of the input or after n_of_pats pieces, whichever is first.
59 for (npat = 0; *patterns && n_of_pats > 0; npat++, n_of_pats--) {
60 char* del = strchr(patterns, '\n');
61 if (del != NULL) { // more pattern specified
62 pat_tbl[npat] = new char[del - patterns + 1];
63 strncpy(pat_tbl[npat], patterns, del - patterns);
// strncpy does not terminate here, so add the NUL explicitly.
64 pat_tbl[npat][del - patterns] = '\0';
// No delimiter left: the remainder of the string is the last pattern.
68 pat_tbl[npat] = new char[strlen(patterns) + 1];
69 strcpy(pat_tbl[npat], patterns);
70 patterns += strlen(patterns);
71 assert( *patterns == '\0' );
74 pat_tbl[npat] = NULL; // pointer table terminated
76 assert( npat > 0 ); // at least one pattern available here
// Diagnostics for a mismatch between n_of_pats and the patterns actually found.
80 fprintf(stderr, "(WARNING) more patterns available than specified\n");
82 fprintf(stderr, "(WARNING) less patterns available than specified\n");
85 int text_len = strlen(text_in);
87 // remove null and too long patterns
89 for (i = 0 ; pat_tbl[i]; i++) {
// NOTE(review): compares int text_len with the size_t result of strlen.
90 if (*(pat_tbl[i]) == '\0' || text_len < strlen(pat_tbl[i])) {
// Duplicate detection: compare each entry with every later entry.
99 for (cursor = pat_tbl; cursor < pat_tbl_end; cursor++) {
102 char** p = cursor + 1;
103 for (; p < pat_tbl_end; p++) {
106 if (strcmp(*cursor, *p) == 0)
109 if (p < pat_tbl_end) { // same pattern found
// Compaction: locate the first NULL slot, then move surviving entries
// down into free slots so the table is dense again.
118 for (free_slot = pat_tbl; *free_slot; free_slot++);
119 if (free_slot < pat_tbl_end) { // there is a free slot
121 for (i = 0; i < npat; i++, cursor++) {
// Skip over emptied slots to the next surviving pattern.
// NOTE(review): *cursor is read before the bounds test in this condition.
123 for (; *cursor == NULL && cursor < pat_tbl_end; cursor++);
124 assert( cursor < pat_tbl_end );
125 if (free_slot && free_slot < cursor) {
126 *free_slot = *cursor;
128 // find next available free slot
130 for (; *free_slot; free_slot++);
131 if (free_slot == pat_tbl_end)
140 if (npat == 0) { // there is no effective patterns after all
// Debug dump of the patterns that survived filtering.
146 fprintf(stderr, "(DEBUG) %d effective patterns=", npat);
147 for (int k = 0; pat_tbl[k]; k++) {
148 fprintf(stderr, "\"%s\" ", pat_tbl[k]);
150 fprintf(stderr, "\n");
// Non-NULL only when an upper-cased copy of the text is produced below.
153 char* caped_text = NULL;
155 if (sensitive == False) { // case-insensitive search
// Walk every byte of every pattern looking for ASCII lower-case letters
// (presumably upper-cased in place, as is done for the text below - confirm).
157 for (int i = 0; i < npat; i++) {
158 for (p = (unsigned char*)pat_tbl[i]; *p; p++) {
159 if (*p < 0x7B && *p > 0x60) // a ... z
// Build an upper-cased copy of the text in a stream buffer.
163 ostrstream capitalized;
164 for (p = (unsigned char*)text_in; *p; p++) {
165 if (*p < 0x7B && *p > 0x60) // a ... z
166 capitalized << (char)(*p - 0x20); // capitalize
// From here on text_in refers to the upper-cased buffer obtained from str().
170 text_in = caped_text = capitalized.str();
// Terminate the buffer at pcount(), the number of characters written.
// NOTE(review): assumes the buffer has room for one more byte - confirm.
171 *(char*)(text_in + capitalized.pcount()) = '\0';
// Main scan: try every pattern at each starting position `index`.
176 for (int index = 0; index < text_len;) {
// One bit per pattern; all patterns start as candidates.
// NOTE(review): npat at or above the bit width of int overflows this
// shift - confirm that callers bound the pattern count.
177 unsigned int candidate = (1 << npat) - 1;
178 unsigned int success = 0;
// i walks the text, j walks the patterns. The bound is text_len + 1 so a
// pattern ending exactly at the end of the text can still reach its NUL.
180 for (i = index, j = 0 ; i < text_len + 1 && candidate; i++, j++) {
181 for (int n = 0; n < npat; n++) {
// Pattern n is represented by bit (npat - 1 - n).
182 unsigned int mask = 1;
183 mask = mask << (npat - 1 - n);
184 if (candidate & mask) { // still candidate
// End of pattern reached without a mismatch.
185 if (pat_tbl[n][j] == '\0') {
// Character mismatch against the text.
190 else if (pat_tbl[n][j] != text_in[i]) {
201 if (success) { // matched
202 // select the longest one
203 #ifdef SETECT_LONGEST
// Walk the success bits from the least significant end; bit n corresponds
// to pat_tbl[npat - 1 - n], mirroring the mask computed above.
206 for (int n = 0; success > 0; success /= 0x02, n++) {
207 if (success & 0x01) {
208 #ifdef SETECT_LONGEST
// Compare the current choice `nth` with this match by pattern length.
212 if (strlen(pat_tbl[nth])
213 < strlen(pat_tbl[npat - 1 - n])) {
// Record this match as "<offset>\t<length>\n".
218 text_run << index << '\t' <<
219 strlen(pat_tbl[npat - 1 - n]) << '\n';
223 #ifdef SETECT_LONGEST
224 text_run << index << '\t' << strlen(pat_tbl[nth]) << '\n';
// Advance by the byte length of the multibyte character at `index`.
// NOTE(review): mblen can return 0 or -1; confirm that is handled.
228 index += mblen(text_in + index, MB_CUR_MAX);
// Per-pattern pass after the scan (presumably frees the copies - confirm).
231 for (i = 0; i < npat; i++)
// Take the accumulated match records out of the stream.
238 char* ret_text = text_run.str();
240 if (ret_text == NULL)
242 else if (*ret_text == '\0') {
// Reads a list of "<offset>\t<length>\n" records from org_textrun, merges
// records whose ranges overlap or touch, and returns the merged list in
// the same format, taken from ret_text.str().
//
//   org_textrun  newline-separated records; NULL or empty input is caught
//                by the first guard. Records are expected in non-decreasing
//                offset order: a smaller following offset is reported as an
//                error.
252 StringParser::project_textrun(const char* org_textrun)
254 if (org_textrun == NULL || *org_textrun == '\0')
257 istrstream textrun(org_textrun);
// Read the first record; each line is limited to the 128 passed to get().
260 textrun.get(line, 128, '\n');
// get(line, ...) leaves the delimiter unread, so consume and verify it.
261 if (textrun.get() != '\n')
264 char *offstr, *lenstr;
// The tab separates the offset field from the length field.
267 if ((lenstr = strchr(line, '\t')) == NULL)
// off / len hold the run currently being accumulated.
274 if (off < 0 || len <= 0) {
276 fprintf(stderr, "(ERROR) either off=%d or len=%d is invalid\n",
// Process the remaining records one line at a time.
284 while (textrun.get(line, 128, '\n')) {
285 if (textrun.get() != '\n') {
287 fprintf(stderr, "(ERROR) line is not followed by newline\n");
292 int next_off, next_len;
294 if ((lenstr = strchr(line, '\t')) == NULL) {
// NOTE(review): this message misspells "character"; left unchanged
// because it is runtime output.
296 fprintf(stderr, "(ERROR) tab chatacter not found in \"%s\"\n", line);
301 next_off = atoi(offstr);
302 next_len = atoi(lenstr);
// Reject records that go backwards or have a non-positive length.
303 if (next_off < off || next_len <= 0) {
305 fprintf(stderr, "(ERROR) either off=%d or length=%d is invalid\n",
// The next run starts inside, or directly at the end of, the current run.
311 if (next_off <= off + len) { // overlap detected
// Extend the current run only when the next one reaches further.
312 if (off + len < next_off + next_len)
313 len = next_off + next_len - off; // merge
// Emit the current run (presumably the non-overlapping branch - confirm).
316 ret_text << off << '\t' << len << '\n';
// Emit the final run and terminate the stream buffer with a NUL.
322 ret_text << off << '\t' << len << '\n' << '\0';
324 return ret_text.str();
// Convenience entry point: runs brute_force over `text` with the `n`
// patterns in `pats`, passes the resulting text run through
// project_textrun, and returns the projected result.
//
//   text  text to search
//   n     number of patterns contained in pats
//   pats  pattern list handed to brute_force unchanged
328 StringParser::hilite(const char* text, int n, const char* pats)
// brute_force is called without a `sensitive` argument here; its default
// is presumably supplied by the declaration in the header - confirm.
330 char* textrun = brute_force(text, n, pats);
335 char* prjed_textrun = project_textrun(textrun);
338 return prjed_textrun;