2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these libraries and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
23 // $XConsortium: TextParser.cc /main/4 1996/06/11 17:41:43 cde-hal $
24 /* Copyright (c) 1995,1996 FUJITSU LIMITED */
25 /* All Rights Reserved */
35 #include "TextParser.hh"
// StringParser::brute_force
//
// Searches text_in for every occurrence of a set of patterns and records
// each hit in a string stream as one line "<offset>\t<length>\n", where
// <offset> is the byte index into text_in at which the match starts and
// <length> is strlen() of the matching pattern.
//
//   text_in    - NUL-terminated text to scan.
//   n_of_pats  - number of patterns the caller claims to pass; at most this
//                many are taken from `patterns`.
//   patterns   - the patterns, separated by '\n'.
//   sensitive  - when equal to False the search is case-insensitive for the
//                ASCII letters a..z only (see the 0x60/0x7B range tests).
//
// NOTE(review): the statements that actually return a value are not among
// the lines shown here; the result is derived from text_run (see the end of
// the function) -- confirm ownership/lifetime of the returned buffer.
45 StringParser::brute_force(const char* text_in, int n_of_pats,
46 const char* patterns, int sensitive)
// Guard clauses: nothing to search in, or nothing to search for.
48 if (text_in == NULL || *text_in == '\0')
50 else if (patterns == NULL || *patterns == '\0')
// Pointer table with one extra slot for the NULL terminator.
// pat_tbl_end marks the slot just past the n_of_pats caller-declared entries.
56 char** pat_tbl = new char*[n_of_pats + 1];
57 char** pat_tbl_end = pat_tbl + n_of_pats;
// Split `patterns` on '\n', copying each piece into its own heap buffer.
// The loop stops at the end of the string or once n_of_pats pieces were taken.
60 for (npat = 0; *patterns && n_of_pats > 0; npat++, n_of_pats--) {
61 char* del = (char *)strchr(patterns, '\n');
62 if (del != NULL) { // more pattern specified
// Copy the bytes before the newline and terminate the copy by hand
// (strncpy does not append a NUL when the source is longer than the count).
63 pat_tbl[npat] = new char[del - patterns + 1];
64 strncpy(pat_tbl[npat], patterns, del - patterns);
65 pat_tbl[npat][del - patterns] = '\0';
// No newline left (presumably the else branch): the remainder is the last
// pattern; advance `patterns` to its terminating NUL so the loop ends.
69 pat_tbl[npat] = new char[strlen(patterns) + 1];
70 strcpy(pat_tbl[npat], patterns);
71 patterns += strlen(patterns);
72 assert( *patterns == '\0' );
75 pat_tbl[npat] = NULL; // pointer table terminated
77 assert( npat > 0 ); // at least one pattern available here
// Diagnostics for a mismatch between n_of_pats and the number of patterns
// actually present in the string.
81 fprintf(stderr, "(WARNING) more patterns available than specified\n");
83 fprintf(stderr, "(WARNING) less patterns available than specified\n");
86 int text_len = strlen(text_in);
88 // remove null and too long patterns
// A pattern that is empty or longer than the whole text can never match.
90 for (i = 0 ; pat_tbl[i]; i++) {
91 if (*(pat_tbl[i]) == '\0' || text_len < strlen(pat_tbl[i])) {
// Duplicate detection: compare each entry with every later entry.
100 for (cursor = pat_tbl; cursor < pat_tbl_end; cursor++) {
103 char** p = cursor + 1;
104 for (; p < pat_tbl_end; p++) {
107 if (strcmp(*cursor, *p) == 0)
// p stopped before the end, i.e. an identical pattern exists further on.
110 if (p < pat_tbl_end) { // same pattern found
// Compaction: locate the first NULL slot; if it lies inside the table,
// slide the remaining non-NULL entries down so the npat surviving patterns
// are contiguous from pat_tbl[0].
119 for (free_slot = pat_tbl; *free_slot; free_slot++);
120 if (free_slot < pat_tbl_end) { // there is a free slot
122 for (i = 0; i < npat; i++, cursor++) {
// Skip holes to reach the next live entry.
// NOTE(review): *cursor is read before the bounds test; with
// pat_tbl_end == pat_tbl + n_of_pats the extra slot keeps that read inside
// the allocation, but testing the bound first would be the safer order.
124 for (; *cursor == NULL && cursor < pat_tbl_end; cursor++);
125 assert( cursor < pat_tbl_end );
126 if (free_slot && free_slot < cursor) {
127 *free_slot = *cursor;
129 // find next available free slot
131 for (; *free_slot; free_slot++);
132 if (free_slot == pat_tbl_end)
141 if (npat == 0) { // there is no effective patterns after all
// Debug dump of the surviving patterns.
147 fprintf(stderr, "(DEBUG) %d effective patterns=", npat);
148 for (int k = 0; pat_tbl[k]; k++) {
149 fprintf(stderr, "\"%s\" ", pat_tbl[k]);
151 fprintf(stderr, "\n");
154 char* caped_text = NULL;
// Case-insensitive mode: bytes in 0x61..0x7A ('a'..'z') are folded to upper
// case by subtracting 0x20. The text is folded into a stream; the patterns
// are scanned with the same range test (presumably folded in place).
156 if (sensitive == False) { // case-insensitive search
158 for (int i = 0; i < npat; i++) {
159 for (p = (unsigned char*)pat_tbl[i]; *p; p++) {
160 if (*p < 0x7B && *p > 0x60) // a ... z
164 ostringstream capitalized;
165 for (p = (unsigned char*)text_in; *p; p++) {
166 if (*p < 0x7B && *p > 0x60) // a ... z
167 capitalized << (char)(*p - 0x20); // capitalize
// NOTE(review): if ostringstream is std::ostringstream, str() returns a
// temporary std::string, so the pointer taken from c_str() dangles once the
// statement ends; the following write through that pointer and every later
// read of text_in would then be undefined behaviour -- confirm and keep a
// named std::string alive for the duration of the search.
171 text_in = caped_text = (char *)capitalized.str().c_str();
172 *(char*)(text_in + capitalized.str().size()) = '\0';
// Accumulates the "<offset>\t<length>\n" result lines.
175 ostringstream text_run;
// Try every start position `index` in the text.
177 for (int index = 0; index < text_len;) {
// One bit per pattern; pattern n owns bit (npat - 1 - n). All patterns start
// out as candidates.
// NOTE(review): 1 << npat is undefined once npat reaches the bit width of
// int; no limit on npat is visible here -- confirm.
178 unsigned int candidate = (1 << npat) - 1;
179 unsigned int success = 0;
// Compare text and patterns byte by byte (i walks the text, j the patterns)
// until no candidate is left; i may reach text_len so the text's NUL is
// compared as well.
181 for (i = index, j = 0 ; i < text_len + 1 && candidate; i++, j++) {
182 for (int n = 0; n < npat; n++) {
183 unsigned int mask = 1;
184 mask = mask << (npat - 1 - n);
185 if (candidate & mask) { // still candidate
// End of pattern n reached without a mismatch (presumably recorded in
// `success` and removed from `candidate`).
186 if (pat_tbl[n][j] == '\0') {
// Byte differs (presumably pattern n is dropped from `candidate`).
191 else if (pat_tbl[n][j] != text_in[i]) {
202 if (success) { // matched
203 // select the longest one
// NOTE(review): the macro is spelled SETECT_LONGEST at all three sites;
// a build flag has to use this exact spelling to enable the branch.
204 #ifdef SETECT_LONGEST
// Walk the success bits from the least significant end; bit n belongs to
// pat_tbl[npat - 1 - n], mirroring the mask computation above.
207 for (int n = 0; success > 0; success /= 0x02, n++) {
208 if (success & 0x01) {
209 #ifdef SETECT_LONGEST
// Longest-match mode: keep track of the longest matching pattern in nth.
213 if (strlen(pat_tbl[nth])
214 < strlen(pat_tbl[npat - 1 - n])) {
// Otherwise one result line is written per matching pattern.
219 text_run << index << '\t' <<
220 strlen(pat_tbl[npat - 1 - n]) << '\n';
224 #ifdef SETECT_LONGEST
225 text_run << index << '\t' << strlen(pat_tbl[nth]) << '\n';
// Advance by one (possibly multibyte) character in the current locale.
// NOTE(review): mblen() returns -1 for an invalid sequence and 0 at a NUL;
// neither value is handled on this line, so index could move backwards or
// stall -- confirm.
229 index += mblen(text_in + index, MB_CUR_MAX);
// Per-pattern cleanup (presumably releasing each pat_tbl[i]).
232 for (i = 0; i < npat; i++)
// NOTE(review): same lifetime concern as above -- text_run.str() is a
// temporary if ostringstream is std::ostringstream, so ret_text would dangle
// immediately; confirm.
239 char* ret_text = (char *)text_run.str().c_str();
241 if (ret_text == NULL)
243 else if (*ret_text == '\0') {
// StringParser::project_textrun
//
// Takes a text-run list with one "<offset>\t<length>" entry per line and
// writes a list in the same format in which entries that overlap or touch
// have been merged into a single run.
//
//   org_textrun - NUL-terminated run list; every line, including the last,
//                 must end in '\n', and offsets must not decrease from one
//                 line to the next (see the next_off < off check).
//
// Malformed input is reported on stderr; what is returned in those cases is
// decided by statements not shown among these lines.
253 StringParser::project_textrun(const char* org_textrun)
// Nothing to project for a NULL or empty list.
255 if (org_textrun == NULL || *org_textrun == '\0')
258 istringstream textrun(org_textrun);
// Read the first entry. get() stores at most 127 characters and leaves the
// delimiter in the stream, so the following get() must see the '\n'.
261 textrun.get(line, 128, '\n');
262 if (textrun.get() != '\n')
265 char *offstr, *lenstr;
// The tab separates the offset field from the length field.
268 if ((lenstr = strchr(line, '\t')) == NULL)
// off/len hold the run currently being built; a negative offset or a
// non-positive length is rejected.
275 if (off < 0 || len <= 0) {
277 fprintf(stderr, "(ERROR) either off=%d or len=%d is invalid\n",
283 ostringstream ret_text;
// Process the remaining entries one line at a time.
285 while (textrun.get(line, 128, '\n')) {
286 if (textrun.get() != '\n') {
288 fprintf(stderr, "(ERROR) line is not followed by newline\n");
293 int next_off, next_len;
295 if ((lenstr = strchr(line, '\t')) == NULL) {
297 fprintf(stderr, "(ERROR) tab chatacter not found in \"%s\"\n", line);
302 next_off = atoi(offstr);
303 next_len = atoi(lenstr);
// Entries must be ordered by offset and have a positive length.
304 if (next_off < off || next_len <= 0) {
306 fprintf(stderr, "(ERROR) either off=%d or length=%d is invalid\n",
// The next run starts inside, or directly at the end of, the current run.
312 if (next_off <= off + len) { // overlap detected
// Extend the current run only if the next one reaches further.
313 if (off + len < next_off + next_len)
314 len = next_off + next_len - off; // merge
// Emit the current run (presumably in the non-overlapping branch, after
// which the next entry becomes the current run).
317 ret_text << off << '\t' << len << '\n';
// Emit the last pending run followed by an explicit NUL character.
323 ret_text << off << '\t' << len << '\n' << '\0';
// NOTE(review): if ostringstream is std::ostringstream, str() returns a
// temporary std::string, so the returned pointer dangles as soon as this
// statement completes -- confirm and return storage the caller can own.
325 return (char *)ret_text.str().c_str();
// StringParser::hilite
//
// Convenience entry point: runs brute_force() over `text` with the `n`
// newline-separated patterns in `pats`, then passes the resulting run list
// through project_textrun() and returns that result.
//
// brute_force() is called without its fourth argument, so its declaration
// presumably supplies a default for `sensitive` -- confirm in the header.
// NOTE(review): who owns the buffers behind textrun and prjed_textrun is not
// visible from these lines -- confirm against the callees.
329 StringParser::hilite(const char* text, int n, const char* pats)
331 char* textrun = brute_force(text, n, pats);
336 char* prjed_textrun = project_textrun(textrun);
339 return prjed_textrun;