2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these libraries and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
23 // $XConsortium: TextParser.cc /main/4 1996/06/11 17:41:43 cde-hal $
24 /* Copyright (c) 1995,1996 FUJITSU LIMITED */
25 /* All Rights Reserved */
35 #include "TextParser.hh"
// Brute-force multi-pattern search.
//
// Scans text_in for occurrences of every pattern in a newline-separated
// pattern list and records each hit as one line of the form
// "<offset>\t<length>\n", where <offset> is the byte index into text_in at
// which the match starts and <length> is strlen() of the matched pattern.
//
// Parameters:
//   text_in   - NUL-terminated text to search.
//   n_of_pats - number of patterns the caller says are in `patterns`; at most
//               this many are read.
//   patterns  - pattern list, one pattern per '\n'-delimited segment.
//   sensitive - when equal to False the search is case-insensitive: the bytes
//               'a'..'z' of both the patterns and the text are folded to
//               upper case before matching.
//
// Returns the C string owned by a function-local static std::string holding
// the accumulated "<offset>\t<length>\n" records.
45 StringParser::brute_force(const char* text_in, int n_of_pats,
46 const char* patterns, int sensitive)
// Argument validation: NULL/empty text, NULL/empty pattern list, and a
// non-positive pattern count are each tested separately.
48 if (text_in == NULL || *text_in == '\0')
50 else if (patterns == NULL || *patterns == '\0')
53 if (! (n_of_pats > 0))
// Pattern table: n_of_pats slots plus one extra slot for the NULL terminator.
// pat_tbl_end marks the slot just past the last requested pattern.
56 char** pat_tbl = new char*[n_of_pats + 1];
57 char** pat_tbl_end = pat_tbl + n_of_pats;
// Split `patterns` on '\n', copying each segment into its own heap buffer.
// Stops when the list is exhausted or n_of_pats patterns have been taken.
60 for (npat = 0; *patterns && n_of_pats > 0; npat++, n_of_pats--) {
61 char* del = (char *)strchr(patterns, '\n');
62 if (del != NULL) { // more pattern specified
64 pat_tbl[npat] = new char[len + 1];
// memcpy returns its destination, so this writes the terminating NUL right
// after the len copied bytes.
65 *((char *) memcpy(pat_tbl[npat], patterns, len) + len) = '\0';
// No further delimiter: the remainder of `patterns` is the last pattern.
69 int len = strlen(patterns);
70 pat_tbl[npat] = new char[len + 1];
71 *((char *) memcpy(pat_tbl[npat], patterns, len) + len) = '\0';
72 patterns += strlen(patterns);
73 assert( *patterns == '\0' );
76 pat_tbl[npat] = NULL; // pointer table terminated
78 assert( npat > 0 ); // at least one pattern available here
// Diagnostics for a mismatch between n_of_pats and the patterns actually found.
82 fprintf(stderr, "(WARNING) more patterns available than specified\n");
84 fprintf(stderr, "(WARNING) less patterns available than specified\n");
87 int text_len = strlen(text_in);
89 // remove null and too long patterns
// A pattern is rejected when it is empty or longer than the text itself,
// since neither can produce a useful match.
91 for (i = 0 ; pat_tbl[i]; i++) {
92 if (*(pat_tbl[i]) == '\0' || text_len < (int) strlen(pat_tbl[i])) {
// Duplicate detection: compare each slot against every later slot with
// strcmp() so that identical patterns are not matched twice.
101 for (cursor = pat_tbl; cursor < pat_tbl_end; cursor++) {
104 char** p = cursor + 1;
105 for (; p < pat_tbl_end; p++) {
108 if (strcmp(*cursor, *p) == 0)
111 if (p < pat_tbl_end) { // same pattern found
// Compaction: locate the first NULL slot, then move surviving patterns down
// into free slots so the table is dense again.
120 for (free_slot = pat_tbl; *free_slot; free_slot++);
121 if (free_slot < pat_tbl_end) { // there is a free slot
123 for (i = 0; i < npat; i++, cursor++) {
// Skip over emptied slots to reach the next surviving pattern.
// NOTE(review): *cursor is read before the cursor < pat_tbl_end test; this
// relies on the extra (n_of_pats + 1)-th slot being readable - confirm.
125 for (; *cursor == NULL && cursor < pat_tbl_end; cursor++);
126 assert( cursor < pat_tbl_end );
127 if (free_slot && free_slot < cursor) {
128 *free_slot = *cursor;
130 // find next available free slot
132 for (; *free_slot; free_slot++);
133 if (free_slot == pat_tbl_end)
142 if (npat == 0) { // there is no effective patterns after all
// Debug dump of the patterns that survived filtering.
148 fprintf(stderr, "(DEBUG) %d effective patterns=", npat);
149 for (int k = 0; pat_tbl[k]; k++) {
150 fprintf(stderr, "\"%s\" ", pat_tbl[k]);
152 fprintf(stderr, "\n");
155 char* caped_text = NULL;
// Case folding works byte-by-byte on the range 0x61..0x7A ('a'..'z') only.
158 if (sensitive == False) { // case-insensitive search
160 for (int i = 0; i < npat; i++) {
161 for (p = (unsigned char*)pat_tbl[i]; *p; p++) {
162 if (*p < 0x7B && *p > 0x60) // a ... z
// The text is const, so an upper-cased copy is built in a stream and text_in
// is redirected to that copy for the remainder of the function.
166 ostringstream capitalized;
167 for (p = (unsigned char*)text_in; *p; p++) {
168 if (*p < 0x7B && *p > 0x60) // a ... z
169 capitalized << (char)(*p - 0x20); // capitalize
173 capstr = capitalized.str();
174 text_in = caped_text = (char *)capstr.c_str();
// Accumulates the "<offset>\t<length>\n" records.
177 ostringstream text_run;
// Main scan: try every start position `index` in the text.
179 for (int index = 0; index < text_len;) {
// One bit per pattern: `candidate` holds the patterns that still agree with
// the text at this start position, `success` those that matched completely.
// NOTE(review): 1 << npat is an int shift, so npat must stay below the bit
// width of int - confirm an upper bound on the pattern count exists.
180 unsigned int candidate = (1 << npat) - 1;
181 unsigned int success = 0;
// i walks the text, j walks the patterns in lockstep. The "+ 1" lets i reach
// the text's terminating NUL so a pattern ending exactly at the end of the
// text can still reach its own '\0'.
183 for (i = index, j = 0 ; i < text_len + 1 && candidate; i++, j++) {
184 for (int n = 0; n < npat; n++) {
// Pattern n is represented by bit (npat - 1 - n).
185 unsigned int mask = 1;
186 mask = mask << (npat - 1 - n);
187 if (candidate & mask) { // still candidate
// End of pattern reached without a mismatch.
188 if (pat_tbl[n][j] == '\0') {
// Pattern byte differs from the text byte at this position.
193 else if (pat_tbl[n][j] != text_in[i]) {
204 if (success) { // matched
205 // select the longest one
206 #ifdef SETECT_LONGEST
// Walk the success bits from the least significant end; bit n (counted from
// the LSB) corresponds to pattern index npat - 1 - n.
209 for (int n = 0; success > 0; success /= 0x02, n++) {
210 if (success & 0x01) {
211 #ifdef SETECT_LONGEST
// With SETECT_LONGEST defined, keep track of the longest matched pattern.
215 if (strlen(pat_tbl[nth])
216 < strlen(pat_tbl[npat - 1 - n])) {
// Emit one record for this matched pattern.
221 text_run << index << '\t' <<
222 strlen(pat_tbl[npat - 1 - n]) << '\n';
226 #ifdef SETECT_LONGEST
// Emit a single record for the pattern selected as longest.
227 text_run << index << '\t' << strlen(pat_tbl[nth]) << '\n';
// Advance by one (possibly multibyte) character so matches always start on a
// character boundary of the current locale.
// NOTE(review): mblen() can return -1 for an invalid sequence; no check of
// the return value is visible here - confirm index cannot move backwards.
231 index += mblen(text_in + index, MB_CUR_MAX);
// Release the per-pattern buffers.
234 for (i = 0; i < npat; i++)
// NOTE(review): a function-local static is initialised only the first time
// control passes through its declaration; unless trunstr is reassigned
// elsewhere, later calls would return the first call's result - confirm.
241 static string trunstr = text_run.str();
246 return trunstr.c_str();
// Merges overlapping or adjacent text runs.
//
// org_textrun is a sequence of lines, each "<offset>\t<length>" terminated by
// '\n'. Consecutive runs whose start lies at or before the end of the current
// run are folded into a single run; the resulting runs are written in the
// same "<offset>\t<length>\n" format.
//
// Returns the C string owned by a function-local static std::string holding
// the merged records.
250 StringParser::project_textrun(const char* org_textrun)
// NULL or empty input is tested before any parsing.
252 if (org_textrun == NULL || *org_textrun == '\0')
255 istringstream textrun(org_textrun);
// Read the first record. get() stops before the delimiter and stores at most
// 127 characters, so the following get() must consume the '\n' itself.
258 textrun.get(line, 128, '\n');
259 if (textrun.get() != '\n')
262 char *offstr, *lenstr;
// The tab separates the offset field from the length field.
265 if ((lenstr = strchr(line, '\t')) == NULL)
// The first run must have a non-negative offset and a positive length.
272 if (off < 0 || len <= 0) {
274 fprintf(stderr, "(ERROR) either off=%d or len=%d is invalid\n",
// Accumulates the merged records.
280 ostringstream ret_text;
// Process the remaining records one line at a time; (off, len) is the run
// currently being accumulated.
282 while (textrun.get(line, 128, '\n')) {
283 if (textrun.get() != '\n') {
285 fprintf(stderr, "(ERROR) line is not followed by newline\n");
290 int next_off, next_len;
292 if ((lenstr = strchr(line, '\t')) == NULL) {
294 fprintf(stderr, "(ERROR) tab chatacter not found in \"%s\"\n", line);
299 next_off = atoi(offstr);
300 next_len = atoi(lenstr);
// Records are expected in non-decreasing offset order with positive lengths.
301 if (next_off < off || next_len <= 0) {
303 fprintf(stderr, "(ERROR) either off=%d or length=%d is invalid\n",
// "<=" means a run that starts exactly where the current one ends is merged
// as well.
309 if (next_off <= off + len) { // overlap detected
// Extend the current run only when the next run reaches further to the right;
// a run fully contained in the current one changes nothing.
310 if (off + len < next_off + next_len)
311 len = next_off + next_len - off; // merge
// Write out the accumulated run.
// NOTE(review): presumably the non-overlapping branch, after which the next
// run becomes the current one - confirm.
314 ret_text << off << '\t' << len << '\n';
// Flush the last accumulated run. An explicit '\0' character is appended to
// the stream contents after the final newline.
320 ret_text << off << '\t' << len << '\n' << '\0';
// NOTE(review): a function-local static is initialised only the first time
// control passes through its declaration; unless rettstr is reassigned
// elsewhere, later calls would return the first call's result - confirm.
322 static string rettstr = ret_text.str();
323 return rettstr.c_str();
// Convenience entry point: searches `text` for the `n` newline-separated
// patterns in `pats` via brute_force(), then passes the resulting text runs
// through project_textrun() and returns that result.
327 StringParser::hilite(const char* text, int n, const char* pats)
// Only three arguments are passed, so the `sensitive` argument of
// brute_force() takes whatever default its declaration supplies.
329 const char* textrun = brute_force(text, n, pats);
334 return project_textrun(textrun);