2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these librararies and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
24 * COMPONENT_NAME: austext
34 * (C) COPYRIGHT International Business Machines Corp. 1992,1995
36 * Licensed Materials - Property of IBM
37 * US Government Users Restricted Rights - Use, duplication or
38 * disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
40 /************************* BMSTRSTR.C ****************************
41 * $TOG: bmstrstr.c /main/6 1998/04/17 11:25:23 mgreess $
42 * Original module named fastsearch.c
43 * and included colocation string search functions.
44 * Modification of Boyer-Moore-Horspool algorithm,
45 * Sec 10.5.2 of Information Retrieval, Frakes and Baeza-Yates, editors.
46 * Provides a generalized boyer-moore
47 * strstr() function. The table used in the BMH algorithm is
48 * generated in a separate function to improve efficiency when
49 * looking for the same substring pattern in multiple text strings.
50 * The 'length' arguments can be passed if known, or passed as
51 * strlen(xxx) if not known. HOWEVER the string arrays MUST be at
52 * least 1 char larger then strlen() says ('cause we insert a \0).
53 * This whole thing has been coded for SPEED!
56 * Revision 2.2 1995/10/26 15:37:42 miker
59 * Revision 2.1 1995/09/22 19:10:39 miker
60 * Freeze DtSearch 0.1, AusText 2.1.8
62 * Revision 1.2 1995/08/31 22:12:05 miker
63 * Minor changes for DtSearch.
68 #include <sys/param.h>
71 #define __strcmp strcmp
74 /*********#define TEST_BMSTRSTR**************/
76 /****************************************/
80 /****************************************/
81 /* Builds the table for search substring 'pattern'
82 * used by the BMH core search algorithm.
83 * 'patlen' is the string length of 'pattern'.
84 * The caller defines and passes 'bmhtable',
85 * an array of long integers (size_t) of size MAX_BMHTAB.
86 * This function initializes bmhtable for later search call.
89 unsigned char *pattern,
95 for (k = 0; k < MAX_BMHTAB; k++)
98 for (k = 0; k < patlen; k++)
99 bmhtable[pattern[k]] = (patlen - k);
101 } /* bmhtable_build() */
104 /****************************************/
108 /****************************************/
109 /* Performs 'core' BMH search after bmhtable is built.
110 * Returns ptr to first occurrence of pattern in text or NULL.
111 * WARNING! IF EITHER txtlen OR patlen <= 0, THIS FUNCTION WILL CRASH!!!
112 * Pattern and patlen MUST BE identical with those used to build
119 unsigned char *pattern,
123 register unsigned char
124 lastchar = pattern[patlen - 1];
125 register unsigned char
127 register unsigned char
129 register unsigned char
136 /* Terminate pattern with a char we KNOW is not in text.
137 * Note that this requires string to have room for \0 at end.
139 savechar = pattern[patlen];
140 pattern[patlen] = '\0';
142 last = text + txtlen;
143 for (cp = text + patlen - 1; cp < last; cp += bmhtable[textchar]) {
145 * Check if last character matches. If it doesn't, no need
146 * to check any further.
148 if ((textchar = *cp) != lastchar)
152 if (!__strcmp ((char *) (cp + 1 - patlen), (char *) pattern))
153 result = cp + 1 - patlen;
158 pattern[patlen] = savechar; /* restore last char */
159 return (char *) result;
163 /****************************************/
167 /****************************************/
168 /* Search in text [1..txtlen] for pattern [1..patlen].
169 * Returns ptr to first occurrence of pattern, or NULL.
174 unsigned char *pattern,
177 size_t bmhtable[MAX_BMHTAB];
179 bmhtable_build (pattern, patlen, bmhtable);
180 return bmhcore (text, txtlen, pattern, patlen, bmhtable);
184 #ifdef TEST_BMSTRSTR /* for test only */
185 #include <sys/stat.h>
186 /****************************************/
190 /****************************************/
191 /* tests bmstrstr() against standard strstr() on a specified file */
197 char fname[BUFSIZ+1];
198 char pattern[MAXPATHLEN+1];
199 char *readbuf = NULL;
203 printf ("\nEnter a filename (Ctrl-C quits) > ");
206 fgets (fname, sizeof(fname), stdin);
207 if (strlen(fname) && fname[strlen(fname)-1] == '\n')
208 fname[strlen(fname)-1] = '\0';
210 if ((f = fopen (fname, "r")) == NULL) {
211 printf ("Can't open '%s': %s\n", fname, strerror (errno));
215 fstat (fileno (f), &statbuf);
216 if (fsize > statbuf.st_size) {
220 fsize = statbuf.st_size;
222 readbuf = malloc (fsize + 64L);
224 fread (readbuf, fsize, 1L, f);
227 printf ("Enter a search pattern > ");
230 fgets (pattern, sizeof(pattern), stdin);
231 if (strlen(pattern) && pattern[strlen(pattern)-1] == '\n')
232 pattern[strlen(pattern)-1] = '\0';
234 ptr = bmstrstr (readbuf, fsize, pattern, strlen (pattern));
236 puts ("bmstrstr: Pattern not found.");
238 printf ("bmstrstr: Pattern found at offset %ld.\n", ptr - readbuf);
240 ptr = strstr (readbuf, pattern);
242 puts ("strstr: Pattern not found.");
244 printf ("strstr: Pattern found at offset %ld.\n", ptr - readbuf);
247 } /* main() test program */
251 /************************* BMSTRSTR.C ****************************/