2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these libraries and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
24 * COMPONENT_NAME: austext
26 * FUNCTIONS: discard_to_ETX
33 * (C) COPYRIGHT International Business Machines Corp. 1996
35 * Licensed Materials - Property of IBM
36 * US Government Users Restricted Rights - Use, duplication or
37 * disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
39 /************************ READCHAR.C *******************************
40 * $XConsortium: readchar.c /main/3 1996/05/07 13:47:58 drk $
42 * Character reading cofunctions for language parsers.
45 * Revision 1.5 1996/03/25 17:01:19 miker
46 * Clean up compiler warning.
48 * Revision 1.4 1996/03/13 22:59:39 miker
49 * Added prolog. Changed char to UCHAR several places.
51 * Revision 1.3 1996/03/05 18:39:34 miker
52 * Make *all* char ptrs unsigned.
54 * Revision 1.2 1996/03/05 18:08:03 miker
55 * Readchar functions return unsigned chars for compatibility
56 * with compilers whose default char type is signed.
58 * Revision 1.1 1996/02/01 19:20:39 miker
64 #define PROGNAME "READCHAR"
66 /************************************************/
70 /************************************************/
71 /* Generic readchar cofunction for parsers when the
72 * text block is a string. Used for example when
/* readchar_string: returns the character under a saved cursor and advances
 * the cursor, or returns 0 without advancing once the cursor sits on a NUL.
 * NOTE(review): the statements that use 'the_string' are not visible in
 * this excerpt -- presumably a non-NULL argument repoints the cursor;
 * confirm against the full source.
 */
75 UCHAR readchar_string (UCHAR *the_string)
/* Cursor persists across calls; it starts on an empty string literal, so
 * the return below yields 0 until the cursor is pointed elsewhere. */
77 static UCHAR *strp = (UCHAR *) "";
/* The terminator is never stepped over, so repeated calls keep returning 0. */
80 return ((*strp)? *strp++ : 0);
84 /************************************************/
88 /************************************************/
89 /* Called when dtsrload or dtsrindex wants to skip to next
90 * .fzk record by reading and discarding all text to either
91 * end of record marker or end of file.
92 * Usually called after some error condition in the .fzk file,
93 * such as recid not found in database, or when blobs are not
94 * used in dtsrload and can therefore be discarded.
95 * Wraps around readchar_ftext(), which does the actual read
96 * and checks for ETX with a read-ahead buffer.
/* discard_to_ETX: calls readchar_ftext() once with 'parg', then repeatedly
 * with NULL for as long as it returns a nonzero character.
 * NOTE(review): the statement guarded by the first test is not visible in
 * this excerpt -- presumably an early return when the very first read
 * already yields 0; confirm against the full source.
 */
98 void discard_to_ETX (PARG *parg)
/* First call passes parg; the branch is taken when that read returns 0. */
100 if (!readchar_ftext (parg))
/* Later calls pass NULL; the loop ends on the first 0 returned. */
102 while (readchar_ftext (NULL))
105 } /* discard_to_ETX() */
108 /************************************************/
112 /************************************************/
113 /* Returns next char in a text file. Called in 2 different situations:
114 * It's a character reader cofunction called from
115 * linguistic parser functions for supported languages.
116 * It's also called directly from discard_to_ETX() in offline
117 * build tools for *all* languages when for some reason the
118 * current record being indexed must be discarded all the way
119 * to end of text block (ETX).
120 * ETX is when etxdelim string detected, or at end-of-file.
122 * The first call, which passes parg, is a reset trigger
123 * to clear ETX. The globals are then set and used in
124 * subsequent calls (passing NULL). This technique is
125 * used because it will be called many times in a time
126 * critical loop while indexing.
128 * Returns '\0' at ETX, and keeps returning '\0'
129 * without further reads until the ETX flag is reset.
130 * Returns '\0' forever at end-of-file.
/* readchar_ftext: character reader over a text file with look-ahead for the
 * record delimiter. All state lives in the function-local statics below, so
 * it persists between calls.
 * NOTE(review): only part of this function is visible in this excerpt;
 * remarks tagged NOTE(review) concern statements that are missing here and
 * must be confirmed against the full source.
 */
132 UCHAR readchar_ftext (PARG *parg)
/* Stream that fgetc() reads from. */
134 static FILE *ftext = NULL;
/* Receives the strdup'ed copy of parg->etxdelim. */
135 static UCHAR *etxdelim = NULL;
/* Read-ahead buffer obtained from austext_malloc(). */
136 static UCHAR *rabuf = NULL;
/* Starts out TRUE. */
137 static int ETX_flag = TRUE;
/* strlen of etxdelim, or 0 when etxdelim is NULL. */
138 static int delimsz = 0;
/* Compared against delimsz to decide when to read and when to compare. */
139 static int bufcount = 0;
/* head and tail are each tested against rabufend; cptr is the scan cursor
 * of the delimiter compare; rabufend is set to rabuf + MAX_ETXDELIM. */
141 static UCHAR *head, *tail, *cptr, *rabufend;
143 /* I'm always going to read ahead just enough chars
144 * to test the delim string. The string is expected
145 * to be small, typically just a few chars.
146 * (A single \0 char indicates there is no record
147 * delimiter--record ends only at end of file.)
148 * I use a circular read ahead buffer with head and tail ptrs.
149 * Bufcount contains current number of chars in buf.
150 * Head is next file read point, ahead of youngest char in buf.
151 * Tail is next char to return, ie oldest char in buf.
 * NOTE(review): the reset statements that follow are only partly visible.
 * What can be seen: the delimiter is copied with strdup(), the buffer is
 * sized from MAX_ETXDELIM (not from delimsz), and a delimiter of
 * MAX_ETXDELIM chars or more is reported as too long. Whether an earlier
 * etxdelim or rabuf is released before being overwritten is not shown here.
164 etxdelim = (UCHAR *) strdup (parg->etxdelim);
166 delimsz = (etxdelim)? strlen((char*) etxdelim) : 0;
168 rabuf = austext_malloc (MAX_ETXDELIM + 2, PROGNAME"479", NULL);
169 rabufend = rabuf + MAX_ETXDELIM;
171 if (delimsz >= MAX_ETXDELIM) {
172 fprintf (aa_stderr, PROGNAME"505 Record delimiter too long.\n");
182 /* Read chars into read ahead buf until we
183 * have enough to compare for etxdelim.
184 * If possible, always read in at least one char.
 * NOTE(review): the statements taken on EOF and when head reaches rabufend
 * are not visible; presumably the loop stops on EOF and head wraps back to
 * rabuf -- confirm against the full source.
186 while (bufcount == 0 || bufcount < delimsz) {
189 if ((i = fgetc (ftext)) == EOF)
193 if (head >= rabufend)
197 /* There are now 3 possibilities.
198 * (1) If bufcount == 0 we got EOF and there
199 * are no chars remaining in buffer, quit now.
200 * (2) Most likely bufcount is nonzero and equals delimsz.
201 * Do a wrap-around strcmp looking for delim string.
202 * (3) If bufcount is positive but less than delimsz,
203 * we got EOF before the last record ended.
204 * We'll fall through to the code that returns the next
205 * char in the buffer, returning the remaining chars one
206 * at a time until exhausted.
207 * Note this sequence also handles the case where delimsz == 0.
214 /* Compare chars in read ahead buf for delim string.
215 * (Note that if the compare succeeds, both bufcount
216 * and delimsz must be > 0).
 * NOTE(review): the compare tests each etxdelim[i] against the char at
 * cptr and checks cptr against rabufend, presumably to wrap it; the
 * actions taken on mismatch and on a full match are not visible here.
218 if (bufcount == delimsz) {
220 for (i = 0; i < delimsz; i++) {
221 if (etxdelim[i] != *cptr)
224 if (cptr >= rabufend)
233 /* No ETX yet. Return the oldest char in read ahead buffer. */
/* NOTE(review): tail is tested against rabufend, presumably to wrap it back
 * to rabuf; the assignment and the return are not visible here. */
235 if (tail >= rabufend)
239 } /* readchar_ftext */
241 /*************************** READCHAR.C ****************************/