2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these libraries and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
24 * COMPONENT_NAME: austext
26 * FUNCTIONS: discard_to_ETX
33 * (C) COPYRIGHT International Business Machines Corp. 1996
35 * Licensed Materials - Property of IBM
36 * US Government Users Restricted Rights - Use, duplication or
37 * disclosure restricted by GSA ADP Schedule Contract with IBM Corp.
39 /************************ READCHAR.C *******************************
40 * $XConsortium: readchar.c /main/3 1996/05/07 13:47:58 drk $
42 * Character reading cofunctions for language parsers.
45 * Revision 1.5 1996/03/25 17:01:19 miker
46 * Clean up compiler warning.
48 * Revision 1.4 1996/03/13 22:59:39 miker
49 * Added prolog. Changed char to UCHAR several places.
51 * Revision 1.3 1996/03/05 18:39:34 miker
52 * Make *all* char ptrs unsigned.
54 * Revision 1.2 1996/03/05 18:08:03 miker
55 * Readchar functions return unsigned chars for compatibility
56 * with compilers whose default char type is signed.
58 * Revision 1.1 1996/02/01 19:20:39 miker
/* Module tag. In this file it is string-concatenated in front of message
 * numbers (PROGNAME"479" for the austext_malloc call, PROGNAME"505" for the
 * "Record delimiter too long" diagnostic).
 */
65 #define PROGNAME "READCHAR"
67 /************************************************/
71 /************************************************/
72 /* Generic readchar cofunction for parsers when the
73 * text block is a string. Used for example when
/* Returns the byte under the function-local static cursor strp and then
 * advances the cursor. When the cursor sits on a NUL byte it returns 0 and
 * does not advance, so further calls keep returning 0.
 * strp starts out pointing at an empty string, so the visible code returns 0
 * until the cursor is pointed somewhere else.
 * NOTE(review): no statement that uses the_string is visible in this listing
 * (the lines between the declaration and the return are absent). Presumably
 * a non-NULL the_string re-points strp at a new text block -- confirm
 * against the complete file.
 */
76 UCHAR readchar_string (UCHAR *the_string)
78 static UCHAR *strp = (UCHAR *) "";
/* Post-increment happens only on the non-NUL branch of the conditional. */
81 return ((*strp)? *strp++ : 0);
85 /************************************************/
89 /************************************************/
90 /* Called when dtsrload or dtsrindex wants to skip to next
91 * .fzk record by reading and discarding all text to either
92 * end of record marker or end of file.
93 * Usually called after some error condition in the .fzk file,
94 * such as recid not found in database, or when blobs not
95 * used in dtsrload so they can be discarded.
96 * Wraps around readchar_ftext(), which does the actual read
97 * and checks for ETX with a read-ahead buffer.
/* Calls readchar_ftext() once with parg, then keeps calling it with NULL
 * for as long as it returns nonzero. The characters returned are not used.
 * NOTE(review): the statements controlled by the if and by the while are
 * not visible in this listing. Presumably the if guards an early return
 * (first call already returned 0) and the while has an empty body --
 * confirm against the complete file.
 */
99 void discard_to_ETX (PARG *parg)
/* First call passes parg; a zero result is tested here. */
101 if (!readchar_ftext (parg))
/* Subsequent calls pass NULL until a zero is returned. */
103 while (readchar_ftext (NULL))
106 } /* discard_to_ETX() */
109 /************************************************/
113 /************************************************/
114 /* Returns next char in a text file. Called in 2 different situations:
115 * It's a character reader cofunction called from
116 * linguistic parser functions for supported languages.
117 * It's also called directly from discard_to_ETX() in offline
118 * build tools for *all* languages when for some reason the
119 * current record being indexed must be discarded all the way
120 * to end of text block (ETX).
121 * ETX is when etxdelim string detected, or at end-of-file.
123 * The first call, which passes parg, is a reset trigger
124 * to clear ETX. The globals are then set and used in
125 * subsequent calls (passing NULL). This technique is
126 * used because it will be called many times in a time
127 * critical loop while indexing.
129 * Returns '\0' at ETX, and keeps returning '\0'
130 * without further reads until the ETX flag is reset.
131 * Returns '\0' forever at end-of-file.
/* Character reader over a FILE stream with delimiter look-ahead.
 * All state is held in function-local statics, so it persists across calls.
 *
 * What the visible lines establish:
 *  - Setup: etxdelim becomes a strdup() copy of parg->etxdelim; delimsz is
 *    its strlen(), or 0 when the copy is NULL; rabuf is obtained from
 *    austext_malloc() with MAX_ETXDELIM + 2 bytes; rabufend is set to
 *    rabuf + MAX_ETXDELIM. A delimsz >= MAX_ETXDELIM prints
 *    "Record delimiter too long" to aa_stderr.
 *  - Fill: while bufcount is 0 or less than delimsz, one character is read
 *    with fgetc(ftext) into i and tested against EOF; head is tested
 *    against rabufend.
 *  - Compare: only when bufcount == delimsz, etxdelim[i] is compared with
 *    *cptr for i in [0, delimsz), with cptr tested against rabufend.
 *  - Otherwise the oldest buffered character is returned and tail is
 *    tested against rabufend.
 *
 * NOTE(review): this listing omits many lines, including the declaration of
 * i, the condition(s) guarding the setup code, the statements taken on EOF,
 * on delimiter mismatch/match and on the rabufend tests, and every return
 * statement. Whether the strdup()/austext_malloc() results are freed or
 * allocated only once, and what follows the "too long" message, cannot be
 * determined from here -- confirm against the complete file.
 */
133 UCHAR readchar_ftext (PARG *parg)
135 static FILE *ftext = NULL; /* stream read with fgetc() */
136 static UCHAR *etxdelim = NULL; /* strdup() copy of parg->etxdelim */
137 static UCHAR *rabuf = NULL; /* read-ahead buffer from austext_malloc() */
138 static int ETX_flag = TRUE; /* starts TRUE; updates are not visible in this listing */
139 static int delimsz = 0; /* strlen of etxdelim, 0 if etxdelim is NULL */
140 static int bufcount = 0; /* chars currently held in rabuf */
/* head/tail: read and return positions in rabuf; cptr: scan pointer for the
 * delimiter compare; rabufend: rabuf + MAX_ETXDELIM, the limit each pointer
 * is tested against. */
142 static UCHAR *head, *tail, *cptr, *rabufend;
144 /* I'm always going to read ahead just enough chars
145 * to test the delim string. The string is expected
146 * to be small, typically just a few chars.
147 * (A single \0 char indicates there is no record
148 * delimiter--record ends only at end of file.)
149 * I use a circular read ahead buffer with head and tail ptrs.
150 * Bufcount contains current number of chars in buf.
151 * Head is next file read point, ahead of youngest char in buf.
152 * Tail is next char to return, ie oldest char in buf.
165 etxdelim = (UCHAR *) strdup (parg->etxdelim);
167 delimsz = (etxdelim)? strlen((char*) etxdelim) : 0;
169 rabuf = austext_malloc (MAX_ETXDELIM + 2, PROGNAME"479", NULL);
170 rabufend = rabuf + MAX_ETXDELIM;
172 if (delimsz >= MAX_ETXDELIM) {
173 fprintf (aa_stderr, PROGNAME"505 Record delimiter too long.\n");
183 /* Read chars into read ahead buf until we
184 * have enough to compare for etxdelim.
185 * If possible, always read in at least one char.
187 while (bufcount == 0 || bufcount < delimsz) {
190 if ((i = fgetc (ftext)) == EOF)
194 if (head >= rabufend)
198 /* There are now 3 possibilities.
199 * (1) If bufcount == 0 we got EOF and there
200 * are no chars remaining in buffer, quit now.
201 * (2) Most likely bufcount is nonzero and equals delimsz.
202 * Do a wrap-around strcmp looking for delim string.
203 * (3) If bufcount is positive but less than delimsz,
204 * we got EOF before the last record ended.
205 * We'll fall through to the code that returns the next
206 * char in the buffer, returning the remaining chars one
207 * at a time until exhausted.
208 * Note this sequence also handles the case where delimsz == 0.
215 /* Compare chars in read ahead buf for delim string.
216 * (Note that if the compare succeeds, both bufcount
217 * and delimsz must be > 0).
219 if (bufcount == delimsz) {
221 for (i = 0; i < delimsz; i++) {
222 if (etxdelim[i] != *cptr)
225 if (cptr >= rabufend)
234 /* No ETX yet. Return the oldest char in read ahead buffer. */
/* NOTE(review): the statement this test controls is not visible here;
 * presumably tail wraps back to rabuf -- confirm against the full file. */
236 if (tail >= rabufend)
240 } /* readchar_ftext */
242 /*************************** READCHAR.C ****************************/