1 /* $XConsortium: traninit.c /main/1 1996/07/23 19:43:52 rws $ */
3 * Copyright 1993 Open Software Foundation, Inc., Cambridge, Massachusetts.
8 * Open Software Foundation, Inc.
10 * Permission is hereby granted to use, copy, modify and freely distribute
11 * the software in this file and its documentation for any purpose without
12 * fee, provided that the above copyright notice appears in all copies and
13 * that both the copyright notice and this permission notice appear in
14 * supporting documentation. Further, provided that the name of Open
15 * Software Foundation, Inc. ("OSF") not be used in advertising or
16 * publicity pertaining to distribution of the software without prior
17 * written permission from OSF. OSF makes no representations about the
18 * suitability of this software for any purpose. It is provided "as is"
19 * without express or implied warranty.
22 * Copyright (c) 1996 X Consortium
23 * Copyright (c) 1995, 1996 Dalrymple Consulting
25 * Permission is hereby granted, free of charge, to any person obtaining a copy
26 * of this software and associated documentation files (the "Software"), to deal
27 * in the Software without restriction, including without limitation the rights
28 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
29 * copies of the Software, and to permit persons to whom the Software is
30 * furnished to do so, subject to the following conditions:
32 * The above copyright notice and this permission notice shall be included in
33 * all copies or substantial portions of the Software.
35 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
36 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
37 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
38 * X CONSORTIUM OR DALRYMPLE CONSULTING BE LIABLE FOR ANY CLAIM, DAMAGES OR
39 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
40 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
41 * OTHER DEALINGS IN THE SOFTWARE.
43 * Except as contained in this notice, the names of the X Consortium and
44 * Dalrymple Consulting shall not be used in advertising or otherwise to
45 * promote the sale, use or other dealings in this Software without prior
46 * written authorization.
48 /* ________________________________________________________________________
50 * Program to manipulate SGML instances.
52 * This module contains the initialization routines for the translation module.
53 * They mostly deal with reading data files (translation specs, SDATA
54 * mappings, character mappings).
57 * ReadTransSpec(transfile) read/store translation spec from file
58 * ReadSDATA(sdatafile) read/store SDATA mappings from file
59 * ReadMapping(mapfile) read/store char mappings from file
60 * ________________________________________________________________________
68 #include <sys/types.h>
71 #include <tptregexp.h>
73 #include "translate.h"
79 /* forward references */
80 void RememberTransSpec(Trans_t *, int);
82 /* ______________________________________________________________________ */
83 /* Read the translation specs from the input file, storing in memory.
85 * Name of translation spec file.
94 char buf[LINESIZE], *cp, *fn, *cp2;
98 if ((fp=OpenFile(transfile)) == NULL) {
99 fprintf(stderr, "Can not open translation spec file '%s'.\n%s\n",
100 transfile, strerror(errno));
104 memset(&T, 0, sizeof T); /* initialize/clear structure */
105 while (fgets(buf, LINESIZE, fp)) /* read line from .ts file */
108 /* skip comment and blank lines */
109 if (buf[0] == '#' || buf[0] == NL) continue;
111 /* '-' indicates end of a spec. When we hit one, remember what we've
112 * accumulated so far, and null-out the accumulating structure. */
115 RememberTransSpec(&T, lineno);
116 memset(&T, 0, sizeof T);
122 /* See if next line is continued from this one -- i.e., it starts with
123 * whitespace. If so, append to current line. (This is similar to
124 * how e-mail headers work...) */
126 c = getc(fp); /* 1st char of next line */
127 if (IsWhite(c)) { /* space or tab? */
128 /* keep getting characters until it's a non-whitespace */
130 while (IsWhite(c)) c = getc(fp);
131 ungetc(c, fp); /* put back non-whitespace */
134 fn = buf + i; /* point to end of string in buffer */
135 fgets(fn, LINESIZE-i, fp); /* read and append to buf */
140 ungetc(c, fp); /* put back non-whitespace */
144 /* Isolate field value */
145 if ((cp=strchr(buf, ':'))) {
146 cp++; /* point past colon */
147 while (*cp && IsWhite(*cp)) cp++; /* point to content */
151 "Trans spec error, missing colon (skipping line):\n %s\n", fn);
154 fn = buf; /* fn is name of the field, cp the value. */
156 /* Check field names in order that they're likely to occur. */
157 if (!strncmp("GI:", fn, 3)) {
158 /* if we are folding the case of GIs, make all upper (unless
159 it's an internal pseudo-GI name, which starts with '_') */
160 if (fold_case && cp[0] != '_' && cp[0] != '#') {
161 for (cp2=cp; *cp2; cp2++)
162 if (islower(*cp2)) *cp2 = toupper(*cp2);
164 T.gi = AddElemName(cp);
166 else if (!strncmp("StartText:", fn, 10)) T.starttext = strdup(cp);
167 else if (!strncmp("EndText:", fn, 8)) T.endtext = strdup(cp);
168 else if (!strncmp("Relation:", fn, 9)) {
169 if (!T.relations) T.relations = NewMap(IMS_relations);
170 SetMapping(T.relations, cp);
172 else if (!strncmp("Replace:", fn, 8)) T.replace = strdup(cp);
173 else if (!strncmp("AttValue:", fn, 9)) {
175 Malloc(1, T.attpair, AttPair_t);
178 Realloc((T.nattpairs+1), T.attpair, AttPair_t);
179 /* we'll split name/value pairs later */
180 T.attpair[T.nattpairs].name = strdup(cp);
183 /* If there's only one item in context, it's the parent. Treat
184 * it specially, since it's easier to just check parent gi.
186 else if (!strncmp("Context:", fn, 8)) T.context = strdup(cp);
187 else if (!strncmp("Message:", fn, 8)) T.message = strdup(cp);
188 else if (!strncmp("SpecID:", fn, 7)) T.my_id = atoi(cp);
189 else if (!strncmp("Action:", fn, 7)) T.use_id = atoi(cp);
190 else if (!strncmp("Content:", fn, 8)) T.content = strdup(cp);
191 else if (!strncmp("PAttSet:", fn, 8)) T.pattrset = strdup(cp);
192 else if (!strncmp("Verbatim:", fn, 9)) T.verbatim = TRUE;
193 else if (!strncmp("Ignore:", fn, 7)) {
194 if (!strcmp(cp, "all")) T.ignore = IGN_ALL;
195 else if (!strcmp(cp, "data")) T.ignore = IGN_DATA;
196 else if (!strcmp(cp, "children")) T.ignore = IGN_CHILDREN;
198 fprintf(stderr, "Bad 'Ignore:' arg in transpec (line %d): %s\n",
201 else if (!strncmp("VarValue:", fn, 9)) {
204 tok = Split(cp, &i, S_STRDUP);
206 T.var_value = tok[1];
208 else if (!strncmp("VarREValue:", fn, 11)) {
211 tok = Split(cp, &i, S_STRDUP);
212 T.var_RE_name = tok[0];
213 ExpandVariables(tok[1], buf, 0);
214 if (!(T.var_RE_value=tpt_regcomp(buf))) {
215 fprintf(stderr, "Regex error in VarREValue Content: %s\n",
219 else if (!strncmp("Set:", fn, 4)) {
220 if (!T.set_var) T.set_var = NewMap(IMS_setvar);
221 SetMapping(T.set_var, cp);
223 else if (!strncmp("Increment:", fn, 10)) {
224 if (!T.incr_var) T.incr_var = NewMap(IMS_incvar);
225 SetMapping(T.incr_var, cp);
227 else if (!strncmp("NthChild:", fn, 9)) T.nth_child = atoi(cp);
228 else if (!strncmp("Var:", fn, 4)) SetMapping(Variables, cp);
229 else if (!strncmp("Quit:", fn, 5)) T.quit = strdup(cp);
231 fprintf(stderr, "Unknown translation spec (skipping it): %s\n", fn);
236 /* ______________________________________________________________________ */
237 /* Store translation spec 't' in memory.
239 * Pointer to translation spec to remember.
240 * Line number where translation spec ends.
250 static Trans_t *last_t;
253 /* If context testing, check some details and set things up for later. */
255 /* See if the context specified is a regular expression.
256 * If so, compile the reg expr. It is assumed to be a regex if
257 * it contains a character other than what's allowed for GIs in the
258 * OSF sgml declaration (alphas, nums, '-', and '.').
260 for (do_regex=0,cp=t->context; *cp; cp++) {
261 if (!isalnum(*cp) && *cp != '-' && *cp != '.' && *cp != ' ') {
268 t->depth = MAX_DEPTH;
269 if (!(t->context_re=tpt_regcomp(t->context))) {
270 fprintf(stderr, "Regex error in Context: %s\n", t->context);
274 /* If there's only one item in context, it's the parent. Treat
275 * it specially, since it's faster to just check parent gi.
278 if (!strchr(cp, ' ')) {
279 t->parent = t->context;
283 /* Figure out depth of context string */
287 while (*cp && !IsWhite(*cp)) cp++; /* find end of gi */
288 while (*cp && IsWhite(*cp)) cp++; /* skip space */
294 /* Compile regular expressions for each attribute */
295 for (i=0; i<t->nattpairs; i++) {
296 /* Initially, name points to "name value". Split them... */
297 cp = t->attpair[i].name;
298 while (*cp && !IsWhite(*cp)) cp++; /* point past end of name */
299 if (*cp) { /* value found */
300 *cp++ = EOS; /* terminate name */
301 while (*cp && IsWhite(*cp)) cp++; /* point to value */
302 ExpandVariables(cp, buf, 0); /* expand any variables */
303 t->attpair[i].val = strdup(buf);
305 else { /* value not found */
306 t->attpair[i].val = ".";
308 if (!(t->attpair[i].rex=tpt_regcomp(t->attpair[i].val))) {
309 fprintf(stderr, "Regex error in AttValue: %s %s\n",
310 t->attpair[i].name, t->attpair[i].val);
314 /* Compile regular expression for content */
317 ExpandVariables(t->content, buf, 0);
318 if (!(t->content_re=tpt_regcomp(buf)))
319 fprintf(stderr, "Regex error in Content: %s\n",
323 /* If multiple GIs, break up into a vector, then remember it. We either
324 * store the individual, or the list - not both. */
325 if (t->gi && strchr(t->gi, ' ')) {
326 t->gilist = Split(t->gi, 0, S_ALVEC);
330 /* Now, store structure in linked list. */
332 Malloc(1, TrSpecs, Trans_t);
336 Malloc(1, last_t->next, Trans_t);
337 last_t = last_t->next;
343 /* ______________________________________________________________________ */
344 /* Read mapping file, filling in structure slots (just name-value pairs).
346 * Name of character mapping file.
355 char buf[LINESIZE], *name, *val;
357 int n_alloc=0; /* number of slots allocated so far */
359 if ((fp=OpenFile(filename)) == NULL) {
360 fprintf(stderr, "Can not open character mapping file '%s'.\n%s\n",
361 filename, strerror(errno));
365 /* We allocate slots in blocks of N, so we don't have to call
366 * malloc so many times. */
368 Calloc(n_alloc, CharMap, Mapping_t);
371 while (fgets(buf, LINESIZE, fp))
374 /* skip comment and blank lines */
375 if (buf[0] == '#' || buf[0] == NL) continue;
378 /* Need more slots for mapping structures? */
379 if (nCharMap >= n_alloc) {
381 Realloc(n_alloc, CharMap, Mapping_t);
384 while (*val && !IsWhite(*val)) val++; /* point past end of name */
386 *val++ = EOS; /* terminate name */
387 while (*val && IsWhite(*val)) val++; /* point to value */
390 CharMap[nCharMap].name = strdup(name);
391 if (val) CharMap[nCharMap].sval = strdup(val);
392 if (CharMap[nCharMap].name[0] == '\\') CharMap[nCharMap].name++;
399 /* ______________________________________________________________________ */
400 /* Read SDATA mapping file, remembering the mappings in memory.
401 * Input file format is 2 columns, name and value, separated by one or
402 * more tabs (not spaces).
403 * This can be called multiple times, reading several files.
405 * Name of SDATA entity mapping file.
414 char buf[LINESIZE], *name, *val;
417 if ((fp=OpenFile(filename)) == NULL) {
418 fprintf(stderr, "Can not open SDATA file '%s': %s", filename,
423 if (!SDATAmap) SDATAmap = NewMap(IMS_sdata);
425 while (fgets(buf, LINESIZE, fp))
428 /* skip comment and blank lines */
429 if (buf[0] == '#' || buf[0] == NL) continue;
433 while (*val && *val != TAB) val++; /* point past end of name */
435 *val++ = EOS; /* terminate name */
436 while (*val && *val == TAB) val++; /* point to value */
439 SetMappingNV(SDATAmap, name, val);
444 /* ______________________________________________________________________ */