2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these libraries and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
23 /* $TOG: main.c /main/6 1997/12/23 12:08:23 bill $ */
25 * Copyright 1993 Open Software Foundation, Inc., Cambridge, Massachusetts.
26 * All rights reserved.
30 * Open Software Foundation, Inc.
32 * Permission is hereby granted to use, copy, modify and freely distribute
33 * the software in this file and its documentation for any purpose without
34 * fee, provided that the above copyright notice appears in all copies and
35 * that both the copyright notice and this permission notice appear in
36 * supporting documentation. Further, provided that the name of Open
37 * Software Foundation, Inc. ("OSF") not be used in advertising or
38 * publicity pertaining to distribution of the software without prior
39 * written permission from OSF. OSF makes no representations about the
40 * suitability of this software for any purpose. It is provided "as is"
41 * without express or implied warranty.
44 * Copyright (c) 1996 X Consortium
45 * Copyright (c) 1995, 1996 Dalrymple Consulting
47 * Permission is hereby granted, free of charge, to any person obtaining a copy
48 * of this software and associated documentation files (the "Software"), to deal
49 * in the Software without restriction, including without limitation the rights
50 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
51 * copies of the Software, and to permit persons to whom the Software is
52 * furnished to do so, subject to the following conditions:
54 * The above copyright notice and this permission notice shall be included in
55 * all copies or substantial portions of the Software.
57 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
58 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
59 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
60 * X CONSORTIUM OR DALRYMPLE CONSULTING BE LIABLE FOR ANY CLAIM, DAMAGES OR
61 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
62 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
63 * OTHER DEALINGS IN THE SOFTWARE.
65 * Except as contained in this notice, the names of the X Consortium and
66 * Dalrymple Consulting shall not be used in advertising or otherwise to
67 * promote the sale, use or other dealings in this Software without prior
68 * written authorization.
70 /* ________________________________________________________________________
72 * Program to read an SGML document instance, creating any of several things:
74 * "translated" output for formatting applications (given a trans. spec)
75 * validation report (given an appropriate trans spec)
76 * tree of the document's structure
77 * statistics about the element usage
78 * summary of the elements used
79 * context of each element used
82 * A C structure is created for each element, which includes:
83 * name, attributes, parent, children, content
84 * The tree is descended, and the desired actions performed.
86 * Takes input from James Clark's "sgmls" program (v. 1.1).
87 * ________________________________________________________________________
96 #include <sys/types.h>
104 static int do_context, do_tree, do_summ, do_stats, do_validate, do_idlist;
105 static int do_DATAhack = 0;
106 static char *this_prog;
107 static char *in_file, *out_file;
108 static char *tranfile, *cmapfile, *sdatafile;
109 static char *start_id;
110 static char *last_file;
111 static int last_lineno;
113 extern int BOFTTextThresh;
115 /* forward references */
116 static void HandleArgs(int, char *[]);
117 static void Initialize1();
118 static void Initialize2();
119 static void ReadInstance(char *);
120 static void DoHelpMessage();
121 extern void Browse();
123 /* ______________________________________________________________________ */
124 /* Program entry point. Look at args, read instance, dispatch to the
125 * correct routines to do the work, and finish.
138 ReadInstance(in_file);
141 Browse(); /* this will handle interactive commands */
144 /* Perform tasks based on command line flags... */
147 /* If user wants to start at a particular ID, point to that
148 * element. Else, point to the top of the tree. */
150 if (!(e=FindElemByID(start_id))) {
151 fprintf(stderr, "Error: Can not find element with ID %s\n",
157 if (sdatafile) ReadSDATA(sdatafile);
158 if (cmapfile) ReadCharMap(cmapfile);
159 /* If we're doing validation, make output file pointer null.
160 * This means that we generate no output, except error messages. */
161 if (do_validate) outfp = NULL;
162 DoTranslate(e, tranfile, outfp);
164 if (do_summ) PrintElemSummary(DocTree);
165 if (do_tree) PrintElemTree(DocTree);
166 if (do_stats) PrintStats(DocTree);
167 if (do_context) PrintContext(DocTree);
168 if (do_idlist) PrintIDList();
170 if (out_file && outfp) fclose(outfp);
175 /* ______________________________________________________________________ */
176 /* Initialization stuff done before dealing with args.
178 * Name of program (string).
190 /* where we try to find data/library files */
191 if (!(tpt_lib=getenv(TPT_LIB))) tpt_lib = DEF_TPT_LIB;
193 /* set some global variables */
198 /* setup global variable mapping */
199 Variables = NewMap(IMS_variables);
201 /* set some pre-defined variables */
202 SetMappingNV(Variables, "user", (cp=getenv("USER")) ? cp : "UnknownUser" );
204 nowtm = localtime(&tnow);
205 strftime(buf, 100, "%a %d %b %Y, %R", nowtm);
206 SetMappingNV(Variables, "date", buf);
207 SetMappingNV(Variables, "host", "unknown-host");
208 SetMappingNV(Variables, "transpec", tranfile ? tranfile : "??");
211 /* Initialization stuff done after dealing with args. */
216 SetMappingNV(Variables, "transpec", tranfile ? tranfile : "??");
218 /* If user wants to send output to a file, open the file, and set
219 * the file pointer. Else we send output to standard out. */
221 if (!(outfp = fopen(out_file, "w"))) {
222 fprintf(stderr, "Could not open output '%s' file for writing.\n%s",
223 out_file, strerror(errno));
230 /* ______________________________________________________________________ */
231 /* Set a variable. If it is one of the "known" variables, set the
232 * variable in the C code (this program).
234 * Variable name/value string - separated by an '=' (eg, "myname=Sally").
241 char *cp, buf[100], **tok;
244 /* Turn '=' into a space, to isolate the name. Then set variable. */
246 if ((cp=strchr(buf, '='))) {
247 /* we have "var=value" */
250 tok = Split(buf, &n, 0);
251 /* see if variable name matches one of our internal ones */
252 if (!strcmp(tok[0], "verbose")) verbose = atoi(tok[1]);
253 else if (!strcmp(tok[0], "warnings")) warnings = atoi(tok[1]);
254 else if (!strcmp(tok[0], "foldcase")) fold_case = atoi(tok[1]);
255 else SetMappingNV(Variables, tok[0], tok[1]);
258 fprintf(stderr, "Expected an '=' in variable assignment: %s. Ignored\n",
263 /* ______________________________________________________________________ */
264 /* Bounce through arguments, setting variables and flags.
266 * Argc and Argv, as passed to main().
278 while ((c=getopt(ac, av, "df:t:vc:s:o:huSxIl:bHVWi:D:Z")) != EOF) {
280 case 't': tranfile = optarg; break;
281 case 'v': do_validate = 1; break;
282 case 's': sdatafile = optarg; break;
283 case 'c': cmapfile = optarg; break;
284 case 'h': do_tree = 1; break;
285 case 'u': do_summ = 1; break;
286 case 'S': do_stats = 1; break;
287 case 'x': do_context = 1; break;
288 case 'I': do_idlist = 1; break;
289 case 'l': tpt_lib = optarg; break;
290 case 'i': start_id = optarg; break;
291 case 'o': out_file = optarg; break;
292 case 'd': do_DATAhack = 1; break;
293 case 'f': BOFTTextThresh = atoi(optarg); break;
294 case 'b': interactive = 1; break;
295 case 'W': warnings = 0; break;
296 case 'V': verbose = 1; break;
297 case 'Z': slave = 1; break;
298 case 'H': DoHelpMessage(); exit(0); break;
299 case 'D': CmdLineSetVariable(optarg); break;
300 case '?': errflag = 1; break;
303 fprintf(stderr, "Try '%s -H' for help.\n", this_prog);
308 /* input (ESIS) file name */
309 if (optind < ac) in_file = av[optind];
311 /* If doing interactive/browsing, we can't take ESIS from stdin. */
312 if (interactive && !in_file) {
314 "You must specify ESIS file on cmd line for browser mode.\n");
319 /* ______________________________________________________________________ */
320 /* Simply print out a help/usage message.
323 static char *help_msg[] = {
325 " -t file Print translated output using translation spec in <file>",
326 " -s file <file> contains a list of SDATA entity mappings",
327 " -c file <file> contains a list of character mappings",
328 " -v Validate using translation spec specified with -t",
329 " -i id Consider only subtree starting at element with ID <id>",
330 " -b Interactive browser",
331 " -S Print statistics (how often elements occur, etc.)",
332 " -u Print element usage summary (# of children, depth, etc.)",
333 " -x Print context of each element",
334 " -h Print document hierarchy as a tree",
335 " -o file Write output to <file>. Default is standard output.",
336 " -l dir Set library directory to <dir>. (or env. variable TPT_LIB)",
337 " -I List all IDs used in the instance",
338 " -W Do not print warning messages",
339 " -H Print this help message",
340 " -Dvar=val Set variable 'var' to value 'val'",
341 " file Take input from named file. If not specified, assume stdin.",
342 " File should be output from the 'sgmls' program (ESIS).",
350 printf("usage: %s [option ...] [file]", this_prog);
351 while (*s) puts(*s++);
355 /* ______________________________________________________________________ */
356 /* Remember an external entity for future reference.
358 * Pointer to entity structure to remember.
366 static Entity_t *last_ent;
369 Malloc(1, Entities, Entity_t);
373 Malloc(1, last_ent->next, Entity_t);
374 last_ent = last_ent->next;
380 /* Find an entity, given its entity name.
382 * Name of entity to retrieve.
390 for (n=Entities; n; n=n->next)
391 if (StrEq(ename, n->ename)) return n;
395 /* Accumulate lines up to the open tag. Attributes, line number,
396 * entity info, notation info, etc., all come before the open tag.
403 char buf[LINESIZE+1];
412 static Element_t *last_e;
415 Calloc(1, e, Element_t);
416 memset(&ent, 0, sizeof ent); /* clean space for entity info */
418 /* Also, keep a linked list of elements, so we can easily scan through */
419 if (last_e) last_e->next = e;
422 e->index = Index++; /* just a unique number for identification */
424 /* in case these are not set for this element in the ESIS */
425 e->lineno = last_lineno;
426 e->infile = last_file;
430 if ((c = getc(fp)) == EOF) break;
431 fgets(buf, LINESIZE, fp);
434 case EOF: /* End of input */
435 fprintf(stderr, "Error: Unexpectedly reached end of ESIS.\n");
439 case CMD_OPEN: /* (gi */
440 e->gi = AddElemName(buf);
442 Malloc(na, e->atts, Mapping_t);
443 memcpy(e->atts, a, na*sizeof(Mapping_t));
447 /* Check if this elem has a notation attr. If yes, and there
448 is no notation specified, recall the previous one. (feature
449 of sgmls - it does not repeat notation stuff if the same
450 is used twice in a row) */
451 if (((atval=FindAttValByName(e, "NAME")) ||
452 (atval=FindAttValByName(e, "ENTITYREF")) ||
453 (atval=FindAttValByName(e, "EXTERNAL"))) && /* HACK */
454 (ent2=FindEntity(atval))) {
461 case CMD_ATT: /* Aname val */
463 tok = Split(buf, &i, 0);
464 if (!strcmp(tok[1], "IMPLIED")) break; /* skip IMPLIED atts. */
465 if (!strcmp(tok[1], "CDATA") || !strcmp(tok[1], "TOKEN") ||
466 !strcmp(tok[1], "ENTITY") ||!strcmp(tok[1], "NOTATION"))
468 a[na].name = AddAttName(tok[0]);
469 a[na].sval = AddAttName(tok[2]);
473 fprintf(stderr, "Error: Bad attr line (%d): A%s %s...\n",
474 e->lineno, tok[0], tok[1]);
478 case CMD_LINE: /* Llineno */
479 /* These lines come in 2 forms: "L123" and "L123 file.sgml".
480 * Filename is given only at 1st occurrence. Remember it.
482 if ((cp = strchr(buf, ' '))) {
484 last_file = strdup(cp);
486 last_lineno = e->lineno = atoi(buf);
487 e->infile = last_file;
490 case CMD_DATA: /* -data */
491 fprintf(stderr, "Error: Data in AccumElemInfo, line %d:\n%c%s\n",
497 case CMD_D_ATT: /* Dename name val */
499 case CMD_NOTATION: /* Nnname */
500 case CMD_PI: /* ?pi */
501 /* This should be reworked soon, as it
502 forces all PI's before the first GI
503 to be ignored. -CSS */
506 case CMD_EXT_ENT: /* Eename typ nname */
508 tok = Split(buf, &i, 0);
509 ent.ename = strdup(tok[0]);
510 ent.type = strdup(tok[1]);
511 ent.nname = strdup(tok[2]);
514 case CMD_INT_ENT: /* Iename typ text */
515 fprintf(stderr, "Error: Got CMD_INT_ENT in AccumElemInfo: %s\n", buf);
517 case CMD_SYSID: /* ssysid */
518 ent.sysid = strdup(buf);
520 case CMD_PUBID: /* ppubid */
521 ent.pubid = strdup(buf);
523 case CMD_FILENAME: /* ffilename */
524 ent.fname = strdup(buf);
527 case CMD_CLOSE: /* )gi */
528 case CMD_SUBDOC: /* Sename */
529 case CMD_SUBDOC_S: /* {ename */
530 case CMD_SUBDOC_E: /* }ename */
531 case CMD_EXT_REF: /* &name */
532 case CMD_APPINFO: /* #text */
533 case CMD_CONFORM: /* C */
535 fprintf(stderr, "Error: Unexpected input in AccumElemInfo, %d:\n%c%s\n",
541 fprintf(stderr, "Error: End of AccumElemInfo - should not be here: %s\n",
548 * Limitation? Max 50000 children per node. (done for efficiency --
549 * should do some malloc and bookkeeping games later).
560 int curnum = 10; /* current # of elements allocated */
564 Malloc( curnum, cont, Content_t );
565 Malloc( LINESIZE+1, buf, char );
567 /* Read input stream - the output of "sgmls", called "ESIS". */
568 e = AccumElemInfo(fp);
573 if (ncont >= curnum) { /* may need more */
575 Realloc(curnum, cont, Content_t);
578 if ((c = getc(fp)) == EOF) break;
580 case EOF: /* End of input */
583 case CMD_DATA: /* -data */
584 fgets(buf, LINESIZE, fp);
586 if (do_DATAhack && (buf[0] == '\\') && (buf[1] == 'n') ) {
587 buf[0] = -1; /* simulate "^" command */
588 memcpy(&buf[1], &buf[2], strlen(buf)-1);
590 cont[ncont].ch.data = strdup(buf);
591 cont[ncont].type = CMD_DATA;
595 case CMD_PI: /* ?pi */
596 fgets(buf, LINESIZE, fp);
598 cont[ncont].type = CMD_PI;
599 cont[ncont].ch.data = strdup(buf);
603 case CMD_CLOSE: /* )gi */
604 fgets(buf, LINESIZE, fp);
608 Malloc(ncont, e->cont, Content_t);
609 for (i=0; i<ncont; i++) e->cont[i] = cont[i];
616 case CMD_OPEN: /* (gi */
617 /*fprintf(stderr, "+++++ OPEN +++\n");*/
620 case CMD_ATT: /* Aname val */
621 case CMD_D_ATT: /* Dename name val */
622 case CMD_NOTATION: /* Nnname */
623 case CMD_EXT_ENT: /* Eename typ nname */
624 case CMD_INT_ENT: /* Iename typ text */
625 case CMD_SYSID: /* ssysid */
626 case CMD_PUBID: /* ppubid */
627 case CMD_FILENAME: /* ffilename */
629 cont[ncont].ch.elem = ReadESIS(fp, depth+1);
630 cont[ncont].type = CMD_OPEN;
631 cont[ncont].ch.elem->parent = e;
635 case CMD_LINE: /* Llineno */
636 fgets(buf, LINESIZE, fp);
637 break; /* ignore these here */
639 case CMD_SUBDOC: /* Sename */
640 case CMD_SUBDOC_S: /* {ename */
641 case CMD_SUBDOC_E: /* }ename */
642 case CMD_EXT_REF: /* &name */
643 case CMD_APPINFO: /* #text */
644 case CMD_CONFORM: /* C */
646 fgets(buf, LINESIZE, fp);
647 fprintf(stderr, "Error: Unexpected input at %d: '%c%s'\n",
653 fprintf(stderr, "Error: End of ReadESIS - should not be here: %s\n", e->gi);
659 /* ______________________________________________________________________ */
660 /* Read input stream, creating a tree in memory of the elements and data.
662 * Filename where instance's ESIS is.
674 if (filename) { /* if we specified input file. else stdin */
675 if ((fp=fopen(filename, "r")) == NULL) {
681 last_file = filename;
682 DocTree = ReadESIS(fp, 0);
683 if (filename) fclose(fp);
685 /* Traverse tree, filling in econt and figuring out which child
686 * (ie. what birth order) each element is. */
687 DocTree->my_eorder = -1;
688 for (e=DocTree; e; e=e->next) {
690 /* count element children */
691 for (i=0,n=0; i<e->ncont; i++) if (IsContElem(e,i)) n++;
692 if (n > 0) Calloc(n, e->econt, Element_t *);
693 for (i=0; i<e->ncont; i++)
694 if (IsContElem(e,i)) e->econt[e->necont++] = ContElem(e,i);
696 /* count data children */
697 for (i=0,n=0; i<e->ncont; i++) if (IsContData(e,i)) n++;
698 if (n > 0) Calloc(n, e->dcont, char *);
699 for (i=0; i<e->ncont; i++)
700 if (IsContData(e,i)) e->dcont[e->ndcont++] = ContData(e,i);
702 /* record each child element's position in the child order */
703 for (i=0; i<e->necont; i++)
704 e->econt[i]->my_eorder = i;
706 /* Does this element have an ID? */
707 for (i=0; i<e->natts; i++) {
708 if ((idatt=FindAttValByName(e, "ID"))) {
710 /* remember ID value for quick reference */
719 /* ______________________________________________________________________ */