cde/programs/dthelp/parser/pass1/parser/scan.c

   1 /*
   2  * CDE - Common Desktop Environment
   3  *
   4  * Copyright (c) 1993-2012, The Open Group. All rights reserved.
   5  *
   6  * These libraries and programs are free software; you can
   7  * redistribute them and/or modify them under the terms of the GNU
   8  * Lesser General Public License as published by the Free Software
   9  * Foundation; either version 2 of the License, or (at your option)
  10  * any later version.
  11  *
  12  * These libraries and programs are distributed in the hope that
  13  * they will be useful, but WITHOUT ANY WARRANTY; without even the
  14  * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15  * PURPOSE. See the GNU Lesser General Public License for more
  16  * details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with these libraries and programs; if not, write
  20  * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
  21  * Floor, Boston, MA 02110-1301 USA
  22  */
  23 /* $XConsortium: scan.c /main/3 1995/11/08 10:24:54 rswiston $ */
  24 /*
  25               Copyright 1986 Tandem Computers Incorporated.
  26 This product and information is proprietary of Tandem Computers Incorporated.
  27                    Copyright 1986, 1987, 1988, 1989 Hewlett-Packard Co.
  28 */
  29
  30 /* Scan.c is the scanner for program PARSER */
  31
  32 #include <string.h>
  33 #include <malloc.h>
  34 #include <stdio.h>
  35 #if defined(MSDOS)
  36 #include <process.h>
  37 #endif
  38 #include "basic.h"
  39 #include "trie.h"
  40 #define M_CONDEF
  41 #include "context.h"
  42 #define M_DELIMDEF
  43 #include "delim.h"
  44 #define M_DTDDEF
  45 #include "dtd.h"
  46 #include "arc.h"
  47 #define M_PARDEF
  48 #include "parser.h"
  49 #define M_ENTDEF
  50 #include "entity2.h"
  51 #include "sref.h"
  52
  53 /* Actually read a character from an input stream */
  54 int m_actgetc(M_NOPAR)
  55   {
  56     int c ;
  57
  58     c = m_getc(m_sysent[m_sysecnt]) ;
  59     m_saveline[m_svlncnt[m_sysecnt]][m_sysecnt] = c ;
  60     if (++m_svlncnt[m_sysecnt] >= M_LINELENGTH) {
  61       m_svlncnt[m_sysecnt] = 0 ;
  62       m_svlnwrap[m_sysecnt] = TRUE ;
  63       }
  64     return(c) ;
  65     }
  66
  67 /* Expand an entity reference */
  68 void m_entexpand(openent)
  69   M_ENTITY *openent ;
  70   {
  71     M_WCHAR *p ;
  72     M_HOLDTYPE dchar ;
  73     char buffer[10] ;
  74     int i ;
  75
  76     m_ungetachar(M_NULLVAL, M_EE, FALSE) ;
  77     m_eopencnt++ ;
  78     m_opene[m_eopencnt - 1] = openent ;
  79
  80     if (m_stacktop->element &&
  81         m_element[m_stacktop->element - 1].content == M_RCDATA)
  82       m_curcon = RCNEWENT ;
  83     if (m_curcon == LITCON || m_curcon == LITACON)
  84       m_curcon = ENTINLIT ;
  85     if (! openent->wheredef) {
  86       m_eopencnt-- ;
  87       m_err1("%s: System error -- no definition for predeclared entity",
  88              openent->name) ;
  89       m_eopencnt++ ;
  90       return ;
  91       }
  92     if (m_curcon == ENTINLIT)
  93       if (openent->type != M_GENERAL) {
  94         m_eopencnt-- ;
  95         m_err1("%s: Typed entity not allowed in parameter value",
  96                openent->name) ;
  97         m_eopencnt++ ;
  98         return ;
  99         }
 100     if (m_eopencnt > M_ENTLVL) {
 101       m_eopencnt-- ;
 102       m_err1("%s: Too many nested entities", openent->name) ;
 103       m_eopencnt++ ;
 104       return ;
 105       }
 106     for (i = 0 ; i < m_eopencnt - 1; i++)
 107       if (m_opene[i] == openent) {
 108         m_eopencnt-- ;
 109         m_err1("Recursive call to entity %s ignored", openent->name) ;
 110         m_eopencnt++ ;
 111         return ;
 112         }
 113
 114     /* If SDATA or PI entity (regular or CODE) at beginning of document
 115        instance, call m_startdoc and reset m_curcon past preamble */
 116     if (m_curcon == PREAMBLE &&
 117         (openent->type == M_SDATA ||
 118          openent->type == M_CODESDATA ||
 119          openent->type == M_PI ||
 120          openent->type == M_CODEPI)) {
 121       m_startdoc() ;
 122       m_curcon = START ;
 123       m_adjuststate() ;
 124       }
 125
 126     /* SDATA entity */
 127       if (openent->type == M_SDATA || openent->type == M_CODESDATA) {
 128         if (! m_stacktop->intext) {
 129           if (! m_strtproc(M_NULLVAL)) {
 130             if (m_stacktop->oldtop)
 131               m_err1("SDATA entity not allowed at this point in %s",
 132                      m_nameofelt(m_stacktop->element)) ;
 133             else if (! m_start)
 134               m_error("Document may not start with SDATA entity") ;
 135             }
 136           m_start = TRUE ;
 137           m_stacktop->firstre = TRUE ;
 138           m_stacktop->intext = TRUE ;
 139           if (m_curcon == ELCON || m_curcon == DATACON)
 140             m_curcon = POUNDCDATA ;
 141           else if (m_curcon == NETELCON || m_curcon == NETDATACON)
 142             m_curcon = NETCDATA ;
 143           }
 144         m_stacktop->linestat = M_DCORCET ;
 145         m_holdproc() ;
 146         }
 147
 148     /* CODE entity */
 149     if (openent->type == M_CODEPI || openent->type == M_CODESDATA) {
 150       if (openent->type == M_CODEPI)
 151         m_stacktop->linestat = M_SOMETHING ;
 152       m_codeent(openent->codeindex) ;
 153       return ;
 154       }
 155
 156     /* PI or SDATA, but not CODE entity */
 157     if (openent->type == M_PI || openent->type == M_SDATA) {
 158       m_piaction(openent->content, openent->name, openent->type) ;
 159       return ;
 160       }
 161
 162     /* Subordinate data file */
 163     if (openent->type == M_SYSTEM) {
 164       m_sysent[m_sysecnt + 1] = m_openent(openent->content) ;
 165       if (m_sysent[m_sysecnt + 1]) {
 166         m_sysecnt++ ;
 167         m_line[m_sysecnt] = 1 ;
 168         m_svlncnt[m_sysecnt] = 0 ;
 169         m_svlnwrap[m_sysecnt] = FALSE ;
 170         if (m_chtrace) {
 171           m_trace("Opening `") ;
 172           m_wctrace(openent->content) ;
 173           m_trace("'(") ;
 174           sprintf(buffer, "%d", m_sysecnt) ;
 175           m_trace(buffer) ;
 176           m_trace(")\n") ;
 177           }
 178         return ;
 179         }
 180       m_eopencnt-- ;
 181       m_err1("Unable to open file %s", openent->content) ;
 182       m_eopencnt++ ;
 183       return ;
 184       }
 185
 186     /* An entity reference has been encountered.  Put the content of the
 187        entity, including any leading or trailing delimiters into the input
 188        stream in reverse order */
 189     /* Closing delimiter */
 190     switch (openent->type) {
 191       case M_STARTTAG:
 192       case M_ENDTAG: {
 193         m_undodelim(m_dlmptr[M_TAGC - 1], FALSE) ;
 194         break ;
 195         }
 196       case M_MD: {
 197         m_undodelim(m_dlmptr[M_MDC - 1], FALSE) ;
 198         break ;
 199         }
 200       default:
 201         break ;
 202       }
 203     /* Content of entity -- scan for end to reverse string */
 204     if (openent->type == M_CDATAENT) dchar = M_CDCHAR ;
 205     else dchar = M_ENTNORMAL ;
 206     if (p = openent->content)
 207       while (*p) p++;
 208     if (p != openent->content) {
 209       p-- ;
 210       while (TRUE) {
 211         m_ungetachar((int) *p, dchar, FALSE) ;
 212         if (p == openent->content) break ;
 213         p-- ;
 214         }
 215       }
 216     /* Opening delimiter */
 217     switch (openent->type) {
 218       case M_STARTTAG: {
 219         m_undodelim(m_dlmptr[M_STAGO - 1], FALSE) ;
 220         break ;
 221         }
 222       case M_ENDTAG: {
 223         m_undodelim(m_dlmptr[M_ETAGO - 1], FALSE) ;
 224         break ;
 225         }
 226       case M_MD: {
 227         m_undodelim(m_dlmptr[M_MDO - 1], FALSE) ;
 228         break ;
 229         }
 230       default:
 231         break ;
 232       }
 233     } /* End m_entexpand */
 234
 235 /* An srlen-character long short-reference delimiter has been found.  Verify
 236    that it is not the prefix of a general delimiter recognized in context*/
 237 LOGICAL m_gendelim(srlen, context)
 238   int srlen ;
 239   int context ;
 240   {
 241     int ghold[MAXD + 1] ;
 242     int ucase ;
 243     int next ;
 244     int i, n = 0, current, delim[MAXD + 1], oldchars = 0 ;
 245     int newcharstart = 0 ;
 246     M_HOLDTYPE dhold[MAXD + 1], dchar ;
 247     LOGICAL linestart ;
 248     LOGICAL found ;
 249
 250     if (! (current = m_contree[context - 1])) return(FALSE) ;
 251     linestart = TRUE ;
 252     for (i = 0 ; i <= srlen ; i++)
 253       if (m_srefchartype[i] != M_RSCHAR && m_srefchartype[i] != M_WSCHAR) {
 254         linestart = FALSE ;
 255         break ;
 256       }
 257     if (linestart) return(FALSE) ;
 258
 259     current-- ;
 260     while (TRUE) {
 261       delim[n] = FALSE ;
 262       while (oldchars <= srlen &&
 263              (m_srefchartype[oldchars] == M_RSCHAR ||
 264               m_srefchartype[oldchars] == M_WSCHAR))
 265         oldchars++ ;
 266       if (oldchars <= srlen)
 267         ucase = m_hold[oldchars++] ;
 268       else {
 269         if (! newcharstart) newcharstart = n ;
 270         ghold[n] = m_getachar(&dhold[n]) ;
 271         ucase = m_ctupper(ghold[n]) ;
 272         if (dhold[n] != M_NORMAL && dhold[n] != M_ENTNORMAL) break ;
 273         }
 274       for (i = current ;
 275            (int) m_delimtrie[i].symbol < ucase && m_delimtrie[i].more ;
 276            i++) ;
 277       if ((int) m_delimtrie[i].symbol == ucase) {
 278         current = m_delimtrie[i].index ;
 279         if (! m_delimtrie[current].symbol)
 280           delim[n] = m_delimtrie[current].index ;
 281         n++ ;
 282         }
 283       else break ;
 284       }
 285
 286     if (! newcharstart) return(FALSE) ;
 287     while (n >= newcharstart - 1) {
 288       found = FALSE ;
 289       if (delim[n]) {
 290         /* Found a delimiter. If it ends with a letter, verify
 291            that the following character is not a letter, in order
 292            to issue error messages in cases such as <!ENTITYrunon ... */
 293         if (m_cttype(ghold[n]) != M_NMSTART) found = TRUE ;
 294         else {
 295           next = m_getachar(&dchar) ;
 296           m_ungetachar(next, dchar, TRUE) ;
 297           if (next == EOF || m_cttype(next) != M_NMSTART)
 298             found = TRUE ;
 299           }
 300         }
 301       if (found) {
 302         if (delim[n] == M_ERO || delim[n] == M_STAGO ||
 303             delim[n] == M_ETAGO) {
 304           next = m_getachar(&dchar) ;
 305           m_ungetachar(next, dchar, TRUE) ;
 306           if (! (m_cttype(next) == M_NMSTART &&
 307                  (dchar == M_NORMAL || dchar == M_ENTNORMAL))) {
 308             n-- ;
 309             continue ;
 310             }
 311           }
 312         while (n >= newcharstart) {
 313           m_ungetachar(ghold[n], dhold[n], TRUE) ;
 314           n-- ;
 315           }
 316         return(TRUE) ;
 317         } /* End if delim[n] */
 318       if (n >= newcharstart) m_ungetachar(ghold[n], dhold[n], TRUE) ;
 319       n-- ;
 320       }
 321
 322     return(FALSE) ;
 323     }
 324
 325 /* Reads next input character from the current source file or from an
 326    entity expansion */
 327 int m_getachar(dchar)
 328   M_HOLDTYPE *dchar ;
 329   {
 330     int c ;
 331     int i ;
 332     char buffer[10] ;
 333     int length;
 334     M_WCHAR wc_ee, wc_re;
 335     char    mb_ee, mb_re;
 336
 337     mb_ee = M_EE;
 338     mb_re = M_RE;
 339     mbtowc(&wc_ee, &mb_ee, 1);
 340     mbtowc(&wc_re, &mb_re, 1);
 341     if (m_toundo && m_sysecnt <= m_sourcefile[m_toundo - 1]) {
 342       c = m_savechar[--m_toundo] ;
 343       *dchar = m_savedchar[m_toundo] ;
 344       if (*dchar == wc_ee) m_atrs = (M_WCHAR) c;
 345       }
 346     else {
 347       c = m_actgetc() ;
 348       *dchar = M_NORMAL ;
 349       if (m_whitespace((M_WCHAR) c) && c != wc_re) {
 350         /* White space, but not RE, i.e., space or tab */
 351         for (m_wscount = 0 ; m_wscount < M_WSPACELEN ; m_wscount++) {
 352           m_wspace[m_wscount] = m_actgetc() ;
 353           if (! m_whitespace((M_WCHAR) m_wspace[m_wscount]) ||
 354               m_wspace[m_wscount] == wc_re)
 355             break ;
 356           }
 357         if (m_whitespace((M_WCHAR) m_wspace[m_wscount]) &&
 358             m_wspace[m_wscount] != wc_re) {
 359           m_error("Ignoring blank or tab") ;
 360           while (m_whitespace((M_WCHAR) m_wspace[m_wscount]) &&
 361                  m_wspace[m_wscount] != wc_re)
 362             m_wspace[m_wscount] = m_actgetc() ;
 363           }
 364         if (m_wscount > m_maxws) m_maxws = m_wscount ;
 365         if (m_wspace[m_wscount] == wc_re) c = wc_re ;
 366         else {
 367           for (i = 0 ; i <= m_wscount ; i++)
 368             m_ungetachar(m_wspace[m_wscount - i], M_NORMAL, FALSE) ;
 369           }
 370         } /* End just read a blank or tab, is it line-trailing? */
 371       } /* End read a character from file */
 372
 373     m_oldlsindex = (m_oldlsindex + 1) % M_SAVECHAR ;
 374     m_oldlinestat[m_oldlsindex] = m_stacktop->linestat ;
 375     m_oldatrs[m_oldlsindex] = m_atrs ;
 376     if (c == wc_re && *dchar) {
 377       if (*dchar == M_NORMAL) m_line[m_sysecnt]++ ;
 378       m_stacktop->linestat = M_NOTHING ;
 379       m_atrs = TRUE ;
 380       }
 381     else if (*dchar) m_atrs = FALSE ;
 382     if (m_chtrace) {
 383       if (*dchar) {
 384         m_trace("get(") ;
 385         length = wctomb(buffer, c);
 386         buffer[length] = 0;
 387         m_trace(buffer) ;
 388         m_trace(")[") ;
 389         sprintf(buffer, "%d", c) ;
 390         m_trace(buffer) ;
 391         m_trace("],") ;
 392         sprintf(buffer, "%d", *dchar) ;
 393         m_trace(buffer) ;
 394         m_trace("\n") ;
 395         }
 396       else m_trace("get(EE)\n") ;
 397       }
 398     return(c) ;
 399     }
 400
 401 /* Reads a name token */
 402 #if defined(M_PROTO)
 403 void m_getname(M_WCHAR first)
 404 #else
 405 void m_getname(first)
 406   M_WCHAR first ;
 407 #endif
 408 {
 409     M_WCHAR *p ;
 410     M_HOLDTYPE dchar ;
 411     int c ;
 412
 413     *(p = m_name) = first ;
 414     while (TRUE) {
 415       c = m_getachar(&dchar) ;
 416       if (c == EOF) break ;
 417       if (dchar != M_NORMAL && dchar != M_ENTNORMAL) break ;
 418       if (m_cttype(c) == M_NONNAME) break ;
 419       *++p = (M_WCHAR) c ;
 420       if (p >= m_name + M_NAMELEN) {
 421         p-- ;
 422         m_error("Name too long") ;
 423         while ((dchar == M_NORMAL || dchar == M_ENTNORMAL) &&
 424                c != EOF &&
 425                m_cttype(c) != M_NONNAME)
 426           c = m_getachar(&dchar) ;
 427         break ;
 428         }
 429       }
 430     m_ungetachar(c, dchar, TRUE) ;
 431     *++p = M_EOS ;
 432     }
 433
 434 /* Reads the next token */
 435 int m_gettoken(c, dchar, context)
 436   int *c ;
 437   M_HOLDTYPE *dchar ;
 438   int context ;
 439   {
 440     int hold[MAXD + 1], next ;
 441     int ucase ;
 442     int i, n = 0, current, delim[MAXD + 1], nexttoken ;
 443     M_HOLDTYPE dhold[MAXD + 1] ;
 444     LOGICAL found ;
 445
 446     switch (context) {
 447       case DATACON:
 448       case NETDATACON:
 449       case POUNDCDATA:
 450       case NETCDATA:
 451       case ELCON:
 452       case NETELCON:
 453         if (m_stacktop->oldtop) m_shortref(context) ;
 454         break ;
 455       default:
 456         break ;
 457       }
 458     if (! (current = m_contree[context - 1])) {
 459       *c = m_getachar(dchar) ;
 460       return(M_NULLVAL) ;
 461       }
 462     current-- ;
 463     while (TRUE) {
 464       hold[n] = m_getachar(&dhold[n]) ;
 465       ucase = m_ctupper(hold[n]) ;
 466       delim[n] = FALSE ;
 467       if (dhold[n] != M_NORMAL && dhold[n] != M_ENTNORMAL) break ;
 468       for (i = current ;
 469            (int) m_delimtrie[i].symbol < ucase && m_delimtrie[i].more ;
 470            i++) ;
 471       if ((int) m_delimtrie[i].symbol == ucase) {
 472         current = m_delimtrie[i].index ;
 473         if (! m_delimtrie[current].symbol)
 474           delim[n] = m_delimtrie[current].index ;
 475         n++ ;
 476         }
 477       else break ;
 478       }
 479
 480     while (n >= 0) {
 481       found = FALSE ;
 482       if (delim[n]) {
 483         /* Found a delimiter. If it ends with a letter, verify
 484            that the following character is not a letter, in order
 485            to issue error messages in cases such as <!ENTITYrunon ... */
 486         if (m_cttype(hold[n]) != M_NMSTART) found = TRUE ;
 487         else {
 488           *c = m_getachar(dchar) ;
 489           m_ungetachar(*c, *dchar, TRUE) ;
 490           if (*c == EOF || m_cttype(*c) != M_NMSTART) found = TRUE ;
 491           }
 492         }
 493       if (found) {
 494         if (delim[n] == M_CRO) {
 495           next = m_getachar(dchar) ;
 496           if ((*dchar != M_NORMAL && *dchar != M_ENTNORMAL) ||
 497               (m_cttype(next) != M_DIGIT))
 498             m_ungetachar(next, *dchar, TRUE) ;
 499           else {
 500             m_scanval = next - '0' ;
 501             while (TRUE) {
 502               next = m_getachar(dchar) ;
 503               if ((*dchar != M_NORMAL && *dchar != M_ENTNORMAL) ||
 504                   (m_cttype(next) != M_DIGIT)) {
 505                 m_ungetachar(next, *dchar, TRUE) ;
 506                 if (! m_gettoken(&next, dchar, ENTREF))
 507                   m_ungetachar(next, *dchar, TRUE) ;
 508                 if (context == ELCON || context == NETELCON)
 509                   return(M_BLACKSPACE) ;
 510                 else return(M_TEXT) ;
 511                 }
 512               m_scanval = 10 * m_scanval + next - '0' ;
 513               if (m_scanval >= M_CHARSETLEN) {
 514                 m_error("Invalid character code") ;
 515                 m_scanval = (m_scanval - next + '0') / 10 ;
 516                 m_ungetachar(next, *dchar, TRUE) ;
 517                 if (context == ELCON || context == NETELCON)
 518                   return(M_BLACKSPACE) ;
 519                 else return(M_TEXT) ;
 520                 }
 521               } /* End loop reading digits after M_CRO */
 522             } /* End M_CRO followed by digit */
 523           } /* End delim[n] == M_CRO */
 524         else if (delim[n] == M_ERO)
 525           if (m_vldentref())
 526             return(m_gettoken(c, dchar,
 527                             (m_curcon == RCNEWENT || m_curcon == ENTINLIT) ?
 528                               m_curcon : context)) ;
 529         /* Can be an M_ERO or M_CRO here only if not in context and hence
 530            should not be treated as a delimiter */
 531         if (delim[n] != M_STAGO && delim[n] != M_ETAGO &&
 532             delim[n] != M_ERO && delim[n] != M_CRO)
 533           return(delim[n]) ;
 534         /* M_STAGO and M_ETAGO recognized only if immediately followed by
 535            a M_NMSTART character or by an appropriate closing delimiter
 536            (latter is a short tag) */
 537         if (delim[n] == M_STAGO || delim[n] == M_ETAGO) {
 538           next = m_getachar(dchar) ;
 539           m_ungetachar(next, *dchar, TRUE) ;
 540           if (m_cttype(next) == M_NMSTART &&
 541               (*dchar == M_NORMAL || *dchar == M_ENTNORMAL))
 542             return(delim[n]) ;
 543           nexttoken = m_gettoken(&next, dchar,
 544             delim[n] == M_STAGO ? SELEMENT : EELEMENT) ;
 545           if (nexttoken) {
 546             m_undodelim(m_dlmptr[nexttoken - 1], TRUE) ;
 547             return(delim[n]) ;
 548             }
 549           else m_ungetachar(next, *dchar, TRUE) ;
 550           } /* End delim[n] is M_STAGO or M_ETAGO */
 551         } /* End if (delim[n]) */
 552       if (n) m_ungetachar(hold[n], dhold[n], TRUE) ;
 553       n-- ;
 554       }
 555
 556     *c = *hold ;
 557     *dchar = *dhold ;
 558     return(M_NULLVAL) ;
 559     }
 560
 561 /* Reads a literal */
 562 void m_litproc(delim)
 563   int delim ;
 564   {
 565     int n, i ;
 566     M_HOLDTYPE dchar ;
 567     int savecon = m_curcon ;
 568     int c ;
 569     int atentlev ;
 570     int atdelimcon ;
 571     char mb_re, mb_tab, mb_space, mb_null, mb_ee;
 572     M_WCHAR wc_re, wc_tab, wc_space, wc_null, wc_ee;
 573
 574     mb_re = M_RE;
 575     mb_tab = M_TAB;
 576     mb_space = M_SPACE;
 577     mb_null = M_NULLVAL;
 578     mb_ee = M_EE;
 579     mbtowc(&wc_re, &mb_re, 1);
 580     mbtowc(&wc_tab, &mb_tab, 1);
 581     mbtowc(&wc_space, &mb_space, 1);
 582     mbtowc(&wc_null, &mb_null, 1);
 583     mbtowc(&wc_ee, &mb_ee, 1);
 584
 585     m_curcon = delim == M_LIT ? LITCON : LITACON ;
 586     atentlev = m_eopencnt ;
 587     atdelimcon = m_curcon ;
 588     for (i = 0 ; i < M_LITLEN + 1 ; i++) {
 589       n = m_gettoken(&c, &dchar, m_curcon) ;
 590       switch (n) {
 591         case M_ENDFILE:
 592           m_ungetachar(c, dchar, TRUE) ;
 593           m_literal[i] = wc_null ;
 594           m_curcon = savecon ;
 595           return ;
 596         case M_TEXT:
 597           m_literal[i] = (M_WCHAR) m_scanval ;
 598           break ;
 599         case M_LIT:
 600         case M_LITA:
 601           m_literal[i] = wc_null ;
 602           m_curcon = savecon ;
 603           return ;
 604         case M_LITRS:
 605         case M_LITSCR:
 606           break ;
 607         case M_LITRE:
 608         case M_LITECR:
 609           m_literal[i] = wc_re ;
 610           break ;
 611         case M_LITSPACE:
 612         case M_LITCSPACE:
 613           m_literal[i] = wc_space ;
 614           break ;
 615         case M_LITTAB:
 616         case M_LITCTAB:
 617           m_literal[i] = wc_tab ;
 618           break ;
 619         case M_NULLVAL:
 620           m_literal[i] = (M_WCHAR) c ;
 621           if (dchar == wc_ee) {
 622             if (m_curcon == ENTINLIT) {
 623               m_eopencnt-- ;
 624               i-- ;
 625               if (m_eopencnt == atentlev) {
 626                 m_curcon = atdelimcon ;
 627                 break ;
 628                 }
 629               }
 630             else {
 631               m_literal[i] = wc_null ;
 632               m_curcon = savecon ;
 633               m_ungetachar(wc_null, wc_ee, FALSE) ;
 634               return ;
 635               }
 636             }
 637           break ;
 638         default:
 639           m_error("Internal error processing literal") ;
 640           break ;
 641         }
 642       } /* End for i */
 643     m_error("Literal too long") ;
 644     m_literal[i] = wc_null ;
 645     m_curcon = savecon ;
 646     }
 647
 648 /* Called when a missing tagc delimiter is detected */
 649 #if defined(M_PROTO)
 650 void m_missingtagc(int c, M_HOLDTYPE dchar, LOGICAL start)
 651 #else
 652 void m_missingtagc(c, dchar, start)
 653   int c ;
 654   M_HOLDTYPE dchar ;
 655   LOGICAL start ;
 656 #endif
 657 {
 658     if (! m_wholetag) {
 659       if (start) m_mberr1("Invalid parameter or missing %s", m_tagc);
 660       else m_mberr1("Missing %s in end-tag", m_tagc) ;
 661       }
 662     m_ungetachar(c, dchar, TRUE) ;
 663     m_curcon = START ;
 664     m_adjuststate() ;
 665     }
 666
 667 /* Have found one character in a possible short reference delimiter.
 668    Prepare to look for the next one */
 669 #if defined(M_PROTO)
 670 void m_nextdelimchar(int *n, int i, LOGICAL *linestart, LOGICAL newlinestart,
 671                      LOGICAL skipblank, unsigned char type)
 672 #else
 673 void m_nextdelimchar(n, i, linestart, newlinestart, skipblank, type)
 674   int *n ;
 675   int i ;
 676   LOGICAL *linestart ;
 677   LOGICAL newlinestart ;
 678   LOGICAL skipblank ;
 679   unsigned char type ;
 680 #endif
 681 {
 682     int k ;
 683     char mb_re,mb_seqchar, mb_rschar;
 684     M_WCHAR wc_re,wc_seqchar, wc_rschar;
 685
 686     mb_re = M_RE;
 687     mbtowc(&wc_re, &mb_re, 1);
 688     mb_seqchar = M_SEQCHAR;
 689     mbtowc(&wc_seqchar, &mb_seqchar, 1);
 690     mb_rschar = M_RSCHAR;
 691     mbtowc(&wc_rschar, &mb_rschar, 1);
 692     m_current[*n + 1] = m_sreftree[i].index ;
 693     if (! m_sreftree[m_current[*n + 1]].symbol)
 694       m_delim[*n] = m_sreftree[m_current[*n + 1]].index ;
 695     *linestart = newlinestart ;
 696     m_srefchartype[*n] = type ;
 697     if (skipblank) {
 698       for (k = 0 ; k < M_BSEQLEN ; k++) {
 699         m_hold[*n + 1 + k] = m_getachar(&m_dhold[*n + 1 + k]) ;
 700         if (m_hold[*n + 1 + k] != ' ' && m_hold[*n + 1 + k] != '\t') {
 701           m_ungetachar(m_hold[*n + 1 + k], m_dhold[*n + 1 + k], TRUE) ;
 702           break ;
 703           }
 704         m_current[*n + 1 + k + 1] = m_current[*n + 1] ;
 705         m_delim[*n + 1 + k] = m_delim[*n] ;
 706         m_srefchartype[*n + 1 + k] = wc_seqchar ;
 707         }
 708       *n += k + 1 ;
 709       }
 710     else (*n)++ ;
 711     m_srefchartype[*n] = wc_rschar ;
 712     }
 713
 714 /* Scans past a comment within a markup declaration */
 715 void m_readcomments(M_NOPAR)
 716   {
 717     int c ;
 718     M_HOLDTYPE dchar ;
 719
 720     while (! m_gettoken(&c, &dchar, COMCON))
 721       if (c == EOF) {
 722         m_error("Document ended within a comment") ;
 723         m_done() ;
 724         }
 725     }
 726
 727 /* Scanner */
 728 int m_scan(M_NOPAR)
 729   {
 730     int c ;
 731     M_HOLDTYPE dchar ;
 732     int n ;
 733     char buffer[10] ;
 734     char mb_ee, mb_re, mb_space, mb_tab;
 735     M_WCHAR wc_ee, wc_re, wc_space, wc_tab;
 736
 737     mb_ee = M_EE;
 738     mbtowc(&wc_ee, &mb_ee, 1);
 739     mb_re = M_RE;
 740     mbtowc(&wc_re, &mb_re, 1);
 741     mb_space = M_SPACE;
 742     mbtowc(&wc_space, &mb_space, 1);
 743     mb_tab = M_TAB;
 744     mbtowc(&wc_tab, &mb_tab, 1);
 745     while (TRUE) {
 746       n = m_gettoken(&c, &dchar, m_curcon) ;
 747       if (n) {
 748         if (n != M_ENTITYEND && m_stacktop->linestat == M_NOTHING)
 749           m_stacktop->linestat = M_SOMETHING ;
 750         switch (n) {
 751           case M_LITRS:
 752           case M_LITSCR:
 753             m_atrs = TRUE ;
 754             continue ;
 755           case M_LITRE:
 756           case M_LITECR:
 757             m_ungetachar(wc_re, M_ENTNORMAL, FALSE) ;
 758             continue ;
 759           case M_LITSPACE:
 760           case M_LITCSPACE:
 761             m_ungetachar(wc_space, M_ENTNORMAL, FALSE) ;
 762             continue ;
 763           case M_LITTAB:
 764           case M_LITCTAB:
 765             m_ungetachar(wc_tab, M_ENTNORMAL, FALSE) ;
 766             continue ;
 767           case M_LIT:
 768           case M_LITA:
 769             m_litproc(n) ;
 770             return(M_LITERAL) ;
 771           default:
 772             return(n) ;
 773           }
 774         }
 775       /* Check for Entity End */
 776       if (dchar == wc_ee) {
 777         m_eopencnt-- ;
 778         if (m_stacktop->element &&
 779             m_element[m_stacktop->element - 1].content == M_RCDATA) {
 780           if (m_eopencnt == m_stacktop->thisent) {
 781             if (m_netlevel) m_curcon = NETRCDATA ;
 782             else m_curcon = RCDATAEL ;}
 783           else if (m_eopencnt < m_stacktop->thisent)
 784             m_stacktop->thisent = m_eopencnt ;
 785           }
 786         if (m_newcon(m_curcon - 1, M_ENTITYEND - 1)) return(M_ENTITYEND) ;
 787         continue ;
 788         }
 789       /* Whitespace character--check if could be data.  If so,
 790          if it's a RE, check if its significant */
 791       if (m_whitespace((M_WCHAR) c)) {
 792         if (! m_newcon(m_curcon - 1, M_TEXT - 1)) continue ;
 793         if (c != wc_re || m_curcon == PROCINT || m_curcon == LITCON ||
 794                        m_curcon == LITENT || m_curcon == LITAENT) {
 795           m_scanval = c ;
 796           return(M_TEXT) ;
 797           }
 798         m_sigre() ;
 799         continue ;
 800         }
 801       if (c == EOF) {
 802         if (m_sysecnt) {
 803           m_closent(m_sysent[m_sysecnt--]) ;
 804           if (m_chtrace) {
 805             m_trace("Closing to level ") ;
 806             sprintf(buffer, "%d", m_sysecnt) ;
 807             m_trace(buffer) ;
 808             m_trace("\n") ;
 809             }
 810           continue ;
 811           }
 812         return(M_ENDFILE) ;
 813         }
 814       if (
 815           ((m_curcon == SELEMENT ||
 816             m_curcon == EELEMENT ||
 817             m_curcon == ENTNAME ||
 818             m_curcon == MAPNAME ||
 819             m_curcon == AMAPNAME)
 820             && m_cttype(c) == M_NMSTART) ||
 821           ((m_curcon == ATTNAME || m_curcon == ATTVAL ||
 822               m_curcon == NEEDVI) &&
 823             m_cttype(c) != M_NONNAME)
 824           ){
 825         m_getname((M_WCHAR) c) ;
 826         return(M_NAME) ;
 827         }
 828       switch (m_curcon) {
 829         case ATTVAL:
 830           m_err1("Expecting value for %s",
 831                  &m_pname[m_parameter[m_ppsave - 1].paramname]) ;
 832           m_stcomplete() ;
 833           m_missingtagc(c, dchar, TRUE) ;
 834           continue ;
 835         case ATTNAME:
 836           m_stcomplete() ;
 837           m_missingtagc(c, dchar, TRUE) ;
 838           continue ;
 839         case NEEDVI:
 840           m_attvonly(m_saveatt) ;
 841           m_stcomplete() ;
 842           m_missingtagc(c, dchar, TRUE) ;
 843           continue ;
 844         case ETAGEND:
 845           if (! m_stacktop->oldtop)
 846             m_scanel = m_arc[m_state[0].first - 1].label ;
 847           else m_scanel = m_stacktop->element ;
 848           m_stacktop->holdre = FALSE ;
 849           m_etcomplete() ;
 850           m_missingtagc(c, dchar, FALSE) ;
 851           continue ;
 852         default:
 853           break ;
 854         }
 855       m_scanval = c ;
 856       if (! m_newcon(m_curcon - 1, M_TEXT - 1)) return(M_BLACKSPACE) ;
 857       return(M_TEXT) ;
 858       } /* End while */
 859     } /* End scan */
 860
 861
 862 /* Process explicit or implied USEMAP or ADDMAP */
 863 #if defined(M_PROTO)
 864 void m_setmap(int map, LOGICAL useoradd)
 865 #else
 866 void m_setmap(map, useoradd)
 867   int map ;
 868   LOGICAL useoradd ;
 869 #endif
 870 {
 871     int i ;
 872     int sref ;
 873
 874     if (! m_stacktop->oldtop) {
 875       m_error("Program error: attempt to set map for empty stack") ;
 876       m_exit(TRUE) ;
 877       }
 878
 879     /* #EMPTY map*/
 880     if (map == 1) {
 881       if (m_stacktop->map && m_stacktop->oldtop->map != m_stacktop->map)
 882         m_free(m_stacktop->map, "short reference map") ;
 883       /* Done, if USEMAP */
 884       if (useoradd) {
 885         m_stacktop->map = NULL ;
 886         return ;
 887         }
 888       /* <!ADDMAP #EMPTY> restores map from beginning of element */
 889       m_stacktop->map = m_stacktop->oldtop->map ;
 890       if (m_element[m_stacktop->element - 1].srefptr)
 891         m_setmap(m_element[m_stacktop->element - 1].srefptr,
 892                  (LOGICAL) m_element[m_stacktop->element - 1].useoradd) ;
 893       return ;
 894       }
 895
 896     /* Allocate and initialize a new map if needed */
 897     if (! m_stacktop->map || m_stacktop->map == m_stacktop->oldtop->map) {
 898       m_stacktop->map =
 899         (int *) m_malloc(sizeof(int) * M_SREFCNT, "short reference map") ;
 900       for (i = 0 ; i < M_SREFCNT ; i++)
 901         if (! useoradd && m_stacktop->oldtop->map)
 902           m_stacktop->map[i] = m_stacktop->oldtop->map[i] ;
 903         else m_stacktop->map[i] = M_NULLVAL ;
 904       }
 905     /* Clear an old map if replacing it */
 906     else
 907       if (useoradd)
 908         for (i = 0 ; i < M_SREFCNT ; i++)
 909           m_stacktop->map[i] = M_NULLVAL ;
 910
 911     /* Offset into m_map is 2, 1 for 0-based indexing, 1 for #EMPTY code */
 912     for (sref = m_map[map - 2] ; sref ; sref = m_sref[sref - 1].next)
 913        m_stacktop->map[m_sref[sref - 1].sref - 1] = m_sref[sref - 1].entity ;
 914     }
 915
 916 /* Check for short reference delimiters */
 917 void m_shortref(context)
 918 int context ;
 919 {
 920 int n = 0 ;
 921 int i ;
 922 int c ;
 923 LOGICAL linestart = m_atrs ;
 924 char mb_ee;
 925 M_WCHAR wc_ee;
 926
 927 mb_ee = M_EE;
 928 mbtowc(&wc_ee, &mb_ee, 1);
 929
 930 /* If no short references defined, don't try to match one */
 931 if (sizeof(m_sreftree)/sizeof(M_PTRIE) == 1) return ;
 932
 933 /* Can return if using MARKUP extensions and no map is active */
 934 if (! m_conform && ! m_stacktop->map) return ;
 935
 936 m_current[0] = 0 ;
 937 m_srefchartype[0] = M_RSCHAR ;
 938 while (TRUE)
 939     {
 940     /* Search through short reference delimiter tree */
 941     while (TRUE)
 942         {
 943         m_delim[n] = FALSE ;
 944
 945         /* Look for RS */
 946         if (linestart && m_srefchartype[n] >= M_RSCHAR)
 947             {
 948             for (i = m_current[n] ;
 949             m_sreftree[i].more && m_sreftree[i].symbol < RS ;
 950             i++) ;
 951             if (m_sreftree[i].symbol == RS)
 952                 {
 953                 m_nextdelimchar(&n, i, &linestart, FALSE, FALSE, M_RSCHAR) ;
 954                 continue ;
 955                 }
 956             }
 957
 958         /* Look for white space sequence */
 959         if (m_srefchartype[n] >= M_WSCHAR)
 960             {
 961             for (i = m_current[n] ;
 962             m_sreftree[i].more && m_sreftree[i].symbol < WSSEQ ;
 963             i++) ;
 964             if (m_sreftree[i].symbol == WSSEQ)
 965                 {
 966                 m_nextdelimchar(&n, i, &linestart, FALSE, TRUE, M_WSCHAR) ;
 967                 continue ;
 968                 }
 969             }
 970
 971         /* Look at next character from input stream */
 972         m_hold[n] = m_getachar(&m_dhold[n]) ;
 973         if (m_dhold[n] == wc_ee ||
 974         (m_dhold[n] != M_NORMAL && m_dhold[n] != M_ENTNORMAL))
 975             {
 976             m_srefchartype[n] = M_REGCHAR ;
 977             break ;
 978             }
 979
 980         /* Look for blank sequence */
 981         if (m_srefchartype[n] >= M_BSCHAR &&
 982         (m_hold[n] == ' ' || m_hold[n] == '\t'))
 983             {
 984             for (i = m_current[n] ;
 985             m_sreftree[i].more && m_sreftree[i].symbol < BLANKSEQ ;
 986             i++) ;
 987             if (m_sreftree[i].symbol == BLANKSEQ &&
 988             (m_hold[n] == ' ' || m_hold[n] == '\t'))
 989                 {
 990                 m_nextdelimchar(&n, i, &linestart, FALSE, TRUE, M_BSCHAR) ;
 991                 continue ;
 992                 }
 993             }
 994
 995         /* Look for regular character */
 996         c = m_ctupper(m_hold[n]) ;
 997         if (m_cttype(c) != M_NMSTART)
 998             {
 999             for (i = m_current[n] ;
1000                  m_sreftree[i].more && (int) m_sreftree[i].symbol < c ;
1001                  i++) ;
1002             if ((int) m_sreftree[i].symbol == c)
1003                 {
1004                 m_nextdelimchar(&n, i, &linestart, m_atrs, FALSE, M_REGCHAR) ;
1005                 continue ;
1006                 }
1007             }
1008
1009         m_srefchartype[n] = M_REGCHAR ;
1010         break ;
1011         } /* End search through sref delimiter tree */
1012
1013     while (TRUE)
1014         {
1015         if (m_delim[n])
1016             {
1017             /* Found a delimiter. If letters were allowed in short references
1018             would check here for runon situations such as <!ENTITYrunon ... */
1019             if (m_gendelim(n, context))
1020                 {
1021                 for (i = n ; i >= 0 ; i--)
1022                     if (m_srefchartype[i] < M_WSCHAR)
1023                         m_ungetachar(m_hold[i], m_dhold[i], TRUE) ;
1024                 return ;
1025                 }
1026             linestart = TRUE ;
1027             for (i = n ; i >= 0 ; i--)
1028             if (m_srefchartype[i] < M_WSCHAR)
1029                 {
1030                 linestart = FALSE ;
1031                 break ;
1032                 }
1033             if (linestart) m_atrs = FALSE ;
1034             if (m_stacktop->map && m_stacktop->map[m_delim[n] - 1])
1035                 {
1036                 m_entexpand(
1037                 &m_entities[m_stacktop->map[m_delim[n] - 1] - 1]) ;
1038                 return ;
1039                 }
1040             if (m_conform)
1041                 {
1042                 for (i = n ; i >= 0 ; i--)
1043                 if (m_srefchartype[i] < M_WSCHAR)
1044                 m_ungetachar(m_hold[i], M_CDCHAR, TRUE) ;
1045                 return ;
1046                 }
1047             }
1048         if (m_srefchartype[n] < M_WSCHAR)
1049         m_ungetachar(m_hold[n], m_dhold[n], TRUE) ;
1050         if (m_srefchartype[n] > M_REGCHAR)
1051             {
1052             m_srefchartype[n]-- ;
1053             break ;
1054             }
1055         n-- ;
1056         if (n < 0) return ;
1057         }
1058     }
1059 }
1060
1061 /* Test for significant record ends.  Ignore RE (\n) if
1062       1)  It is the first RE in the content and no data character
1063           or contextual end tag has occurred
1064       2)  Something has occurred on the line but not a data character
1065           or contextual end tag [linestat == M_SOMETHING]
1066       3)  If a record end might be the last one in an element, save it
1067 */
1068 void m_sigre(M_NOPAR)
1069   {
1070     /* Check for first RE in content and no preceding content */
1071     if (m_start &&
1072         (! m_stacktop->firstre && m_oldlinestat[m_oldlsindex] != M_DCORCET)) {
1073       m_stacktop->firstre = TRUE ;
1074       return ;
1075       }
1076     /* Check for line containing other than data characters or contextual
1077        subelements */
1078     if (m_start && m_oldlinestat[m_oldlsindex] == M_SOMETHING) return ;
1079     /* Save the RE to see what follows */
1080     m_holdproc() ;
1081     m_stacktop->holdre = TRUE ;
1082     return ;
1083     } /* End white space */
1084
1085 /* Returns a context-dependent delimiter string to input stream so
1086    characters can be reread one at a time in another context */
1087 #if defined(M_PROTO)
1088 void m_undodelim(M_WCHAR *delim, LOGICAL flag)
1089 #else
1090 void m_undodelim(delim, flag)
1091   M_WCHAR *delim ;
1092   LOGICAL flag ;
1093 #endif
1094 {
1095     M_WCHAR *p ;
1096
1097     for (p = delim ; *p ; p++) ;
1098
1099     p-- ;
1100     while (TRUE) {
1101       m_ungetachar((int) *p, M_NORMAL, flag) ;
1102       if (p == delim) return ;
1103       p-- ;
1104       }
1105     }
1106
1107 /* Place a character on the current input stream.  The character may have
1108    been scanned and determined not to be part of the current token or it
1109    may be in the expansion of an entity*/
1110 #if defined(M_PROTO)
1111 void m_ungetachar(int c, M_HOLDTYPE dchar, LOGICAL preread)
1112 #else
1113 void m_ungetachar(c, dchar, preread)
1114   int c ;
1115   M_HOLDTYPE dchar ;
1116   LOGICAL preread ;
1117 #endif
1118 {
1119     char buffer[10] ;
1120     int length;
1121     char mb_ee;
1122     M_WCHAR wc_ee;
1123
1124     mb_ee = M_EE;
1125     mbtowc(&wc_ee, &mb_ee, 1);
1126     if (m_chtrace) {
1127       if (dchar) {
1128         m_trace("unget(") ;
1129         length = wctomb(buffer, c);
1130         buffer[length] = 0;
1131         m_trace(buffer) ;
1132         m_trace(")[") ;
1133         sprintf(buffer, "%d", c) ;
1134         m_trace(buffer) ;
1135         m_trace("],") ;
1136         sprintf(buffer, "%d", dchar) ;
1137         m_trace(buffer) ;
1138         m_trace("\n") ;
1139         }
1140       else m_trace("unget(EE)\n") ;
1141       }
1142     m_inctest(&m_toundo, M_SAVECHAR, "M_SAVECHAR") ;
1143     m_sourcefile[m_toundo - 1] = m_sysecnt ;
1144     m_savedchar[m_toundo - 1] = dchar ;
1145     m_savechar[m_toundo - 1] = dchar == wc_ee ? (int) m_atrs : c ;
1146     if (preread) {
1147       m_stacktop->linestat = m_oldlinestat[m_oldlsindex] ;
1148       m_atrs = m_oldatrs[m_oldlsindex] ;
1149       m_oldlsindex = (m_oldlsindex - 1 + M_SAVECHAR) % M_SAVECHAR ;
1150       }
1151     if (m_toundo > m_maxundo) m_maxundo = m_toundo ;
1152     if (c == M_RE)
1153       if (dchar == M_NORMAL) m_line[m_sysecnt]-- ;
1154     }
1155
1156 /* Have encountered an M_ERO.  If the entity reference is valid, process it*/
1157 LOGICAL m_vldentref(M_NOPAR)
1158   {
1159     M_HOLDTYPE dchar ;
1160     int next ;
1161     M_ENTITY *openent ;
1162     char mb_ee;
1163     M_WCHAR wc_ee;
1164
1165     mb_ee = M_EE;
1166     mbtowc(&wc_ee, &mb_ee, 1);
1167     next = m_getachar(&dchar) ;
1168     if (next != EOF && m_cttype(next) == M_NMSTART && dchar != wc_ee) {
1169       m_getname((M_WCHAR) next) ;
1170       if (! m_gettoken(&next, &dchar, ENTREF))
1171         if (next != M_RE) m_ungetachar(next, dchar, TRUE) ;
1172       if (openent = (M_ENTITY *) m_lookfortrie(m_name, m_enttrie))
1173         m_entexpand(openent) ;
1174       else m_err1("Reference to undefined entity '%s'", m_name) ;
1175       return(TRUE) ;
1176       }
1177     m_ungetachar(next, dchar, TRUE) ;
1178     return(FALSE) ;
1179     }
1180
1181 #if defined(sparse)
1182 #include "sparse.c"
1183 #endif
1184
1185