cde/programs/dthelp/parser/pass1/parser/scan.c

   1 /*
   2  * CDE - Common Desktop Environment
   3  *
   4  * Copyright (c) 1993-2012, The Open Group. All rights reserved.
   5  *
   6  * These libraries and programs are free software; you can
   7  * redistribute them and/or modify them under the terms of the GNU
   8  * Lesser General Public License as published by the Free Software
   9  * Foundation; either version 2 of the License, or (at your option)
  10  * any later version.
  11  *
  12  * These libraries and programs are distributed in the hope that
  13  * they will be useful, but WITHOUT ANY WARRANTY; without even the
  14  * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15  * PURPOSE. See the GNU Lesser General Public License for more
  16  * details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with these libraries and programs; if not, write
  20  * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
  21  * Floor, Boston, MA 02110-1301 USA
  22  */
  23 /* $XConsortium: scan.c /main/3 1995/11/08 10:24:54 rswiston $ */
  24 /*
  25               Copyright 1986 Tandem Computers Incorporated.
  26 This product and information is proprietary of Tandem Computers Incorporated.
  27                    Copyright 1986, 1987, 1988, 1989 Hewlett-Packard Co.
  28 */
  29
  30 /* Scan.c is the scanner for program PARSER */
  31
  32 #include <string.h>
  33 #include <stdlib.h>
  34 #include <stdio.h>
  35 #include "basic.h"
  36 #include "trie.h"
  37 #define M_CONDEF
  38 #include "context.h"
  39 #define M_DELIMDEF
  40 #include "delim.h"
  41 #define M_DTDDEF
  42 #include "dtd.h"
  43 #include "arc.h"
  44 #define M_PARDEF
  45 #include "parser.h"
  46 #define M_ENTDEF
  47 #include "entity2.h"
  48 #include "sref.h"
  49
  50 /* Actually read a character from an input stream */
  51 int m_actgetc(void)
  52   {
  53     int c ;
  54
  55     c = m_getc(m_sysent[m_sysecnt]) ;
  56     m_saveline[m_svlncnt[m_sysecnt]][m_sysecnt] = c ;
  57     if (++m_svlncnt[m_sysecnt] >= M_LINELENGTH) {
  58       m_svlncnt[m_sysecnt] = 0 ;
  59       m_svlnwrap[m_sysecnt] = TRUE ;
  60       }
  61     return(c) ;
  62     }
  63
  64 /* Expand an entity reference */
  65 void m_entexpand(M_ENTITY *openent)
  66   {
  67     M_WCHAR *p ;
  68     M_HOLDTYPE dchar ;
  69     char buffer[10] ;
  70     int i ;
  71
  72     m_ungetachar(M_NULLVAL, M_EE, FALSE) ;
  73     m_eopencnt++ ;
  74     m_opene[m_eopencnt - 1] = openent ;
  75
  76     if (m_stacktop->element &&
  77         m_element[m_stacktop->element - 1].content == M_RCDATA)
  78       m_curcon = RCNEWENT ;
  79     if (m_curcon == LITCON || m_curcon == LITACON)
  80       m_curcon = ENTINLIT ;
  81     if (! openent->wheredef) {
  82       m_eopencnt-- ;
  83       m_err1("%s: System error -- no definition for predeclared entity",
  84              openent->name) ;
  85       m_eopencnt++ ;
  86       return ;
  87       }
  88     if (m_curcon == ENTINLIT)
  89       if (openent->type != M_GENERAL) {
  90         m_eopencnt-- ;
  91         m_err1("%s: Typed entity not allowed in parameter value",
  92                openent->name) ;
  93         m_eopencnt++ ;
  94         return ;
  95         }
  96     if (m_eopencnt > M_ENTLVL) {
  97       m_eopencnt-- ;
  98       m_err1("%s: Too many nested entities", openent->name) ;
  99       m_eopencnt++ ;
 100       return ;
 101       }
 102     for (i = 0 ; i < m_eopencnt - 1; i++)
 103       if (m_opene[i] == openent) {
 104         m_eopencnt-- ;
 105         m_err1("Recursive call to entity %s ignored", openent->name) ;
 106         m_eopencnt++ ;
 107         return ;
 108         }
 109
 110     /* If SDATA or PI entity (regular or CODE) at beginning of document
 111        instance, call m_startdoc and reset m_curcon past preamble */
 112     if (m_curcon == PREAMBLE &&
 113         (openent->type == M_SDATA ||
 114          openent->type == M_CODESDATA ||
 115          openent->type == M_PI ||
 116          openent->type == M_CODEPI)) {
 117       m_startdoc() ;
 118       m_curcon = START ;
 119       m_adjuststate() ;
 120       }
 121
 122     /* SDATA entity */
 123       if (openent->type == M_SDATA || openent->type == M_CODESDATA) {
 124         if (! m_stacktop->intext) {
 125           if (! m_strtproc(M_NULLVAL)) {
 126             if (m_stacktop->oldtop)
 127               m_err1("SDATA entity not allowed at this point in %s",
 128                      m_nameofelt(m_stacktop->element)) ;
 129             else if (! m_start)
 130               m_error("Document may not start with SDATA entity") ;
 131             }
 132           m_start = TRUE ;
 133           m_stacktop->firstre = TRUE ;
 134           m_stacktop->intext = TRUE ;
 135           if (m_curcon == ELCON || m_curcon == DATACON)
 136             m_curcon = POUNDCDATA ;
 137           else if (m_curcon == NETELCON || m_curcon == NETDATACON)
 138             m_curcon = NETCDATA ;
 139           }
 140         m_stacktop->linestat = M_DCORCET ;
 141         m_holdproc() ;
 142         }
 143
 144     /* CODE entity */
 145     if (openent->type == M_CODEPI || openent->type == M_CODESDATA) {
 146       if (openent->type == M_CODEPI)
 147         m_stacktop->linestat = M_SOMETHING ;
 148       m_codeent(openent->codeindex) ;
 149       return ;
 150       }
 151
 152     /* PI or SDATA, but not CODE entity */
 153     if (openent->type == M_PI || openent->type == M_SDATA) {
 154       m_piaction(openent->content, openent->name, openent->type) ;
 155       return ;
 156       }
 157
 158     /* Subordinate data file */
 159     if (openent->type == M_SYSTEM) {
 160       m_sysent[m_sysecnt + 1] = m_openent(openent->content) ;
 161       if (m_sysent[m_sysecnt + 1]) {
 162         m_sysecnt++ ;
 163         m_line[m_sysecnt] = 1 ;
 164         m_svlncnt[m_sysecnt] = 0 ;
 165         m_svlnwrap[m_sysecnt] = FALSE ;
 166         if (m_chtrace) {
 167           m_trace("Opening `") ;
 168           m_wctrace(openent->content) ;
 169           m_trace("'(") ;
 170           sprintf(buffer, "%d", m_sysecnt) ;
 171           m_trace(buffer) ;
 172           m_trace(")\n") ;
 173           }
 174         return ;
 175         }
 176       m_eopencnt-- ;
 177       m_err1("Unable to open file %s", openent->content) ;
 178       m_eopencnt++ ;
 179       return ;
 180       }
 181
 182     /* An entity reference has been encountered.  Put the content of the
 183        entity, including any leading or trailing delimiters into the input
 184        stream in reverse order */
 185     /* Closing delimiter */
 186     switch (openent->type) {
 187       case M_STARTTAG:
 188       case M_ENDTAG: {
 189         m_undodelim(m_dlmptr[M_TAGC - 1], FALSE) ;
 190         break ;
 191         }
 192       case M_MD: {
 193         m_undodelim(m_dlmptr[M_MDC - 1], FALSE) ;
 194         break ;
 195         }
 196       default:
 197         break ;
 198       }
 199     /* Content of entity -- scan for end to reverse string */
 200     if (openent->type == M_CDATAENT) dchar = M_CDCHAR ;
 201     else dchar = M_ENTNORMAL ;
 202     if (p = openent->content)
 203       while (*p) p++;
 204     if (p != openent->content) {
 205       p-- ;
 206       while (TRUE) {
 207         m_ungetachar((int) *p, dchar, FALSE) ;
 208         if (p == openent->content) break ;
 209         p-- ;
 210         }
 211       }
 212     /* Opening delimiter */
 213     switch (openent->type) {
 214       case M_STARTTAG: {
 215         m_undodelim(m_dlmptr[M_STAGO - 1], FALSE) ;
 216         break ;
 217         }
 218       case M_ENDTAG: {
 219         m_undodelim(m_dlmptr[M_ETAGO - 1], FALSE) ;
 220         break ;
 221         }
 222       case M_MD: {
 223         m_undodelim(m_dlmptr[M_MDO - 1], FALSE) ;
 224         break ;
 225         }
 226       default:
 227         break ;
 228       }
 229     } /* End m_entexpand */
 230
 231 /* An srlen-character long short-reference delimiter has been found.  Verify
 232    that it is not the prefix of a general delimiter recognized in context*/
 233 LOGICAL m_gendelim(int srlen, int context)
 234   {
 235     int ghold[MAXD + 1] ;
 236     int ucase ;
 237     int next ;
 238     int i, n = 0, current, delim[MAXD + 1], oldchars = 0 ;
 239     int newcharstart = 0 ;
 240     M_HOLDTYPE dhold[MAXD + 1], dchar ;
 241     LOGICAL linestart ;
 242     LOGICAL found ;
 243
 244     if (! (current = m_contree[context - 1])) return(FALSE) ;
 245     linestart = TRUE ;
 246     for (i = 0 ; i <= srlen ; i++)
 247       if (m_srefchartype[i] != M_RSCHAR && m_srefchartype[i] != M_WSCHAR) {
 248         linestart = FALSE ;
 249         break ;
 250       }
 251     if (linestart) return(FALSE) ;
 252
 253     current-- ;
 254     while (TRUE) {
 255       delim[n] = FALSE ;
 256       while (oldchars <= srlen &&
 257              (m_srefchartype[oldchars] == M_RSCHAR ||
 258               m_srefchartype[oldchars] == M_WSCHAR))
 259         oldchars++ ;
 260       if (oldchars <= srlen)
 261         ucase = m_hold[oldchars++] ;
 262       else {
 263         if (! newcharstart) newcharstart = n ;
 264         ghold[n] = m_getachar(&dhold[n]) ;
 265         ucase = m_ctupper(ghold[n]) ;
 266         if (dhold[n] != M_NORMAL && dhold[n] != M_ENTNORMAL) break ;
 267         }
 268       for (i = current ;
 269            (int) m_delimtrie[i].symbol < ucase && m_delimtrie[i].more ;
 270            i++) ;
 271       if ((int) m_delimtrie[i].symbol == ucase) {
 272         current = m_delimtrie[i].index ;
 273         if (! m_delimtrie[current].symbol)
 274           delim[n] = m_delimtrie[current].index ;
 275         n++ ;
 276         }
 277       else break ;
 278       }
 279
 280     if (! newcharstart) return(FALSE) ;
 281     while (n >= newcharstart - 1) {
 282       found = FALSE ;
 283       if (delim[n]) {
 284         /* Found a delimiter. If it ends with a letter, verify
 285            that the following character is not a letter, in order
 286            to issue error messages in cases such as <!ENTITYrunon ... */
 287         if (m_cttype(ghold[n]) != M_NMSTART) found = TRUE ;
 288         else {
 289           next = m_getachar(&dchar) ;
 290           m_ungetachar(next, dchar, TRUE) ;
 291           if (next == EOF || m_cttype(next) != M_NMSTART)
 292             found = TRUE ;
 293           }
 294         }
 295       if (found) {
 296         if (delim[n] == M_ERO || delim[n] == M_STAGO ||
 297             delim[n] == M_ETAGO) {
 298           next = m_getachar(&dchar) ;
 299           m_ungetachar(next, dchar, TRUE) ;
 300           if (! (m_cttype(next) == M_NMSTART &&
 301                  (dchar == M_NORMAL || dchar == M_ENTNORMAL))) {
 302             n-- ;
 303             continue ;
 304             }
 305           }
 306         while (n >= newcharstart) {
 307           m_ungetachar(ghold[n], dhold[n], TRUE) ;
 308           n-- ;
 309           }
 310         return(TRUE) ;
 311         } /* End if delim[n] */
 312       if (n >= newcharstart) m_ungetachar(ghold[n], dhold[n], TRUE) ;
 313       n-- ;
 314       }
 315
 316     return(FALSE) ;
 317     }
 318
 319 /* Reads next input character from the current source file or from an
 320    entity expansion */
 321 int m_getachar(M_HOLDTYPE *dchar)
 322   {
 323     int c ;
 324     int i ;
 325     char buffer[10] ;
 326     int length;
 327     M_WCHAR wc_ee, wc_re;
 328     char    mb_ee, mb_re;
 329
 330     mb_ee = M_EE;
 331     mb_re = M_RE;
 332     mbtowc(&wc_ee, &mb_ee, 1);
 333     mbtowc(&wc_re, &mb_re, 1);
 334     if (m_toundo && m_sysecnt <= m_sourcefile[m_toundo - 1]) {
 335       c = m_savechar[--m_toundo] ;
 336       *dchar = m_savedchar[m_toundo] ;
 337       if (*dchar == wc_ee) m_atrs = (M_WCHAR) c;
 338       }
 339     else {
 340       c = m_actgetc() ;
 341       *dchar = M_NORMAL ;
 342       if (m_whitespace((M_WCHAR) c) && c != wc_re) {
 343         /* White space, but not RE, i.e., space or tab */
 344         for (m_wscount = 0 ; m_wscount < M_WSPACELEN ; m_wscount++) {
 345           m_wspace[m_wscount] = m_actgetc() ;
 346           if (! m_whitespace((M_WCHAR) m_wspace[m_wscount]) ||
 347               m_wspace[m_wscount] == wc_re)
 348             break ;
 349           }
 350         if (m_whitespace((M_WCHAR) m_wspace[m_wscount]) &&
 351             m_wspace[m_wscount] != wc_re) {
 352           m_error("Ignoring blank or tab") ;
 353           while (m_whitespace((M_WCHAR) m_wspace[m_wscount]) &&
 354                  m_wspace[m_wscount] != wc_re)
 355             m_wspace[m_wscount] = m_actgetc() ;
 356           }
 357         if (m_wscount > m_maxws) m_maxws = m_wscount ;
 358         if (m_wspace[m_wscount] == wc_re) c = wc_re ;
 359         else {
 360           for (i = 0 ; i <= m_wscount ; i++)
 361             m_ungetachar(m_wspace[m_wscount - i], M_NORMAL, FALSE) ;
 362           }
 363         } /* End just read a blank or tab, is it line-trailing? */
 364       } /* End read a character from file */
 365
 366     m_oldlsindex = (m_oldlsindex + 1) % M_SAVECHAR ;
 367     m_oldlinestat[m_oldlsindex] = m_stacktop->linestat ;
 368     m_oldatrs[m_oldlsindex] = m_atrs ;
 369     if (c == wc_re && *dchar) {
 370       if (*dchar == M_NORMAL) m_line[m_sysecnt]++ ;
 371       m_stacktop->linestat = M_NOTHING ;
 372       m_atrs = TRUE ;
 373       }
 374     else if (*dchar) m_atrs = FALSE ;
 375     if (m_chtrace) {
 376       if (*dchar) {
 377         m_trace("get(") ;
 378         length = wctomb(buffer, c);
 379         buffer[length] = 0;
 380         m_trace(buffer) ;
 381         m_trace(")[") ;
 382         sprintf(buffer, "%d", c) ;
 383         m_trace(buffer) ;
 384         m_trace("],") ;
 385         sprintf(buffer, "%d", *dchar) ;
 386         m_trace(buffer) ;
 387         m_trace("\n") ;
 388         }
 389       else m_trace("get(EE)\n") ;
 390       }
 391     return(c) ;
 392     }
 393
 394 /* Reads a name token */
 395 void m_getname(M_WCHAR first)
 396 {
 397     M_WCHAR *p ;
 398     M_HOLDTYPE dchar ;
 399     int c ;
 400
 401     *(p = m_name) = first ;
 402     while (TRUE) {
 403       c = m_getachar(&dchar) ;
 404       if (c == EOF) break ;
 405       if (dchar != M_NORMAL && dchar != M_ENTNORMAL) break ;
 406       if (m_cttype(c) == M_NONNAME) break ;
 407       *++p = (M_WCHAR) c ;
 408       if (p >= m_name + M_NAMELEN) {
 409         p-- ;
 410         m_error("Name too long") ;
 411         while ((dchar == M_NORMAL || dchar == M_ENTNORMAL) &&
 412                c != EOF &&
 413                m_cttype(c) != M_NONNAME)
 414           c = m_getachar(&dchar) ;
 415         break ;
 416         }
 417       }
 418     m_ungetachar(c, dchar, TRUE) ;
 419     *++p = M_EOS ;
 420     }
 421
 422 /* Reads the next token */
     /* Characters are read with m_getachar and matched, upper-cased, against
        the delimiter trie selected by context (m_contree[context - 1]).
        The longest match is tried first and shorter prefixes after it.

        Returns
          - the code of the delimiter recognized,
          - M_TEXT or M_BLACKSPACE for a numeric character reference (M_CRO
            followed by digits), with the value left in m_scanval, or
          - M_NULLVAL when no delimiter applies; *c and *dchar then hold the
            first character read and its hold type.

        In the data/element content contexts listed in the first switch,
        m_shortref is called beforehand when m_stacktop->oldtop is set. */
 423 int m_gettoken(int *c, M_HOLDTYPE *dchar, int context)
 424   {
 425     int hold[MAXD + 1], next ;
 426     int ucase ;
 427     int i, n = 0, current, delim[MAXD + 1], nexttoken ;
 428     M_HOLDTYPE dhold[MAXD + 1] ;
 429     LOGICAL found ;
 430
 431     switch (context) {
 432       case DATACON:
 433       case NETDATACON:
 434       case POUNDCDATA:
 435       case NETCDATA:
 436       case ELCON:
 437       case NETELCON:
 438         if (m_stacktop->oldtop) m_shortref(context) ;
 439         break ;
 440       default:
 441         break ;
 442       }
         /* Context with no delimiter trie: hand back a single character */
 443     if (! (current = m_contree[context - 1])) {
 444       *c = m_getachar(dchar) ;
 445       return(M_NULLVAL) ;
 446       }
 447     current-- ;
         /* Follow the trie as far as the input matches.  delim[n] is set to
            a delimiter code when the characters hold[0..n] form a complete
            delimiter, and is FALSE otherwise */
 448     while (TRUE) {
 449       hold[n] = m_getachar(&dhold[n]) ;
 450       ucase = m_ctupper(hold[n]) ;
 451       delim[n] = FALSE ;
 452       if (dhold[n] != M_NORMAL && dhold[n] != M_ENTNORMAL) break ;
 453       for (i = current ;
 454            (int) m_delimtrie[i].symbol < ucase && m_delimtrie[i].more ;
 455            i++) ;
 456       if ((int) m_delimtrie[i].symbol == ucase) {
 457         current = m_delimtrie[i].index ;
 458         if (! m_delimtrie[current].symbol)
 459           delim[n] = m_delimtrie[current].index ;
 460         n++ ;
 461         }
 462       else break ;
 463       }
 464
         /* Back off from the longest match, pushing back one character per
            step, until an acceptable delimiter is found */
 465     while (n >= 0) {
 466       found = FALSE ;
 467       if (delim[n]) {
 468         /* Found a delimiter. If it ends with a letter, verify
 469            that the following character is not a letter, in order
 470            to issue error messages in cases such as <!ENTITYrunon ... */
 471         if (m_cttype(hold[n]) != M_NMSTART) found = TRUE ;
 472         else {
 473           *c = m_getachar(dchar) ;
 474           m_ungetachar(*c, *dchar, TRUE) ;
 475           if (*c == EOF || m_cttype(*c) != M_NMSTART) found = TRUE ;
 476           }
 477         }
 478       if (found) {
 479         if (delim[n] == M_CRO) {
               /* Character reference: accumulate the decimal code that
                  follows into m_scanval */
 480           next = m_getachar(dchar) ;
 481           if ((*dchar != M_NORMAL && *dchar != M_ENTNORMAL) ||
 482               (m_cttype(next) != M_DIGIT))
 483             m_ungetachar(next, *dchar, TRUE) ;
 484           else {
 485             m_scanval = next - '0' ;
 486             while (TRUE) {
 487               next = m_getachar(dchar) ;
 488               if ((*dchar != M_NORMAL && *dchar != M_ENTNORMAL) ||
 489                   (m_cttype(next) != M_DIGIT)) {
                     /* End of the digits.  Scan once in the ENTREF context,
                        which consumes a delimiter recognized there; if none
                        is found the character read is pushed back */
 490                 m_ungetachar(next, *dchar, TRUE) ;
 491                 if (! m_gettoken(&next, dchar, ENTREF))
 492                   m_ungetachar(next, *dchar, TRUE) ;
 493                 if (context == ELCON || context == NETELCON)
 494                   return(M_BLACKSPACE) ;
 495                 else return(M_TEXT) ;
 496                 }
 497               m_scanval = 10 * m_scanval + next - '0' ;
 498               if (m_scanval >= M_CHARSETLEN) {
                     /* Code out of range: drop the last digit again and
                        return what was accumulated before it */
 499                 m_error("Invalid character code") ;
 500                 m_scanval = (m_scanval - next + '0') / 10 ;
 501                 m_ungetachar(next, *dchar, TRUE) ;
 502                 if (context == ELCON || context == NETELCON)
 503                   return(M_BLACKSPACE) ;
 504                 else return(M_TEXT) ;
 505                 }
 506               } /* End loop reading digits after M_CRO */
 507             } /* End M_CRO followed by digit */
 508           } /* End delim[n] == M_CRO */
             /* Entity reference: when m_vldentref succeeds, rescan -- in
                m_curcon if that is now RCNEWENT or ENTINLIT, otherwise in
                the caller's context */
 509         else if (delim[n] == M_ERO)
 510           if (m_vldentref())
 511             return(m_gettoken(c, dchar,
 512                             (m_curcon == RCNEWENT || m_curcon == ENTINLIT) ?
 513                               m_curcon : context)) ;
 514         /* Can be an M_ERO or M_CRO here only if not in context and hence
 515            should not be treated as a delimiter */
 516         if (delim[n] != M_STAGO && delim[n] != M_ETAGO &&
 517             delim[n] != M_ERO && delim[n] != M_CRO)
 518           return(delim[n]) ;
 519         /* M_STAGO and M_ETAGO recognized only if immediately followed by
 520            a M_NMSTART character or by an appropriate closing delimiter
 521            (latter is a short tag) */
 522         if (delim[n] == M_STAGO || delim[n] == M_ETAGO) {
 523           next = m_getachar(dchar) ;
 524           m_ungetachar(next, *dchar, TRUE) ;
 525           if (m_cttype(next) == M_NMSTART &&
 526               (*dchar == M_NORMAL || *dchar == M_ENTNORMAL))
 527             return(delim[n]) ;
               /* Look for a delimiter in the tag context; if one is found
                  it is pushed back whole for the caller to scan again */
 528           nexttoken = m_gettoken(&next, dchar,
 529             delim[n] == M_STAGO ? SELEMENT : EELEMENT) ;
 530           if (nexttoken) {
 531             m_undodelim(m_dlmptr[nexttoken - 1], TRUE) ;
 532             return(delim[n]) ;
 533             }
 534           else m_ungetachar(next, *dchar, TRUE) ;
 535           } /* End delim[n] is M_STAGO or M_ETAGO */
 536         } /* End if (delim[n]) */
           /* hold[0] is never pushed back here: it is what gets returned
              through *c when no delimiter is accepted */
 537       if (n) m_ungetachar(hold[n], dhold[n], TRUE) ;
 538       n-- ;
 539       }
 540
 541     *c = *hold ;
 542     *dchar = *dhold ;
 543     return(M_NULLVAL) ;
 544     }
 545
 546 /* Reads a literal */
 547 void m_litproc(int delim)
 548   {
 549     int n, i ;
 550     M_HOLDTYPE dchar ;
 551     int savecon = m_curcon ;
 552     int c ;
 553     int atentlev ;
 554     int atdelimcon ;
 555     char mb_re, mb_tab, mb_space, mb_null, mb_ee;
 556     M_WCHAR wc_re, wc_tab, wc_space, wc_null, wc_ee;
 557
 558     mb_re = M_RE;
 559     mb_tab = M_TAB;
 560     mb_space = M_SPACE;
 561     mb_null = M_NULLVAL;
 562     mb_ee = M_EE;
 563     mbtowc(&wc_re, &mb_re, 1);
 564     mbtowc(&wc_tab, &mb_tab, 1);
 565     mbtowc(&wc_space, &mb_space, 1);
 566     mbtowc(&wc_null, &mb_null, 1);
 567     mbtowc(&wc_ee, &mb_ee, 1);
 568
 569     m_curcon = delim == M_LIT ? LITCON : LITACON ;
 570     atentlev = m_eopencnt ;
 571     atdelimcon = m_curcon ;
 572     for (i = 0 ; i < M_LITLEN + 1 ; i++) {
 573       n = m_gettoken(&c, &dchar, m_curcon) ;
 574       switch (n) {
 575         case M_ENDFILE:
 576           m_ungetachar(c, dchar, TRUE) ;
 577           m_literal[i] = wc_null ;
 578           m_curcon = savecon ;
 579           return ;
 580         case M_TEXT:
 581           m_literal[i] = (M_WCHAR) m_scanval ;
 582           break ;
 583         case M_LIT:
 584         case M_LITA:
 585           m_literal[i] = wc_null ;
 586           m_curcon = savecon ;
 587           return ;
 588         case M_LITRS:
 589         case M_LITSCR:
 590           break ;
 591         case M_LITRE:
 592         case M_LITECR:
 593           m_literal[i] = wc_re ;
 594           break ;
 595         case M_LITSPACE:
 596         case M_LITCSPACE:
 597           m_literal[i] = wc_space ;
 598           break ;
 599         case M_LITTAB:
 600         case M_LITCTAB:
 601           m_literal[i] = wc_tab ;
 602           break ;
 603         case M_NULLVAL:
 604           m_literal[i] = (M_WCHAR) c ;
 605           if (dchar == wc_ee) {
 606             if (m_curcon == ENTINLIT) {
 607               m_eopencnt-- ;
 608               i-- ;
 609               if (m_eopencnt == atentlev) {
 610                 m_curcon = atdelimcon ;
 611                 break ;
 612                 }
 613               }
 614             else {
 615               m_literal[i] = wc_null ;
 616               m_curcon = savecon ;
 617               m_ungetachar(wc_null, wc_ee, FALSE) ;
 618               return ;
 619               }
 620             }
 621           break ;
 622         default:
 623           m_error("Internal error processing literal") ;
 624           break ;
 625         }
 626       } /* End for i */
 627     m_error("Literal too long") ;
 628     m_literal[i] = wc_null ;
 629     m_curcon = savecon ;
 630     }
 631
 632 /* Called when a missing tagc delimiter is detected */
 633 void m_missingtagc(int c, M_HOLDTYPE dchar, LOGICAL start)
 634 {
 635     if (! m_wholetag) {
 636       if (start) m_mberr1("Invalid parameter or missing %s", m_tagc);
 637       else m_mberr1("Missing %s in end-tag", m_tagc) ;
 638       }
 639     m_ungetachar(c, dchar, TRUE) ;
 640     m_curcon = START ;
 641     m_adjuststate() ;
 642     }
 643
 644 /* Have found one character in a possible short reference delimiter.
 645    Prepare to look for the next one */
 646 void m_nextdelimchar(int *n, int i, LOGICAL *linestart, LOGICAL newlinestart,
 647                      LOGICAL skipblank, unsigned char type)
 648 {
 649     int k ;
 650     char mb_re,mb_seqchar, mb_rschar;
 651     M_WCHAR wc_re,wc_seqchar, wc_rschar;
 652
 653     mb_re = M_RE;
 654     mbtowc(&wc_re, &mb_re, 1);
 655     mb_seqchar = M_SEQCHAR;
 656     mbtowc(&wc_seqchar, &mb_seqchar, 1);
 657     mb_rschar = M_RSCHAR;
 658     mbtowc(&wc_rschar, &mb_rschar, 1);
 659     m_current[*n + 1] = m_sreftree[i].index ;
 660     if (! m_sreftree[m_current[*n + 1]].symbol)
 661       m_delim[*n] = m_sreftree[m_current[*n + 1]].index ;
 662     *linestart = newlinestart ;
 663     m_srefchartype[*n] = type ;
 664     if (skipblank) {
 665       for (k = 0 ; k < M_BSEQLEN ; k++) {
 666         m_hold[*n + 1 + k] = m_getachar(&m_dhold[*n + 1 + k]) ;
 667         if (m_hold[*n + 1 + k] != ' ' && m_hold[*n + 1 + k] != '\t') {
 668           m_ungetachar(m_hold[*n + 1 + k], m_dhold[*n + 1 + k], TRUE) ;
 669           break ;
 670           }
 671         m_current[*n + 1 + k + 1] = m_current[*n + 1] ;
 672         m_delim[*n + 1 + k] = m_delim[*n] ;
 673         m_srefchartype[*n + 1 + k] = wc_seqchar ;
 674         }
 675       *n += k + 1 ;
 676       }
 677     else (*n)++ ;
 678     m_srefchartype[*n] = wc_rschar ;
 679     }
 680
 681 /* Scans past a comment within a markup declaration */
 682 void m_readcomments(void)
 683   {
 684     int c ;
 685     M_HOLDTYPE dchar ;
 686
 687     while (! m_gettoken(&c, &dchar, COMCON))
 688       if (c == EOF) {
 689         m_error("Document ended within a comment") ;
 690         m_done() ;
 691         }
 692     }
 693
 694 /* Scanner */
 695 int m_scan(void)
 696   {
 697     int c ;
 698     M_HOLDTYPE dchar ;
 699     int n ;
 700     char buffer[10] ;
 701     char mb_ee, mb_re, mb_space, mb_tab;
 702     M_WCHAR wc_ee, wc_re, wc_space, wc_tab;
 703
 704     mb_ee = M_EE;
 705     mbtowc(&wc_ee, &mb_ee, 1);
 706     mb_re = M_RE;
 707     mbtowc(&wc_re, &mb_re, 1);
 708     mb_space = M_SPACE;
 709     mbtowc(&wc_space, &mb_space, 1);
 710     mb_tab = M_TAB;
 711     mbtowc(&wc_tab, &mb_tab, 1);
 712     while (TRUE) {
 713       n = m_gettoken(&c, &dchar, m_curcon) ;
 714       if (n) {
 715         if (n != M_ENTITYEND && m_stacktop->linestat == M_NOTHING)
 716           m_stacktop->linestat = M_SOMETHING ;
 717         switch (n) {
 718           case M_LITRS:
 719           case M_LITSCR:
 720             m_atrs = TRUE ;
 721             continue ;
 722           case M_LITRE:
 723           case M_LITECR:
 724             m_ungetachar(wc_re, M_ENTNORMAL, FALSE) ;
 725             continue ;
 726           case M_LITSPACE:
 727           case M_LITCSPACE:
 728             m_ungetachar(wc_space, M_ENTNORMAL, FALSE) ;
 729             continue ;
 730           case M_LITTAB:
 731           case M_LITCTAB:
 732             m_ungetachar(wc_tab, M_ENTNORMAL, FALSE) ;
 733             continue ;
 734           case M_LIT:
 735           case M_LITA:
 736             m_litproc(n) ;
 737             return(M_LITERAL) ;
 738           default:
 739             return(n) ;
 740           }
 741         }
 742       /* Check for Entity End */
 743       if (dchar == wc_ee) {
 744         m_eopencnt-- ;
 745         if (m_stacktop->element &&
 746             m_element[m_stacktop->element - 1].content == M_RCDATA) {
 747           if (m_eopencnt == m_stacktop->thisent) {
 748             if (m_netlevel) m_curcon = NETRCDATA ;
 749             else m_curcon = RCDATAEL ;}
 750           else if (m_eopencnt < m_stacktop->thisent)
 751             m_stacktop->thisent = m_eopencnt ;
 752           }
 753         if (m_newcon(m_curcon - 1, M_ENTITYEND - 1)) return(M_ENTITYEND) ;
 754         continue ;
 755         }
 756       /* Whitespace character--check if could be data.  If so,
 757          if it's a RE, check if its significant */
 758       if (m_whitespace((M_WCHAR) c)) {
 759         if (! m_newcon(m_curcon - 1, M_TEXT - 1)) continue ;
 760         if (c != wc_re || m_curcon == PROCINT || m_curcon == LITCON ||
 761                        m_curcon == LITENT || m_curcon == LITAENT) {
 762           m_scanval = c ;
 763           return(M_TEXT) ;
 764           }
 765         m_sigre() ;
 766         continue ;
 767         }
 768       if (c == EOF) {
 769         if (m_sysecnt) {
 770           m_closent(m_sysent[m_sysecnt--]) ;
 771           if (m_chtrace) {
 772             m_trace("Closing to level ") ;
 773             sprintf(buffer, "%d", m_sysecnt) ;
 774             m_trace(buffer) ;
 775             m_trace("\n") ;
 776             }
 777           continue ;
 778           }
 779         return(M_ENDFILE) ;
 780         }
 781       if (
 782           ((m_curcon == SELEMENT ||
 783             m_curcon == EELEMENT ||
 784             m_curcon == ENTNAME ||
 785             m_curcon == MAPNAME ||
 786             m_curcon == AMAPNAME)
 787             && m_cttype(c) == M_NMSTART) ||
 788           ((m_curcon == ATTNAME || m_curcon == ATTVAL ||
 789               m_curcon == NEEDVI) &&
 790             m_cttype(c) != M_NONNAME)
 791           ){
 792         m_getname((M_WCHAR) c) ;
 793         return(M_NAME) ;
 794         }
 795       switch (m_curcon) {
 796         case ATTVAL:
 797           m_err1("Expecting value for %s",
 798                  &m_pname[m_parameter[m_ppsave - 1].paramname]) ;
 799           m_stcomplete() ;
 800           m_missingtagc(c, dchar, TRUE) ;
 801           continue ;
 802         case ATTNAME:
 803           m_stcomplete() ;
 804           m_missingtagc(c, dchar, TRUE) ;
 805           continue ;
 806         case NEEDVI:
 807           m_attvonly(m_saveatt) ;
 808           m_stcomplete() ;
 809           m_missingtagc(c, dchar, TRUE) ;
 810           continue ;
 811         case ETAGEND:
 812           if (! m_stacktop->oldtop)
 813             m_scanel = m_arc[m_state[0].first - 1].label ;
 814           else m_scanel = m_stacktop->element ;
 815           m_stacktop->holdre = FALSE ;
 816           m_etcomplete() ;
 817           m_missingtagc(c, dchar, FALSE) ;
 818           continue ;
 819         default:
 820           break ;
 821         }
 822       m_scanval = c ;
 823       if (! m_newcon(m_curcon - 1, M_TEXT - 1)) return(M_BLACKSPACE) ;
 824       return(M_TEXT) ;
 825       } /* End while */
 826     } /* End scan */
 827
 828
 829 /* Process explicit or implied USEMAP or ADDMAP */
 830 void m_setmap(int map, LOGICAL useoradd)
 831 {
 832     int i ;
 833     int sref ;
 834
 835     if (! m_stacktop->oldtop) {
 836       m_error("Program error: attempt to set map for empty stack") ;
 837       m_exit(TRUE) ;
 838       }
 839
 840     /* #EMPTY map*/
 841     if (map == 1) {
 842       if (m_stacktop->map && m_stacktop->oldtop->map != m_stacktop->map)
 843         m_free(m_stacktop->map, "short reference map") ;
 844       /* Done, if USEMAP */
 845       if (useoradd) {
 846         m_stacktop->map = NULL ;
 847         return ;
 848         }
 849       /* <!ADDMAP #EMPTY> restores map from beginning of element */
 850       m_stacktop->map = m_stacktop->oldtop->map ;
 851       if (m_element[m_stacktop->element - 1].srefptr)
 852         m_setmap(m_element[m_stacktop->element - 1].srefptr,
 853                  (LOGICAL) m_element[m_stacktop->element - 1].useoradd) ;
 854       return ;
 855       }
 856
 857     /* Allocate and initialize a new map if needed */
 858     if (! m_stacktop->map || m_stacktop->map == m_stacktop->oldtop->map) {
 859       m_stacktop->map =
 860         (int *) m_malloc(sizeof(int) * M_SREFCNT, "short reference map") ;
 861       for (i = 0 ; i < M_SREFCNT ; i++)
 862         if (! useoradd && m_stacktop->oldtop->map)
 863           m_stacktop->map[i] = m_stacktop->oldtop->map[i] ;
 864         else m_stacktop->map[i] = M_NULLVAL ;
 865       }
 866     /* Clear an old map if replacing it */
 867     else
 868       if (useoradd)
 869         for (i = 0 ; i < M_SREFCNT ; i++)
 870           m_stacktop->map[i] = M_NULLVAL ;
 871
 872     /* Offset into m_map is 2, 1 for 0-based indexing, 1 for #EMPTY code */
 873     for (sref = m_map[map - 2] ; sref ; sref = m_sref[sref - 1].next)
 874        m_stacktop->map[m_sref[sref - 1].sref - 1] = m_sref[sref - 1].entity ;
 875     }
 876
 877 /* Check for short reference delimiters */
 878 void m_shortref(int context)
 879 {
 880 int n = 0 ;
 881 int i ;
 882 int c ;
 883 LOGICAL linestart = m_atrs ;
 884 char mb_ee;
 885 M_WCHAR wc_ee;
 886
 887 mb_ee = M_EE;
 888 mbtowc(&wc_ee, &mb_ee, 1);
 889
 890 /* If no short references defined, don't try to match one */
 891 if (sizeof(m_sreftree)/sizeof(M_PTRIE) == 1) return ;
 892
 893 /* Can return if using MARKUP extensions and no map is active */
 894 if (! m_conform && ! m_stacktop->map) return ;
 895
 896 m_current[0] = 0 ;
 897 m_srefchartype[0] = M_RSCHAR ;
 898 while (TRUE)
 899     {
 900     /* Search through short reference delimiter tree */
 901     while (TRUE)
 902         {
 903         m_delim[n] = FALSE ;
 904
 905         /* Look for RS */
 906         if (linestart && m_srefchartype[n] >= M_RSCHAR)
 907             {
 908             for (i = m_current[n] ;
 909             m_sreftree[i].more && m_sreftree[i].symbol < RS ;
 910             i++) ;
 911             if (m_sreftree[i].symbol == RS)
 912                 {
 913                 m_nextdelimchar(&n, i, &linestart, FALSE, FALSE, M_RSCHAR) ;
 914                 continue ;
 915                 }
 916             }
 917
 918         /* Look for white space sequence */
 919         if (m_srefchartype[n] >= M_WSCHAR)
 920             {
 921             for (i = m_current[n] ;
 922             m_sreftree[i].more && m_sreftree[i].symbol < WSSEQ ;
 923             i++) ;
 924             if (m_sreftree[i].symbol == WSSEQ)
 925                 {
 926                 m_nextdelimchar(&n, i, &linestart, FALSE, TRUE, M_WSCHAR) ;
 927                 continue ;
 928                 }
 929             }
 930
 931         /* Look at next character from input stream */
 932         m_hold[n] = m_getachar(&m_dhold[n]) ;
 933         if (m_dhold[n] == wc_ee ||
 934         (m_dhold[n] != M_NORMAL && m_dhold[n] != M_ENTNORMAL))
 935             {
 936             m_srefchartype[n] = M_REGCHAR ;
 937             break ;
 938             }
 939
 940         /* Look for blank sequence */
 941         if (m_srefchartype[n] >= M_BSCHAR &&
 942         (m_hold[n] == ' ' || m_hold[n] == '\t'))
 943             {
 944             for (i = m_current[n] ;
 945             m_sreftree[i].more && m_sreftree[i].symbol < BLANKSEQ ;
 946             i++) ;
 947             if (m_sreftree[i].symbol == BLANKSEQ &&
 948             (m_hold[n] == ' ' || m_hold[n] == '\t'))
 949                 {
 950                 m_nextdelimchar(&n, i, &linestart, FALSE, TRUE, M_BSCHAR) ;
 951                 continue ;
 952                 }
 953             }
 954
 955         /* Look for regular character */
 956         c = m_ctupper(m_hold[n]) ;
 957         if (m_cttype(c) != M_NMSTART)
 958             {
 959             for (i = m_current[n] ;
 960                  m_sreftree[i].more && (int) m_sreftree[i].symbol < c ;
 961                  i++) ;
 962             if ((int) m_sreftree[i].symbol == c)
 963                 {
 964                 m_nextdelimchar(&n, i, &linestart, m_atrs, FALSE, M_REGCHAR) ;
 965                 continue ;
 966                 }
 967             }
 968
 969         m_srefchartype[n] = M_REGCHAR ;
 970         break ;
 971         } /* End search through sref delimiter tree */
 972
 973     while (TRUE)
 974         {
 975         if (m_delim[n])
 976             {
 977             /* Found a delimiter. If letters were allowed in short references
 978             would check here for runon situations such as <!ENTITYrunon ... */
 979             if (m_gendelim(n, context))
 980                 {
 981                 for (i = n ; i >= 0 ; i--)
 982                     if (m_srefchartype[i] < M_WSCHAR)
 983                         m_ungetachar(m_hold[i], m_dhold[i], TRUE) ;
 984                 return ;
 985                 }
 986             linestart = TRUE ;
 987             for (i = n ; i >= 0 ; i--)
 988             if (m_srefchartype[i] < M_WSCHAR)
 989                 {
 990                 linestart = FALSE ;
 991                 break ;
 992                 }
 993             if (linestart) m_atrs = FALSE ;
 994             if (m_stacktop->map && m_stacktop->map[m_delim[n] - 1])
 995                 {
 996                 m_entexpand(
 997                 &m_entities[m_stacktop->map[m_delim[n] - 1] - 1]) ;
 998                 return ;
 999                 }
1000             if (m_conform)
1001                 {
1002                 for (i = n ; i >= 0 ; i--)
1003                 if (m_srefchartype[i] < M_WSCHAR)
1004                 m_ungetachar(m_hold[i], M_CDCHAR, TRUE) ;
1005                 return ;
1006                 }
1007             }
1008         if (m_srefchartype[n] < M_WSCHAR)
1009         m_ungetachar(m_hold[n], m_dhold[n], TRUE) ;
1010         if (m_srefchartype[n] > M_REGCHAR)
1011             {
1012             m_srefchartype[n]-- ;
1013             break ;
1014             }
1015         n-- ;
1016         if (n < 0) return ;
1017         }
1018     }
1019 }
1020
1021 /* Test for significant record ends.  Ignore RE (\n) if
1022       1)  It is the first RE in the content and no data character
1023           or contextual end tag has occurred
1024       2)  Something has occurred on the line but not a data character
1025           or contextual end tag [linestat == M_SOMETHING]
1026       3)  If a record end might be the last one in an element, save it
1027 */
1028 void m_sigre(void)
1029   {
1030     /* Check for first RE in content and no preceding content */
1031     if (m_start &&
1032         (! m_stacktop->firstre && m_oldlinestat[m_oldlsindex] != M_DCORCET)) {
1033       m_stacktop->firstre = TRUE ;
1034       return ;
1035       }
1036     /* Check for line containing other than data characters or contextual
1037        subelements */
1038     if (m_start && m_oldlinestat[m_oldlsindex] == M_SOMETHING) return ;
1039     /* Save the RE to see what follows */
1040     m_holdproc() ;
1041     m_stacktop->holdre = TRUE ;
1042     return ;
1043     } /* End white space */
1044
1045 /* Returns a context-dependent delimiter string to input stream so
1046    characters can be reread one at a time in another context */
1047 void m_undodelim(M_WCHAR *delim, LOGICAL flag)
1048 {
1049     M_WCHAR *p ;
1050
1051     for (p = delim ; *p ; p++) ;
1052
1053     p-- ;
1054     while (TRUE) {
1055       m_ungetachar((int) *p, M_NORMAL, flag) ;
1056       if (p == delim) return ;
1057       p-- ;
1058       }
1059     }
1060
1061 /* Place a character on the current input stream.  The character may have
1062    been scanned and determined not to be part of the current token or it
1063    may be in the expansion of an entity*/
1064 void m_ungetachar(int c, M_HOLDTYPE dchar, LOGICAL preread)
1065 {
1066     char buffer[10] ;
1067     int length;
1068     char mb_ee;
1069     M_WCHAR wc_ee;
1070
1071     mb_ee = M_EE;
1072     mbtowc(&wc_ee, &mb_ee, 1);
1073     if (m_chtrace) {
1074       if (dchar) {
1075         m_trace("unget(") ;
1076         length = wctomb(buffer, c);
1077         buffer[length] = 0;
1078         m_trace(buffer) ;
1079         m_trace(")[") ;
1080         sprintf(buffer, "%d", c) ;
1081         m_trace(buffer) ;
1082         m_trace("],") ;
1083         sprintf(buffer, "%d", dchar) ;
1084         m_trace(buffer) ;
1085         m_trace("\n") ;
1086         }
1087       else m_trace("unget(EE)\n") ;
1088       }
1089     m_inctest(&m_toundo, M_SAVECHAR, "M_SAVECHAR") ;
1090     m_sourcefile[m_toundo - 1] = m_sysecnt ;
1091     m_savedchar[m_toundo - 1] = dchar ;
1092     m_savechar[m_toundo - 1] = dchar == wc_ee ? (int) m_atrs : c ;
1093     if (preread) {
1094       m_stacktop->linestat = m_oldlinestat[m_oldlsindex] ;
1095       m_atrs = m_oldatrs[m_oldlsindex] ;
1096       m_oldlsindex = (m_oldlsindex - 1 + M_SAVECHAR) % M_SAVECHAR ;
1097       }
1098     if (m_toundo > m_maxundo) m_maxundo = m_toundo ;
1099     if (c == M_RE)
1100       if (dchar == M_NORMAL) m_line[m_sysecnt]-- ;
1101     }
1102
1103 /* Have encountered an M_ERO.  If the entity reference is valid, process it*/
1104 LOGICAL m_vldentref(void)
1105   {
1106     M_HOLDTYPE dchar ;
1107     int next ;
1108     M_ENTITY *openent ;
1109     char mb_ee;
1110     M_WCHAR wc_ee;
1111
1112     mb_ee = M_EE;
1113     mbtowc(&wc_ee, &mb_ee, 1);
1114     next = m_getachar(&dchar) ;
1115     if (next != EOF && m_cttype(next) == M_NMSTART && dchar != wc_ee) {
1116       m_getname((M_WCHAR) next) ;
1117       if (! m_gettoken(&next, &dchar, ENTREF))
1118         if (next != M_RE) m_ungetachar(next, dchar, TRUE) ;
1119       if (openent = (M_ENTITY *) m_lookfortrie(m_name, m_enttrie))
1120         m_entexpand(openent) ;
1121       else m_err1("Reference to undefined entity '%s'", m_name) ;
1122       return(TRUE) ;
1123       }
1124     m_ungetachar(next, dchar, TRUE) ;
1125     return(FALSE) ;
1126     }
1127
1128 #if defined(sparse)
1129 #include "sparse.c"
1130 #endif
1131
1132