cde/programs/dthelp/parser/canon1/parser/scan.c

   1 /*
   2  * CDE - Common Desktop Environment
   3  *
   4  * Copyright (c) 1993-2012, The Open Group. All rights reserved.
   5  *
   6  * These libraries and programs are free software; you can
   7  * redistribute them and/or modify them under the terms of the GNU
   8  * Lesser General Public License as published by the Free Software
   9  * Foundation; either version 2 of the License, or (at your option)
  10  * any later version.
  11  *
  12  * These libraries and programs are distributed in the hope that
  13  * they will be useful, but WITHOUT ANY WARRANTY; without even the
  14  * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15  * PURPOSE. See the GNU Lesser General Public License for more
  16  * details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with these libraries and programs; if not, write
  20  * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
  21  * Floor, Boston, MA 02110-1301 USA
  22  */
  23 /* $XConsortium: scan.c /main/3 1995/11/08 09:42:39 rswiston $ */
  24 /*
  25               Copyright 1986 Tandem Computers Incorporated.
  26 This product and information is proprietary of Tandem Computers Incorporated.
  27                    Copyright 1986, 1987, 1988, 1989 Hewlett-Packard Co.
  28 */
  29
  30 /* Scan.c is the scanner for program PARSER */
  31
  32 #include <string.h>
  33 #include <malloc.h>
  34 #include <stdio.h>
  35 #if defined(MSDOS)
  36 #include <process.h>
  37 #endif
  38 #include "basic.h"
  39 #include "trie.h"
  40 #define M_CONDEF
  41 #include "context.h"
  42 #define M_DELIMDEF
  43 #include "delim.h"
  44 #define M_DTDDEF
  45 #include "dtd.h"
  46 #include "arc.h"
  47 #define M_PARDEF
  48 #include "parser.h"
  49 #define M_ENTDEF
  50 #include "entity2.h"
  51 #include "sref.h"
  52
  53 /* Actually read a character from an input stream */
  54 int m_actgetc(M_NOPAR)
  55   {
  56     int c ;
  57
  58     c = m_getc(m_sysent[m_sysecnt]) ;
  59     m_saveline[m_svlncnt[m_sysecnt]][m_sysecnt] = c ;
  60     if (++m_svlncnt[m_sysecnt] >= M_LINELENGTH) {
  61       m_svlncnt[m_sysecnt] = 0 ;
  62       m_svlnwrap[m_sysecnt] = TRUE ;
  63       }
  64     return(c) ;
  65     }
  66
  67 /* Expand an entity reference */
  68 void m_entexpand(openent)
  69   M_ENTITY *openent ;
  70   {
  71     M_WCHAR *p ;
  72     M_HOLDTYPE dchar ;
  73     char buffer[10] ;
  74     int i ;
  75
  76     m_ungetachar(M_NULLVAL, M_EE, FALSE) ;
  77     m_eopencnt++ ;
  78     m_opene[m_eopencnt - 1] = openent ;
  79
  80     if (m_stacktop->element &&
  81         m_element[m_stacktop->element - 1].content == M_RCDATA)
  82       m_curcon = RCNEWENT ;
  83     if (m_curcon == LITCON || m_curcon == LITACON)
  84       m_curcon = ENTINLIT ;
  85     if (! openent->wheredef) {
  86       m_eopencnt-- ;
  87       m_err1("%s: System error -- no definition for predeclared entity",
  88              openent->name) ;
  89       m_eopencnt++ ;
  90       return ;
  91       }
  92     if (m_curcon == ENTINLIT)
  93       if (openent->type != M_GENERAL) {
  94         m_eopencnt-- ;
  95         m_err1("%s: Typed entity not allowed in parameter value",
  96                openent->name) ;
  97         m_eopencnt++ ;
  98         return ;
  99         }
 100     if (m_eopencnt > M_ENTLVL) {
 101       m_eopencnt-- ;
 102       m_err1("%s: Too many nested entities", openent->name) ;
 103       m_eopencnt++ ;
 104       return ;
 105       }
 106     for (i = 0 ; i < m_eopencnt - 1; i++)
 107       if (m_opene[i] == openent) {
 108         m_eopencnt-- ;
 109         m_err1("Recursive call to entity %s ignored", openent->name) ;
 110         m_eopencnt++ ;
 111         return ;
 112         }
 113
 114     /* If SDATA or PI entity (regular or CODE) at beginning of document
 115        instance, call m_startdoc and reset m_curcon past preamble */
 116     if (m_curcon == PREAMBLE &&
 117         (openent->type == M_SDATA ||
 118          openent->type == M_CODESDATA ||
 119          openent->type == M_PI ||
 120          openent->type == M_CODEPI)) {
 121       m_startdoc() ;
 122       m_curcon = START ;
 123       m_adjuststate() ;
 124       }
 125
 126     /* SDATA entity */
 127       if (openent->type == M_SDATA || openent->type == M_CODESDATA) {
 128         if (! m_stacktop->intext) {
 129           if (! m_strtproc(M_NULLVAL)) {
 130             if (m_stacktop->oldtop)
 131               m_err1("SDATA entity not allowed at this point in %s",
 132                      m_nameofelt(m_stacktop->element)) ;
 133             else if (! m_start)
 134               m_error("Document may not start with SDATA entity") ;
 135             }
 136           m_start = TRUE ;
 137           m_stacktop->firstre = TRUE ;
 138           m_stacktop->intext = TRUE ;
 139           if (m_curcon == ELCON || m_curcon == DATACON)
 140             m_curcon = POUNDCDATA ;
 141           else if (m_curcon == NETELCON || m_curcon == NETDATACON)
 142             m_curcon = NETCDATA ;
 143           }
 144         m_stacktop->linestat = M_DCORCET ;
 145         m_holdproc() ;
 146         }
 147
 148     /* CODE entity */
 149     if (openent->type == M_CODEPI || openent->type == M_CODESDATA) {
 150       if (openent->type == M_CODEPI)
 151         m_stacktop->linestat = M_SOMETHING ;
 152       m_codeent(openent->codeindex) ;
 153       return ;
 154       }
 155
 156     /* PI or SDATA, but not CODE entity */
 157     if (openent->type == M_PI || openent->type == M_SDATA) {
 158       m_piaction(openent->content, openent->name, openent->type) ;
 159       return ;
 160       }
 161
 162     /* Subordinate data file */
 163     if (openent->type == M_SYSTEM) {
 164       m_sysent[m_sysecnt + 1] = m_openent(openent->content) ;
 165       if (m_sysent[m_sysecnt + 1]) {
 166         m_sysecnt++ ;
 167         m_line[m_sysecnt] = 1 ;
 168         m_svlncnt[m_sysecnt] = 0 ;
 169         m_svlnwrap[m_sysecnt] = FALSE ;
 170         if (m_chtrace) {
 171           m_trace("Opening `") ;
 172           m_wctrace(openent->content) ;
 173           m_trace("'(") ;
 174           sprintf(buffer, "%d", m_sysecnt) ;
 175           m_trace(buffer) ;
 176           m_trace(")\n") ;
 177           }
 178         return ;
 179         }
 180       m_eopencnt-- ;
 181       m_err1("Unable to open file %s", openent->content) ;
 182       m_eopencnt++ ;
 183       return ;
 184       }
 185
 186     /* An entity reference has been encountered.  Put the content of the
 187        entity, including any leading or trailing delimiters into the input
 188        stream in reverse order */
 189     /* Closing delimiter */
 190     switch (openent->type) {
 191       case M_STARTTAG:
 192       case M_ENDTAG: {
 193         m_undodelim(m_dlmptr[M_TAGC - 1], FALSE) ;
 194         break ;
 195         }
 196       case M_MD: {
 197         m_undodelim(m_dlmptr[M_MDC - 1], FALSE) ;
 198         break ;
 199         }
 200       default:
 201         break ;
 202       }
 203     /* Content of entity -- scan for end to reverse string */
 204     if (openent->type == M_CDATAENT) dchar = M_CDCHAR ;
 205     else dchar = M_ENTNORMAL ;
 206     if (p = openent->content)
 207       while (*p) p++;
 208     if (p != openent->content) {
 209       p-- ;
 210       while (TRUE) {
 211         m_ungetachar((int) *p, dchar, FALSE) ;
 212         if (p == openent->content) break ;
 213         p-- ;
 214         }
 215       }
 216     /* Opening delimiter */
 217     switch (openent->type) {
 218       case M_STARTTAG: {
 219         m_undodelim(m_dlmptr[M_STAGO - 1], FALSE) ;
 220         break ;
 221         }
 222       case M_ENDTAG: {
 223         m_undodelim(m_dlmptr[M_ETAGO - 1], FALSE) ;
 224         break ;
 225         }
 226       case M_MD: {
 227         m_undodelim(m_dlmptr[M_MDO - 1], FALSE) ;
 228         break ;
 229         }
 230       default:
 231         break ;
 232       }
 233     } /* End m_entexpand */
 234
 235 /* An srlen-character long short-reference delimiter has been found.  Verify
 236    that it is not the prefix of a general delimiter recognized in context*/
 237 LOGICAL m_gendelim(srlen, context)
 238   int srlen ;
 239   int context ;
 240   {
 241     int ghold[MAXD + 1] ;
 242     int ucase ;
 243     int next ;
 244     int i, n = 0, current, delim[MAXD + 1], oldchars = 0 ;
 245     int newcharstart = 0 ;
 246     M_HOLDTYPE dhold[MAXD + 1], dchar ;
 247     LOGICAL linestart ;
 248     LOGICAL found ;
 249
 250     if (! (current = m_contree[context - 1])) return(FALSE) ;
 251     linestart = TRUE ;
 252     for (i = 0 ; i <= srlen ; i++)
 253       if (m_srefchartype[i] != M_RSCHAR && m_srefchartype[i] != M_WSCHAR) {
 254         linestart = FALSE ;
 255         break ;
 256       }
 257     if (linestart) return(FALSE) ;
 258
 259     current-- ;
 260     while (TRUE) {
 261       delim[n] = FALSE ;
 262       while (oldchars <= srlen &&
 263              (m_srefchartype[oldchars] == M_RSCHAR ||
 264               m_srefchartype[oldchars] == M_WSCHAR))
 265         oldchars++ ;
 266       if (oldchars <= srlen)
 267         ucase = m_hold[oldchars++] ;
 268       else {
 269         if (! newcharstart) newcharstart = n ;
 270         ghold[n] = m_getachar(&dhold[n]) ;
 271         ucase = m_ctupper(ghold[n]) ;
 272         if (dhold[n] != M_NORMAL && dhold[n] != M_ENTNORMAL) break ;
 273         }
 274       for (i = current ;
 275            (int) m_delimtrie[i].symbol < ucase && m_delimtrie[i].more ;
 276            i++) ;
 277       if ((int) m_delimtrie[i].symbol == ucase) {
 278         current = m_delimtrie[i].index ;
 279         if (! m_delimtrie[current].symbol)
 280           delim[n] = m_delimtrie[current].index ;
 281         n++ ;
 282         }
 283       else break ;
 284       }
 285
 286     if (! newcharstart) return(FALSE) ;
 287     while (n >= newcharstart - 1) {
 288       found = FALSE ;
 289       if (delim[n]) {
 290         /* Found a delimiter. If it ends with a letter, verify
 291            that the following character is not a letter, in order
 292            to issue error messages in cases such as <!ENTITYrunon ... */
 293         if (m_cttype(ghold[n]) != M_NMSTART) found = TRUE ;
 294         else {
 295           next = m_getachar(&dchar) ;
 296           m_ungetachar(next, dchar, TRUE) ;
 297           if (next == EOF || m_cttype(next) != M_NMSTART)
 298             found = TRUE ;
 299           }
 300         }
 301       if (found) {
 302         if (delim[n] == M_ERO || delim[n] == M_STAGO ||
 303             delim[n] == M_ETAGO) {
 304           next = m_getachar(&dchar) ;
 305           m_ungetachar(next, dchar, TRUE) ;
 306           if (! (m_cttype(next) == M_NMSTART &&
 307                  (dchar == M_NORMAL || dchar == M_ENTNORMAL))) {
 308             n-- ;
 309             continue ;
 310             }
 311           }
 312         while (n >= newcharstart) {
 313           m_ungetachar(ghold[n], dhold[n], TRUE) ;
 314           n-- ;
 315           }
 316         return(TRUE) ;
 317         } /* End if delim[n] */
 318       if (n >= newcharstart) m_ungetachar(ghold[n], dhold[n], TRUE) ;
 319       n-- ;
 320       }
 321
 322     return(FALSE) ;
 323     }
 324
 325 /* Reads next input character from the current source file or from an
 326    entity expansion */
 327 int m_getachar(dchar)
 328   M_HOLDTYPE *dchar ;
 329   {
 330     int c ;
 331     int i ;
 332     char buffer[10] ;
 333     int length;
 334     M_WCHAR wc_ee, wc_re;
 335     char    mb_ee, mb_re;
 336
 337     mb_ee = M_EE;
 338     mb_re = M_RE;
 339     mbtowc(&wc_ee, &mb_ee, 1);
 340     mbtowc(&wc_re, &mb_re, 1);
 341     if (m_toundo && m_sysecnt <= m_sourcefile[m_toundo - 1]) {
 342       c = m_savechar[--m_toundo] ;
 343       *dchar = m_savedchar[m_toundo] ;
 344       if (*dchar == wc_ee) m_atrs = (M_WCHAR) c;
 345       }
 346     else {
 347       c = m_actgetc() ;
 348       *dchar = M_NORMAL ;
 349       if (m_whitespace((M_WCHAR) c) && c != wc_re) {
 350         /* White space, but not RE, i.e., space or tab */
 351         for (m_wscount = 0 ; m_wscount < M_WSPACELEN ; m_wscount++) {
 352           m_wspace[m_wscount] = m_actgetc() ;
 353           if (! m_whitespace((M_WCHAR) m_wspace[m_wscount]) ||
 354               m_wspace[m_wscount] == wc_re)
 355             break ;
 356           }
 357         if (m_whitespace((M_WCHAR) m_wspace[m_wscount]) &&
 358             m_wspace[m_wscount] != wc_re) {
 359           m_error("Ignoring blank or tab") ;
 360           while (m_whitespace((M_WCHAR) m_wspace[m_wscount]) &&
 361                  m_wspace[m_wscount] != wc_re)
 362             m_wspace[m_wscount] = m_actgetc() ;
 363           }
 364         if (m_wscount > m_maxws) m_maxws = m_wscount ;
 365         if (m_wspace[m_wscount] == wc_re) c = wc_re ;
 366         else {
 367           for (i = 0 ; i <= m_wscount ; i++)
 368             m_ungetachar(m_wspace[m_wscount - i], M_NORMAL, FALSE) ;
 369           }
 370         } /* End just read a blank or tab, is it line-trailing? */
 371       } /* End read a character from file */
 372
 373     m_oldlsindex = (m_oldlsindex + 1) % M_SAVECHAR ;
 374     m_oldlinestat[m_oldlsindex] = m_stacktop->linestat ;
 375     m_oldatrs[m_oldlsindex] = m_atrs ;
 376     if (c == wc_re && *dchar) {
 377       if (*dchar == M_NORMAL) m_line[m_sysecnt]++ ;
 378       m_stacktop->linestat = M_NOTHING ;
 379       m_atrs = TRUE ;
 380       }
 381     else if (*dchar) m_atrs = FALSE ;
 382     if (m_chtrace) {
 383       if (*dchar) {
 384         m_trace("get(") ;
 385         length = wctomb(buffer, c);
 386         buffer[length] = 0;
 387         m_trace(buffer) ;
 388         m_trace(")[") ;
 389         sprintf(buffer, "%d", c) ;
 390         m_trace(buffer) ;
 391         m_trace("],") ;
 392         sprintf(buffer, "%d", *dchar) ;
 393         m_trace(buffer) ;
 394         m_trace("\n") ;
 395         }
 396       else m_trace("get(EE)\n") ;
 397       }
 398     return(c) ;
 399     }
 400
 401 /* Reads a name token */
 402 #if defined(M_PROTO)
 403 void m_getname(M_WCHAR first)
 404 #else
 405 void m_getname(first)
 406   M_WCHAR first ;
 407 #endif
 408 {
 409     M_WCHAR *p ;
 410     M_HOLDTYPE dchar ;
 411     int c ;
 412
 413     *(p = m_name) = first ;
 414     while (TRUE) {
 415       c = m_getachar(&dchar) ;
 416       if (c == EOF) break ;
 417       if (dchar != M_NORMAL && dchar != M_ENTNORMAL) break ;
 418       if (m_cttype(c) == M_NONNAME) break ;
 419       *++p = (M_WCHAR) c ;
 420       if (p >= m_name + M_NAMELEN) {
 421         p-- ;
 422         m_error("Name too long") ;
 423         while ((dchar == M_NORMAL || dchar == M_ENTNORMAL) &&
 424                c != EOF &&
 425                m_cttype(c) != M_NONNAME)
 426           c = m_getachar(&dchar) ;
 427         break ;
 428         }
 429       }
 430     m_ungetachar(c, dchar, TRUE) ;
 431     *++p = M_EOS ;
 432     }
 433
  434 /* Reads the next token.

         c        receives the character read when no delimiter is recognized
         dchar    receives that character's hold type
         context  context code; m_contree[context - 1] selects the delimiter
                  trie to match against

         Returns the code of the delimiter recognized; M_TEXT or M_BLACKSPACE
         for a numeric character reference (value left in m_scanval); the
         result of a recursive call after a valid entity reference; or
         M_NULLVAL when no delimiter matched, in which case *c and *dchar
         describe the first character read.

         Input is matched against the trie as far as it will go, then the
         candidates are tried from the longest down, pushing characters back
         with m_ungetachar() as the match is shortened. */
  435 int m_gettoken(c, dchar, context)
  436   int *c ;
  437   M_HOLDTYPE *dchar ;
  438   int context ;
  439   {
  440     int hold[MAXD + 1], next ;
  441     int ucase ;
  442     int i, n = 0, current, delim[MAXD + 1], nexttoken ;
  443     M_HOLDTYPE dhold[MAXD + 1] ;
  444     LOGICAL found ;
  445
           /* In these contexts, once the stack entry has a parent (oldtop),
              short references are checked before general delimiters */
  446     switch (context) {
  447       case DATACON:
  448       case NETDATACON:
  449       case POUNDCDATA:
  450       case NETCDATA:
  451       case ELCON:
  452       case NETELCON:
  453         if (m_stacktop->oldtop) m_shortref(context) ;
  454         break ;
  455       default:
  456         break ;
  457       }
           /* No delimiter trie for this context: hand back a single character */
  458     if (! (current = m_contree[context - 1])) {
  459       *c = m_getachar(dchar) ;
  460       return(M_NULLVAL) ;
  461       }
           /* m_contree holds the trie position plus one (zero meant "none") */
  462     current-- ;
           /* Follow the trie; delim[n] is the delimiter completed by hold[n],
              or FALSE.  On exit hold[n] is the first character that did not
              match (or was not a normal character). */
  463     while (TRUE) {
  464       hold[n] = m_getachar(&dhold[n]) ;
  465       ucase = m_ctupper(hold[n]) ;
  466       delim[n] = FALSE ;
  467       if (dhold[n] != M_NORMAL && dhold[n] != M_ENTNORMAL) break ;
  468       for (i = current ;
  469            (int) m_delimtrie[i].symbol < ucase && m_delimtrie[i].more ;
  470            i++) ;
  471       if ((int) m_delimtrie[i].symbol == ucase) {
  472         current = m_delimtrie[i].index ;
  473         if (! m_delimtrie[current].symbol)
  474           delim[n] = m_delimtrie[current].index ;
  475         n++ ;
  476         }
  477       else break ;
  478       }
  479
           /* Try the candidates from the longest match downwards */
  480     while (n >= 0) {
  481       found = FALSE ;
  482       if (delim[n]) {
  483         /* Found a delimiter. If it ends with a letter, verify
  484            that the following character is not a letter, in order
  485            to issue error messages in cases such as <!ENTITYrunon ... */
  486         if (m_cttype(hold[n]) != M_NMSTART) found = TRUE ;
  487         else {
                 /* Peek at the next character; *c and *dchar serve as scratch
                    here and are overwritten before any M_NULLVAL return */
  488           *c = m_getachar(dchar) ;
  489           m_ungetachar(*c, *dchar, TRUE) ;
  490           if (*c == EOF || m_cttype(*c) != M_NMSTART) found = TRUE ;
  491           }
  492         }
  493       if (found) {
               /* Character reference open: collect the decimal digits that
                  follow into m_scanval */
  494         if (delim[n] == M_CRO) {
  495           next = m_getachar(dchar) ;
  496           if ((*dchar != M_NORMAL && *dchar != M_ENTNORMAL) ||
  497               (m_cttype(next) != M_DIGIT))
  498             m_ungetachar(next, *dchar, TRUE) ;
  499           else {
  500             m_scanval = next - '0' ;
  501             while (TRUE) {
  502               next = m_getachar(dchar) ;
  503               if ((*dchar != M_NORMAL && *dchar != M_ENTNORMAL) ||
  504                   (m_cttype(next) != M_DIGIT)) {
                     /* End of the digits.  A token is then read in ENTREF
                        context (presumably the optional reference-close
                        delimiter -- confirm against the context tables); if
                        there is none, the character read is put back. */
  505                 m_ungetachar(next, *dchar, TRUE) ;
  506                 if (! m_gettoken(&next, dchar, ENTREF))
  507                   m_ungetachar(next, *dchar, TRUE) ;
  508                 if (context == ELCON || context == NETELCON)
  509                   return(M_BLACKSPACE) ;
  510                 else return(M_TEXT) ;
  511                 }
  512               m_scanval = 10 * m_scanval + next - '0' ;
  513               if (m_scanval >= M_CHARSETLEN) {
                     /* Code out of range: undo the last digit, push it back
                        and return the value accumulated before it */
  514                 m_error("Invalid character code") ;
  515                 m_scanval = (m_scanval - next + '0') / 10 ;
  516                 m_ungetachar(next, *dchar, TRUE) ;
  517                 if (context == ELCON || context == NETELCON)
  518                   return(M_BLACKSPACE) ;
  519                 else return(M_TEXT) ;
  520                 }
  521               } /* End loop reading digits after M_CRO */
  522             } /* End M_CRO followed by digit */
  523           } /* End delim[n] == M_CRO */
               /* Entity reference open: if m_vldentref() accepts the
                  reference, rescan; m_curcon is used as the context when it
                  is now RCNEWENT or ENTINLIT, otherwise the original one */
  524         else if (delim[n] == M_ERO)
  525           if (m_vldentref())
  526             return(m_gettoken(c, dchar,
  527                             (m_curcon == RCNEWENT || m_curcon == ENTINLIT) ?
  528                               m_curcon : context)) ;
  529         /* Can be an M_ERO or M_CRO here only if not in context and hence
  530            should not be treated as a delimiter */
  531         if (delim[n] != M_STAGO && delim[n] != M_ETAGO &&
  532             delim[n] != M_ERO && delim[n] != M_CRO)
  533           return(delim[n]) ;
  534         /* M_STAGO and M_ETAGO recognized only if immediately followed by
  535            a M_NMSTART character or by an appropriate closing delimiter
  536            (latter is a short tag) */
  537         if (delim[n] == M_STAGO || delim[n] == M_ETAGO) {
  538           next = m_getachar(dchar) ;
  539           m_ungetachar(next, *dchar, TRUE) ;
  540           if (m_cttype(next) == M_NMSTART &&
  541               (*dchar == M_NORMAL || *dchar == M_ENTNORMAL))
  542             return(delim[n]) ;
                 /* Not a name: look for a delimiter valid after the tag open;
                    if one is found it is pushed back and the tag open stands */
  543           nexttoken = m_gettoken(&next, dchar,
  544             delim[n] == M_STAGO ? SELEMENT : EELEMENT) ;
  545           if (nexttoken) {
  546             m_undodelim(m_dlmptr[nexttoken - 1], TRUE) ;
  547             return(delim[n]) ;
  548             }
  549           else m_ungetachar(next, *dchar, TRUE) ;
  550           } /* End delim[n] is M_STAGO or M_ETAGO */
  551         } /* End if (delim[n]) */
             /* Shorten the match by one character.  hold[0] is never pushed
                back: it is returned through *c below. */
  552       if (n) m_ungetachar(hold[n], dhold[n], TRUE) ;
  553       n-- ;
  554       }
  555
           /* No delimiter: return the first character read */
  556     *c = *hold ;
  557     *dchar = *dhold ;
  558     return(M_NULLVAL) ;
  559     }
 560
 561 /* Reads a literal */
 562 void m_litproc(delim)
 563   int delim ;
 564   {
 565     int n, i ;
 566     M_HOLDTYPE dchar ;
 567     int savecon = m_curcon ;
 568     int c ;
 569     int atentlev ;
 570     int atdelimcon ;
 571     char mb_re, mb_tab, mb_space, mb_null, mb_ee;
 572     M_WCHAR wc_re, wc_tab, wc_space, wc_null, wc_ee;
 573
 574     mb_re = M_RE;
 575     mb_tab = M_TAB;
 576     mb_space = M_SPACE;
 577     mb_null = M_NULLVAL;
 578     mb_ee = M_EE;
 579     mbtowc(&wc_re, &mb_re, 1);
 580     mbtowc(&wc_tab, &mb_tab, 1);
 581     mbtowc(&wc_space, &mb_space, 1);
 582     mbtowc(&wc_null, &mb_null, 1);
 583     mbtowc(&wc_ee, &mb_ee, 1);
 584
 585     m_curcon = delim == M_LIT ? LITCON : LITACON ;
 586     atentlev = m_eopencnt ;
 587     atdelimcon = m_curcon ;
 588     for (i = 0 ; i < M_LITLEN + 1 ; i++) {
 589       n = m_gettoken(&c, &dchar, m_curcon) ;
 590       switch (n) {
 591         case M_ENDFILE:
 592           m_ungetachar(c, dchar, TRUE) ;
 593           m_literal[i] = wc_null ;
 594           m_curcon = savecon ;
 595           return ;
 596         case M_TEXT:
 597           m_literal[i] = (M_WCHAR) m_scanval ;
 598           break ;
 599         case M_LIT:
 600         case M_LITA:
 601           m_literal[i] = wc_null ;
 602           m_curcon = savecon ;
 603           return ;
 604         case M_LITRS:
 605         case M_LITSCR:
 606           break ;
 607         case M_LITRE:
 608         case M_LITECR:
 609           m_literal[i] = wc_re ;
 610           break ;
 611         case M_LITSPACE:
 612         case M_LITCSPACE:
 613           m_literal[i] = wc_space ;
 614           break ;
 615         case M_LITTAB:
 616         case M_LITCTAB:
 617           m_literal[i] = wc_tab ;
 618           break ;
 619         case M_NULLVAL:
 620           m_literal[i] = (M_WCHAR) c ;
 621           if (dchar == wc_ee) {
 622             if (m_curcon == ENTINLIT) {
 623               m_eopencnt-- ;
 624               i-- ;
 625               if (m_eopencnt == atentlev) {
 626                 m_curcon = atdelimcon ;
 627                 break ;
 628                 }
 629               }
 630             else {
 631               m_literal[i] = wc_null ;
 632               m_curcon = savecon ;
 633               m_ungetachar(wc_null, wc_ee, FALSE) ;
 634               return ;
 635               }
 636             }
 637           break ;
 638         default:
 639           m_error("Internal error processing literal") ;
 640           break ;
 641         }
 642       } /* End for i */
 643     m_error("Literal too long") ;
 644     m_literal[i] = wc_null ;
 645     m_curcon = savecon ;
 646     }
 647
 648 /* Called when a missing tagc delimiter is detected */
 649 #if defined(M_PROTO)
 650 void m_missingtagc(int c, M_HOLDTYPE dchar, LOGICAL start)
 651 #else
 652 void m_missingtagc(c, dchar, start)
 653   int c ;
 654   M_HOLDTYPE dchar ;
 655   LOGICAL start ;
 656 #endif
 657 {
 658     if (! m_wholetag) {
 659       if (start) m_mberr1("Invalid parameter or missing %s", m_tagc);
 660       else m_mberr1("Missing %s in end-tag", m_tagc) ;
 661       }
 662     m_ungetachar(c, dchar, TRUE) ;
 663     m_curcon = START ;
 664     m_adjuststate() ;
 665     }
 666
 667 /* Have found one character in a possible short reference delimiter.
 668    Prepare to look for the next one */
 669 #if defined(M_PROTO)
 670 void m_nextdelimchar(int *n, int i, LOGICAL *linestart, LOGICAL newlinestart,
 671                      LOGICAL skipblank, unsigned char type)
 672 #else
 673 void m_nextdelimchar(n, i, linestart, newlinestart, skipblank, type)
 674   int *n ;
 675   int i ;
 676   LOGICAL *linestart ;
 677   LOGICAL newlinestart ;
 678   LOGICAL skipblank ;
 679   unsigned char type ;
 680 #endif
 681 {
 682     int k ;
 683     char mb_re,mb_seqchar, mb_rschar;
 684     M_WCHAR wc_re,wc_seqchar, wc_rschar;
 685
 686     mb_re = M_RE;
 687     mbtowc(&wc_re, &mb_re, 1);
 688     mb_seqchar = M_SEQCHAR;
 689     mbtowc(&wc_seqchar, &mb_seqchar, 1);
 690     mb_rschar = M_RSCHAR;
 691     mbtowc(&wc_rschar, &mb_rschar, 1);
 692     m_current[*n + 1] = m_sreftree[i].index ;
 693     if (! m_sreftree[m_current[*n + 1]].symbol)
 694       m_delim[*n] = m_sreftree[m_current[*n + 1]].index ;
 695     *linestart = newlinestart ;
 696     m_srefchartype[*n] = type ;
 697     if (skipblank) {
 698       for (k = 0 ; k < M_BSEQLEN ; k++) {
 699         m_hold[*n + 1 + k] = m_getachar(&m_dhold[*n + 1 + k]) ;
 700         if (m_hold[*n + 1 + k] != ' ' && m_hold[*n + 1 + k] != '\t') {
 701           m_ungetachar(m_hold[*n + 1 + k], m_dhold[*n + 1 + k], TRUE) ;
 702           break ;
 703           }
 704         m_current[*n + 1 + k + 1] = m_current[*n + 1] ;
 705         m_delim[*n + 1 + k] = m_delim[*n] ;
 706         m_srefchartype[*n + 1 + k] = wc_seqchar ;
 707         }
 708       *n += k + 1 ;
 709       }
 710     else (*n)++ ;
 711     m_srefchartype[*n] = wc_rschar ;
 712     }
 713
 714 /* Scans past a comment within a markup declaration */
 715 void m_readcomments(M_NOPAR)
 716   {
 717     int c ;
 718     M_HOLDTYPE dchar ;
 719
 720     while (! m_gettoken(&c, &dchar, COMCON))
 721       if (c == EOF) {
 722         m_error("Document ended within a comment") ;
 723         m_done() ;
 724         }
 725     }
 726
  727 /* Scanner.

         Repeatedly reads a token with m_gettoken() in the current context
         (m_curcon) and returns the next token code for the parser.  Some
         inputs are absorbed here and scanning continues: literal RS/RE/
         space/tab tokens, entity ends that the context does not accept,
         whitespace that cannot be data, end of a nested input file, and
         recovery from a missing tag close.

         prolog  consulted only at end of file: when TRUE, the file at level
                 1 is not closed here and M_ENDFILE is returned for it.

         Returns a delimiter code from m_gettoken(), or one of M_LITERAL,
         M_ENTITYEND, M_TEXT, M_BLACKSPACE, M_NAME, M_ENDFILE.  For M_TEXT
         and M_BLACKSPACE the character is left in m_scanval. */
  728 #if defined(M_PROTO)
  729 int m_scan(LOGICAL prolog)
  730 #else
  731 int m_scan(prolog)
  732   LOGICAL prolog ;
  733 #endif
  734 {
  735 int c ;
  736 M_HOLDTYPE dchar ;
  737 int n ;
  738 char buffer[10] ;
  739 char mb_ee, mb_re, mb_space, mb_tab;
  740 M_WCHAR wc_ee, wc_re, wc_space, wc_tab;
  741
      /* Wide-character forms of the constants compared against below */
  742 mb_ee = M_EE;
  743 mbtowc(&wc_ee, &mb_ee, 1);
  744 mb_re = M_RE;
  745 mbtowc(&wc_re, &mb_re, 1);
  746 mb_space = M_SPACE;
  747 mbtowc(&wc_space, &mb_space, 1);
  748 mb_tab = M_TAB;
  749 mbtowc(&wc_tab, &mb_tab, 1);
  750 while (TRUE)
  751     {
  752     n = m_gettoken(&c, &dchar, m_curcon) ;
          /* A delimiter was recognized */
  753     if (n)
  754         {
              /* Anything but an entity end counts as content on this line */
  755         if (n != M_ENTITYEND && m_stacktop->linestat == M_NOTHING)
  756         m_stacktop->linestat = M_SOMETHING ;
  757         switch (n)
  758             {
  759             case M_LITRS:
  760             case M_LITSCR:
  761                 m_atrs = TRUE ;
  762                 continue ;
                  /* These tokens are replaced by the character they stand
                     for, pushed back as M_ENTNORMAL, and scanning resumes */
  763             case M_LITRE:
  764             case M_LITECR:
  765                 m_ungetachar(wc_re, M_ENTNORMAL, FALSE) ;
  766                 continue ;
  767             case M_LITSPACE:
  768             case M_LITCSPACE:
  769                 m_ungetachar(wc_space, M_ENTNORMAL, FALSE) ;
  770                 continue ;
  771             case M_LITTAB:
  772             case M_LITCTAB:
  773                 m_ungetachar(wc_tab, M_ENTNORMAL, FALSE) ;
  774                 continue ;
                  /* Opening literal delimiter: read the whole literal */
  775             case M_LIT:
  776             case M_LITA:
  777                 m_litproc(n) ;
  778                 return(M_LITERAL) ;
  779             default:
  780                 return(n) ;
  781             }
  782         }
          /* From here on no delimiter was recognized: c and dchar describe
             a single character */
  783     /* Check for Entity End */
  784     if (dchar == wc_ee)
  785         {
  786         m_eopencnt-- ;
              /* Inside an RCDATA element: back at the entity level recorded
                 in thisent, the RCDATA context is restored; below it,
                 thisent is lowered to the new level */
  787         if (m_stacktop->element &&
  788             m_element[m_stacktop->element - 1].content == M_RCDATA)
  789             {
  790             if (m_eopencnt == m_stacktop->thisent)
  791                 {
  792                 if (m_netlevel) m_curcon = NETRCDATA ;
  793                 else m_curcon = RCDATAEL;
  794                 }
  795             else if (m_eopencnt < m_stacktop->thisent)
  796                 m_stacktop->thisent = m_eopencnt ;
  797             }
              /* Report the entity end only where m_newcon() accepts it in
                 the current context */
  798         if (m_newcon(m_curcon - 1, M_ENTITYEND - 1)) return(M_ENTITYEND) ;
  799         continue ;
  800         }
  801     /* Whitespace character--check if could be data.  If so,
  802     if it's a RE, check if its significant */
  803     if (m_whitespace((M_WCHAR) c))
  804         {
              /* Dropped if text is not acceptable in this context */
  805         if (! m_newcon(m_curcon - 1, M_TEXT - 1)) continue ;
  806         if (c != wc_re || m_curcon == PROCINT || m_curcon == LITCON ||
  807                m_curcon == LITENT || m_curcon == LITAENT)
  808             {
  809             m_scanval = c ;
  810             return(M_TEXT) ;
  811             }
              /* RE in any other context is passed to m_sigre() */
  812         m_sigre() ;
  813         continue ;
  814         }
  815     if (c == EOF)
  816         {
              /* End of a nested input file: close it and continue with the
                 file below, except at level 1 while in the prolog */
  817         if (m_sysecnt && !(prolog && (m_sysecnt == 1)))
  818             {
  819             m_closent(m_sysent[m_sysecnt--]) ;
  820             if (m_chtrace)
  821                 {
  822                 m_trace("Closing to level ") ;
  823                 sprintf(buffer, "%d", m_sysecnt) ;
  824                 m_trace(buffer) ;
  825                 m_trace("\n") ;
  826                 }
  827             continue ;
  828             }
  829         return(M_ENDFILE) ;
  830         }
          /* Contexts expecting a name: the first group requires a name-start
             character, the second accepts any name character */
  831     if (((m_curcon == SELEMENT ||
  832           m_curcon == EELEMENT ||
  833           m_curcon == ENTNAME  ||
  834           m_curcon == MAPNAME  ||
  835           m_curcon == AMAPNAME)   &&
  836          m_cttype(c) == M_NMSTART) ||
  837           ((m_curcon == ATTNAME    ||
  838           m_curcon == ATTVAL       ||
  839           m_curcon == NEEDVI) &&
  840           m_cttype(c) != M_NONNAME))
  841         {
  842         m_getname((M_WCHAR) c) ;
  843         return(M_NAME) ;
  844         }
          /* A character that cannot continue a tag: complete the tag as it
             stands, let m_missingtagc() push the character back and reset
             the context, then rescan */
  845     switch (m_curcon)
  846         {
  847         case ATTVAL:
  848             m_err1("Expecting value for %s",
  849                    &m_pname[m_parameter[m_ppsave - 1].paramname]) ;
  850             m_stcomplete() ;
  851             m_missingtagc(c, dchar, TRUE) ;
  852             continue ;
  853         case ATTNAME:
  854             m_stcomplete() ;
  855             m_missingtagc(c, dchar, TRUE) ;
  856             continue ;
  857         case NEEDVI:
  858             m_attvonly(m_saveatt) ;
  859             m_stcomplete() ;
  860             m_missingtagc(c, dchar, TRUE) ;
  861             continue ;
  862         case ETAGEND:
                  /* With no parent stack entry the element is taken from the
                     first arc of state 0, otherwise from the stack top */
  863             if (! m_stacktop->oldtop)
  864                 m_scanel = m_arc[m_state[0].first - 1].label ;
  865             else m_scanel = m_stacktop->element ;
  866             m_stacktop->holdre = FALSE ;
  867             m_etcomplete() ;
  868             m_missingtagc(c, dchar, FALSE) ;
  869             continue ;
  870         default:
  871             break ;
  872         }
          /* Ordinary character: M_TEXT where text is acceptable, otherwise
             M_BLACKSPACE */
  873     m_scanval = c ;
  874     if (! m_newcon(m_curcon - 1, M_TEXT - 1)) return(M_BLACKSPACE) ;
  875         return(M_TEXT) ;
  876     } /* End while */
  877 } /* End scan */
 878
 879
 880 /* Process explicit or implied USEMAP or ADDMAP */
 881 #if defined(M_PROTO)
 882 void m_setmap(int map, LOGICAL useoradd)
 883 #else
 884 void m_setmap(map, useoradd)
 885   int map ;
 886   LOGICAL useoradd ;
 887 #endif
 888 {
 889     int i ;
 890     int sref ;
 891
 892     if (! m_stacktop->oldtop) {
 893       m_error("Program error: attempt to set map for empty stack") ;
 894       m_exit(TRUE) ;
 895       }
 896
 897     /* #EMPTY map*/
 898     if (map == 1) {
 899       if (m_stacktop->map && m_stacktop->oldtop->map != m_stacktop->map)
 900         m_free(m_stacktop->map, "short reference map") ;
 901       /* Done, if USEMAP */
 902       if (useoradd) {
 903         m_stacktop->map = NULL ;
 904         return ;
 905         }
 906       /* <!ADDMAP #EMPTY> restores map from beginning of element */
 907       m_stacktop->map = m_stacktop->oldtop->map ;
 908       if (m_element[m_stacktop->element - 1].srefptr)
 909         m_setmap(m_element[m_stacktop->element - 1].srefptr,
 910                  (LOGICAL) m_element[m_stacktop->element - 1].useoradd) ;
 911       return ;
 912       }
 913
 914     /* Allocate and initialize a new map if needed */
 915     if (! m_stacktop->map || m_stacktop->map == m_stacktop->oldtop->map) {
 916       m_stacktop->map =
 917         (int *) m_malloc(sizeof(int) * M_SREFCNT, "short reference map") ;
 918       for (i = 0 ; i < M_SREFCNT ; i++)
 919         if (! useoradd && m_stacktop->oldtop->map)
 920           m_stacktop->map[i] = m_stacktop->oldtop->map[i] ;
 921         else m_stacktop->map[i] = M_NULLVAL ;
 922       }
 923     /* Clear an old map if replacing it */
 924     else
 925       if (useoradd)
 926         for (i = 0 ; i < M_SREFCNT ; i++)
 927           m_stacktop->map[i] = M_NULLVAL ;
 928
 929     /* Offset into m_map is 2, 1 for 0-based indexing, 1 for #EMPTY code */
 930     for (sref = m_map[map - 2] ; sref ; sref = m_sref[sref - 1].next)
 931        m_stacktop->map[m_sref[sref - 1].sref - 1] = m_sref[sref - 1].entity ;
 932     }
 933
/* Check for short reference delimiters.

   Tries to match the upcoming input against the short reference
   delimiter tree m_sreftree.  The match is tracked in parallel arrays
   indexed by depth n: m_current[n] (where to start looking among the
   tree entries at this depth), m_srefchartype[n] (which category of
   symbol is still to be tried at this depth), m_hold[n]/m_dhold[n]
   (the character read at this depth and its hold type) and m_delim[n]
   (the delimiter recognized at this depth, or FALSE).

   When a delimiter is recognized, it is either handled by m_gendelim,
   expanded as the entity the active map assigns to it, or (when
   m_conform is set) its characters are pushed back flagged M_CDCHAR.
   If nothing matches, every character that was read is pushed back.

   context is passed through to m_gendelim.

   NOTE(review): the comparisons and the decrement below appear to rely
   on the ordering M_REGCHAR < M_BSCHAR < M_WSCHAR < M_RSCHAR -- confirm
   against the definitions. */
void m_shortref(context)
int context ;
{
int n = 0 ;
int i ;
int c ;
LOGICAL linestart = m_atrs ;
char mb_ee;
M_WCHAR wc_ee;

/* Wide-character form of M_EE, for comparison with hold types */
mb_ee = M_EE;
mbtowc(&wc_ee, &mb_ee, 1);

/* If no short references defined, don't try to match one */
if (sizeof(m_sreftree)/sizeof(M_PTRIE) == 1) return ;

/* Can return if using MARKUP extensions and no map is active */
if (! m_conform && ! m_stacktop->map) return ;

/* Start at the first tree entry, trying the RS category first */
m_current[0] = 0 ;
m_srefchartype[0] = M_RSCHAR ;
while (TRUE)
    {
    /* Search through short reference delimiter tree */
    while (TRUE)
        {
        m_delim[n] = FALSE ;

        /* Look for RS */
        if (linestart && m_srefchartype[n] >= M_RSCHAR)
            {
            /* Skip entries at this level whose symbol sorts before RS */
            for (i = m_current[n] ;
            m_sreftree[i].more && m_sreftree[i].symbol < RS ;
            i++) ;
            if (m_sreftree[i].symbol == RS)
                {
                m_nextdelimchar(&n, i, &linestart, FALSE, FALSE, M_RSCHAR) ;
                continue ;
                }
            }

        /* Look for white space sequence */
        if (m_srefchartype[n] >= M_WSCHAR)
            {
            for (i = m_current[n] ;
            m_sreftree[i].more && m_sreftree[i].symbol < WSSEQ ;
            i++) ;
            if (m_sreftree[i].symbol == WSSEQ)
                {
                m_nextdelimchar(&n, i, &linestart, FALSE, TRUE, M_WSCHAR) ;
                continue ;
                }
            }

        /* Look at next character from input stream */
        m_hold[n] = m_getachar(&m_dhold[n]) ;
        /* Stop extending the match if the hold type equals wc_ee or is
           anything other than M_NORMAL / M_ENTNORMAL */
        if (m_dhold[n] == wc_ee ||
        (m_dhold[n] != M_NORMAL && m_dhold[n] != M_ENTNORMAL))
            {
            m_srefchartype[n] = M_REGCHAR ;
            break ;
            }

        /* Look for blank sequence */
        if (m_srefchartype[n] >= M_BSCHAR &&
        (m_hold[n] == ' ' || m_hold[n] == '\t'))
            {
            for (i = m_current[n] ;
            m_sreftree[i].more && m_sreftree[i].symbol < BLANKSEQ ;
            i++) ;
            /* The space/tab test here repeats the one in the enclosing if */
            if (m_sreftree[i].symbol == BLANKSEQ &&
            (m_hold[n] == ' ' || m_hold[n] == '\t'))
                {
                m_nextdelimchar(&n, i, &linestart, FALSE, TRUE, M_BSCHAR) ;
                continue ;
                }
            }

        /* Look for regular character */
        c = m_ctupper(m_hold[n]) ;
        /* Name-start characters are never matched against the tree */
        if (m_cttype(c) != M_NMSTART)
            {
            for (i = m_current[n] ;
                 m_sreftree[i].more && (int) m_sreftree[i].symbol < c ;
                 i++) ;
            if ((int) m_sreftree[i].symbol == c)
                {
                m_nextdelimchar(&n, i, &linestart, m_atrs, FALSE, M_REGCHAR) ;
                continue ;
                }
            }

        /* Nothing matched at this depth */
        m_srefchartype[n] = M_REGCHAR ;
        break ;
        } /* End search through sref delimiter tree */

    /* Work back from depth n toward depth 0, acting on any delimiter
       recorded at the current depth */
    while (TRUE)
        {
        if (m_delim[n])
            {
            /* Found a delimiter. If letters were allowed in short references
            would check here for runon situations such as <!ENTITYrunon ... */
            if (m_gendelim(n, context))
                {
                /* Push back, last-read first, the entries whose type is
                   below M_WSCHAR */
                for (i = n ; i >= 0 ; i--)
                    if (m_srefchartype[i] < M_WSCHAR)
                        m_ungetachar(m_hold[i], m_dhold[i], TRUE) ;
                return ;
                }
            /* linestart stays TRUE only if no entry 0..n has a type below
               M_WSCHAR; in that case clear m_atrs */
            linestart = TRUE ;
            for (i = n ; i >= 0 ; i--)
            if (m_srefchartype[i] < M_WSCHAR)
                {
                linestart = FALSE ;
                break ;
                }
            if (linestart) m_atrs = FALSE ;
            /* The active map assigns an entity to this delimiter: expand it */
            if (m_stacktop->map && m_stacktop->map[m_delim[n] - 1])
                {
                m_entexpand(
                &m_entities[m_stacktop->map[m_delim[n] - 1] - 1]) ;
                return ;
                }
            /* Delimiter not mapped: when m_conform is set, push its
               characters back flagged M_CDCHAR */
            if (m_conform)
                {
                for (i = n ; i >= 0 ; i--)
                if (m_srefchartype[i] < M_WSCHAR)
                m_ungetachar(m_hold[i], M_CDCHAR, TRUE) ;
                return ;
                }
            }
        /* Push back the character held at this depth, if its type is
           below M_WSCHAR */
        if (m_srefchartype[n] < M_WSCHAR)
        m_ungetachar(m_hold[n], m_dhold[n], TRUE) ;
        /* A category remains untried at this depth: step the type down by
           one and resume the tree search */
        if (m_srefchartype[n] > M_REGCHAR)
            {
            m_srefchartype[n]-- ;
            break ;
            }
        /* This depth is exhausted: drop back one level, or give up */
        n-- ;
        if (n < 0) return ;
        }
    }
}
1078
1079 /* Test for significant record ends.  Ignore RE (\n) if
1080       1)  It is the first RE in the content and no data character
1081           or contextual end tag has occurred
1082       2)  Something has occurred on the line but not a data character
1083           or contextual end tag [linestat == M_SOMETHING]
1084       3)  If a record end might be the last one in an element, save it
1085 */
1086 void m_sigre(M_NOPAR)
1087   {
1088     /* Check for first RE in content and no preceding content */
1089     if (m_start &&
1090         (! m_stacktop->firstre && m_oldlinestat[m_oldlsindex] != M_DCORCET)) {
1091       m_stacktop->firstre = TRUE ;
1092       return ;
1093       }
1094     /* Check for line containing other than data characters or contextual
1095        subelements */
1096     if (m_start && m_oldlinestat[m_oldlsindex] == M_SOMETHING) return ;
1097     /* Save the RE to see what follows */
1098     m_holdproc() ;
1099     m_stacktop->holdre = TRUE ;
1100     return ;
1101     } /* End white space */
1102
1103 /* Returns a context-dependent delimiter string to input stream so
1104    characters can be reread one at a time in another context */
1105 #if defined(M_PROTO)
1106 void m_undodelim(M_WCHAR *delim, LOGICAL flag)
1107 #else
1108 void m_undodelim(delim, flag)
1109   M_WCHAR *delim ;
1110   LOGICAL flag ;
1111 #endif
1112 {
1113     M_WCHAR *p ;
1114
1115     for (p = delim ; *p ; p++) ;
1116
1117     p-- ;
1118     while (TRUE) {
1119       m_ungetachar((int) *p, M_NORMAL, flag) ;
1120       if (p == delim) return ;
1121       p-- ;
1122       }
1123     }
1124
1125 /* Place a character on the current input stream.  The character may have
1126    been scanned and determined not to be part of the current token or it
1127    may be in the expansion of an entity*/
1128 #if defined(M_PROTO)
1129 void m_ungetachar(int c, M_HOLDTYPE dchar, LOGICAL preread)
1130 #else
1131 void m_ungetachar(c, dchar, preread)
1132   int c ;
1133   M_HOLDTYPE dchar ;
1134   LOGICAL preread ;
1135 #endif
1136 {
1137     char buffer[10] ;
1138     int length;
1139     char mb_ee;
1140     M_WCHAR wc_ee;
1141
1142     mb_ee = M_EE;
1143     mbtowc(&wc_ee, &mb_ee, 1);
1144     if (m_chtrace) {
1145       if (dchar) {
1146         m_trace("unget(") ;
1147         length = wctomb(buffer, c);
1148         buffer[length] = 0;
1149         m_trace(buffer) ;
1150         m_trace(")[") ;
1151         sprintf(buffer, "%d", c) ;
1152         m_trace(buffer) ;
1153         m_trace("],") ;
1154         sprintf(buffer, "%d", dchar) ;
1155         m_trace(buffer) ;
1156         m_trace("\n") ;
1157         }
1158       else m_trace("unget(EE)\n") ;
1159       }
1160     m_inctest(&m_toundo, M_SAVECHAR, "M_SAVECHAR") ;
1161     m_sourcefile[m_toundo - 1] = m_sysecnt ;
1162     m_savedchar[m_toundo - 1] = dchar ;
1163     m_savechar[m_toundo - 1] = dchar == wc_ee ? (int) m_atrs : c ;
1164     if (preread) {
1165       m_stacktop->linestat = m_oldlinestat[m_oldlsindex] ;
1166       m_atrs = m_oldatrs[m_oldlsindex] ;
1167       m_oldlsindex = (m_oldlsindex - 1 + M_SAVECHAR) % M_SAVECHAR ;
1168       }
1169     if (m_toundo > m_maxundo) m_maxundo = m_toundo ;
1170     if (c == M_RE)
1171       if (dchar == M_NORMAL) m_line[m_sysecnt]-- ;
1172     }
1173
1174 /* Have encountered an M_ERO.  If the entity reference is valid, process it*/
1175 LOGICAL m_vldentref(M_NOPAR)
1176   {
1177     M_HOLDTYPE dchar ;
1178     int next ;
1179     M_ENTITY *openent ;
1180     char mb_ee;
1181     M_WCHAR wc_ee;
1182
1183     mb_ee = M_EE;
1184     mbtowc(&wc_ee, &mb_ee, 1);
1185     next = m_getachar(&dchar) ;
1186     if (next != EOF && m_cttype(next) == M_NMSTART && dchar != wc_ee) {
1187       m_getname((M_WCHAR) next) ;
1188       if (! m_gettoken(&next, &dchar, ENTREF))
1189         if (next != M_RE) m_ungetachar(next, dchar, TRUE) ;
1190       if (openent = (M_ENTITY *) m_lookfortrie(m_name, m_enttrie))
1191         m_entexpand(openent) ;
1192       else m_err1("Reference to undefined entity '%s'", m_name) ;
1193       return(TRUE) ;
1194       }
1195     m_ungetachar(next, dchar, TRUE) ;
1196     return(FALSE) ;
1197     }
1198
1199 #if defined(sparse)
1200 #include "sparse.c"
1201 #endif
1202
1203