cde/programs/dtdocbook/sgmls/pars2.c

   1 /*
   2  * CDE - Common Desktop Environment
   3  *
   4  * Copyright (c) 1993-2012, The Open Group. All rights reserved.
   5  *
   6  * These libraries and programs are free software; you can
   7  * redistribute them and/or modify them under the terms of the GNU
   8  * Lesser General Public License as published by the Free Software
   9  * Foundation; either version 2 of the License, or (at your option)
  10  * any later version.
  11  *
  12  * These libraries and programs are distributed in the hope that
  13  * they will be useful, but WITHOUT ANY WARRANTY; without even the
  14  * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15  * PURPOSE. See the GNU Lesser General Public License for more
  16  * details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
   19  * License along with these libraries and programs; if not, write
  20  * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
  21  * Floor, Boston, MA 02110-1301 USA
  22  */
  23 /* $XConsortium: pars2.c /main/3 1996/06/19 17:16:36 drk $ */
#include "sgmlincl.h"         /* #INCLUDE statements for SGML parser. */
#include <stdlib.h>           /* strtol() for numeric character references. */
  25 /* PARSE: Parse a source input stream with specified lexical and state tables.
  26           Return to caller with action code.
  27 */
  28 int parse(pcb)
  29 struct parse *pcb;            /* Current parse control block. */
  30 {
  31      int rc;                  /* Return code from ENTREF. */
  32
  33      while (1) {
  34           NEWCC;
  35           pcb->input = pcb->plex[*FPOS];
  36           pcb->state = pcb->newstate;
  37           pcb->newstate = (*(pcb->ptab + pcb->state)) [pcb->input];
  38           pcb->action = (*(pcb->ptab + pcb->state + 1)) [pcb->input];
  39           TRACEPCB(pcb);
  40           switch (pcb->action) {
  41           case RC2_:          /* Back up two characters. */
  42                REPEATCC;
  43           case RCC_:          /* Repeat current character. */
  44                REPEATCC;
  45           case NOP_:          /* No action necessary.*/
  46                continue;
  47
  48           case RS_:           /* Record start: ccnt=0; ++rcnt.*/
  49                ++RCNT; CTRSET(RSCC);
  50                continue;
  51
  52           case GET_:          /* EOB or dull EOS or EE found: keep going.*/
  53                if (entget()==-1) {pcb->action = EOD_; break;}/* Signal if EOD.*/
  54                continue;
  55
  56           case EOF_:          /* Illegal entity end; return EE_. */
  57                synerr(E_EOF, pcb);
  58                pcb->action = EE_;
  59           case EE_:           /* Important EOS or EE found: return to caller.*/
  60                if (entget()==-1) pcb->action = EOD_;   /* Signal if EOD. */
  61                break;
  62
  63           case PER_:          /* Parameter entity reference. */
  64                REPEATCC;           /* Use PERO as 1st char of entity name. */
  65                parsenm(entbuf, ENTCASE);
  66                parse(&pcbref);     /* Handle REFC or other terminator. */
  67                rc = entref(entbuf);
  68                if (rc==ENTPI) {pcb->action = PIE_; break;}
  69                continue;
  70
  71           case ER_:           /* General entity reference; continue. */
  72                parsenm(entbuf, ENTCASE);
  73                parse(&pcbref);     /* Handle REFC or other terminator. */
  74                rc = entref(entbuf);
  75                if (rc==ENTDATA) {pcb->action = DEF_; break;}
  76                if (rc==ENTPI) {pcb->action = PIE_; break;}
  77                continue;
  78
  79
  80           case PEX_:          /* Parameter entity reference; return. */
  81                REPEATCC;           /* Use PERO as 1st char of entity name. */
  82           case ERX_:          /* General entity reference; return. */
  83                parsenm(entbuf, ENTCASE);
  84                parse(&pcbref);     /* Handle REFC or other terminator. */
  85                rc = entref(entbuf);
  86                if (rc == ENTDATA){
  87                     /* Reference to external data/subdoc entity in replaceable
  88                        character data. */
  89                     if (BITON(entdatsw, NDECONT)) {
  90                          switch (((PNE)data)->nextype) {
  91                          case ESNCDATA:
  92                          case ESNSDATA:
  93                               /* The standard says `non-SGML data entity'
  94                                  but the amendment should have changed it
  95                                  to `external data entity'. */
  96                               synerr(145, pcb);
  97                               break;
  98                          case ESNNDATA:
  99                          case ESNSUB:
 100                               /* This is definitely illegal. */
 101                               synerr(141, pcb);
 102                               break;
 103                          }
 104                          entdatsw = 0;
 105                          continue;
 106                     }
 107                     pcb->action = DEF_;
 108                }
 109                else if (rc == ENTPI) {
 110                     /* Reference to PI entity not allowed in replaceable
 111                        character data. */
 112                     synerr(59, pcb);
 113                     entpisw = 0;
 114                     continue;
 115                }
 116                else if (rc) pcb->action = EE_;
 117                break;
 118
 119           case CRN_:          /* Character reference: numeric. */
 120                parsetkn(entbuf, NU, NAMELEN);
 121                parse(&pcbref);     /* Handle reference terminator. */
 122                pcb->action = charrefn(entbuf, pcb);
 123                if (pcb->action==CRN_) continue;   /* Invalid reference */
 124                break;
 125
 126           case CRA_:           /* Character reference: alphabetic. */
 127                parsenm(entbuf, NAMECASE);
 128                parse(&pcbref);     /* Handle reference terminator. */
 129                charrefa(entbuf);
 130                continue;
 131
 132           case SYS_:          /* Invalid NONCHAR: send msg and ignore. */
 133                synerr(E_SYS, pcb);
 134                if (*FPOS == DELNONCH) NEWCC;
 135                continue;
 136
 137           case NON_:          /* Valid NONCHAR: prefix and shift encoding. */
 138                synerr(60, pcb);
 139                pcb->action = datachar(*FPOS, pcb);
 140                break;
 141           case NSC_:
 142                synerr(60, pcb);
 143                NEWCC;
 144                nonchbuf[1] = *FPOS;
 145                pcb->action = NON_;
 146                break;
 147           case PCI_:          /* Previous character was invalid (INV_). */
 148                REPEATCC;
 149           case INV_:          /* Markup ended by invalid char; repeat char. */
 150                synerr(9, pcb);
 151                REPEATCC;
 152                break;
 153
 154           case LNR_:          /* Previous char exceeded len; back up to it. */
 155                REPEATCC;
 156           case LEN_:          /* Token too long; ignore excess character. */
 157                synerr(3, pcb);
 158                continue;
 159
 160           case RCR_:          /* Repeat current char and return to caller. */
 161                REPEATCC;
 162           default:            /* Actions for specific parse. */
 163                break;
 164           }
 165           return (int)pcb->action;
 166      }
 167 }
 168 /* CHARREFA: Resolve an alphabetical reference to a function character
 169              and put the character in the read buffer.
 170              If reference is bad, issue an error message.
 171 */
 172 VOID charrefa(r)
 173 UNCH *r;                      /* Undelimited char ref (with length and EOS). */
 174 {
 175      UNCH thechar;
 176
 177      thechar = mapsrch(funtab, r+1);
 178      if (thechar == 0)
 179           synerr(62, &pcbref);
 180      else {
 181           /* This isn't ideal, because the character position will still
 182              be wrong for one line. */
 183           if (thechar == RSCHAR) RCNT--;
 184           setcurchar(thechar);
 185           REPEATCC;
 186      }
 187 }
 188
 189 /* Make the current character ch. */
 190
 191 VOID setcurchar(ch)
 192 int ch;
 193 {
 194      /* If we're reading directly from an internal entity, we can't
 195         change the entity, since the entity might be referenced again.
 196         So in this case we copy the entity.  This is inefficient, but
 197         it will only happen in a case like this:
 198
 199         <!entity % amp "&">
 200         <!entity e "x%amp;#SPACE;">
 201
 202         Usually character references will have been processed while the
 203         entity was being defined.  */
 204      if (*FPOS != ch) {
 205           if (!FILESW && !COPIEDSW) {
 206                UNCH *s = savestr(FBUF + 1);
 207                FPOS = s + (FPOS - FBUF - 1);
 208                FBUF = s - 1;
 209                COPIEDSW = 1;
 210           }
 211           *FPOS = ch;
 212      }
 213 }
 214
 215 /* CHARREFN: Resolve a numeric character reference.
 216              If reference is bad, issue an error message.
 217 */
 218
 219 int charrefn(r, pcb)
 220 UNCH *r;                      /* Undelimited character reference. */
 221 struct parse *pcb;            /* Current parse control block. */
 222 {
 223      int thechar;
 224
 225      thechar = atoi((char *)r);
 226      if (thechar<0 || thechar>255) {
 227           synerr(61, &pcbref);
 228           return((int)pcb->action);
 229      }
 230      return datachar(thechar, pcb);
 231 }
 232
 233 /* Return ch as a datachar.  If this a non-SGML character which might
 234 confuse the parser, shift it to a code that won't and place it in a
 235 special buffer which has DELNONCH in the preceding byte.  Otherwise
 236 put it the read buffer. */
 237
 238 int datachar(ch, pcb)
 239 int ch;
 240 struct parse *pcb;
 241 {
 242      switch (ch) {
 243      case EOS:
 244      case EOFCHAR:
 245      case EOBCHAR:
 246      case GENRECHAR:
 247      case DELCDATA:
 248      case DELSDATA:
 249      case DELNONCH:
 250           /* A potentially confusing character which must be prefixed
 251              with DELNONCH. */
 252           nonchbuf[1] = SHIFTNON((UNCH)ch);
 253           return NON_;
 254      }
 255      setcurchar(ch);
 256      /* If in content, return DCE_ for element content, DAF_ for mixed.  */
 257      /* If not content, it must be a literal parse, so return MLA_. */
 258      if (pcb == conpcb) {
 259           if (pcb == &pcbcone)
 260                return DCE_;
 261           else {
 262                data = FPOS;
 263                /* Action for DAF_ will do REPEATCC. */
 264                NEWCC;
 265                return DAF_;
 266           }
 267      }
 268      else
 269           return MLA_;
 270 }
 271 /* INITATT: Initialize al with adl. */
 272
 273 VOID initatt(adl)
 274 struct ad *adl;
 275 {
 276      notadn = 0;              /* No NOTATION attribute yet. */
 277      conrefsw = 0;            /* Assume no content reference att. */
 278      /* Copy attribute definition list as a template. */
 279      memcpy((UNIV)al, (UNIV)adl, (1+ADN(adl))*ADSZ);
 280 }
 281
 282 /* PARSEATT: Parse attribute specification list.
 283              Make a current copy of the attribute definition list
 284              and update it with the user's specifications.
 285              Indicate each attribute that was specified in the
 286              list (as opposed to defaulted) by setting the ASPEC flag.
 287              If no attributes were specified, return NULL.  Otherwise,
 288              if in the prolog, make a permanent copy of the list and
 289              return its pointer.  If not in the prolog, return al.
 290 */
 291 struct ad *parseatt(adl, pt)
 292 struct ad *adl;               /* Attribute definition list. */
 293 UNCH *pt;                     /* Tokenization area: tbuf[TAGLEN+ATTSPLEN]. */
 294 {
 295      UNCH *antvptr;
 296      UNCH *nm = 0;            /* Pointer to saved name in tbuf (with length). */
 297      int adn = -1;            /* Position of attribute in list (-1=empty). */
 298      UNCH *tbuflim = pt + ATTSPLEN;
 299      mdessv = es;             /* Save es for checking entity nesting. */
 300      initatt(adl);
 301      while (pt<=tbuflim) {
 302           parse(&pcbstag);
 303           switch (pcbstag.action) {
 304           case NVS:                     /* Att name or value token found. */
 305                parsenm(pt, NAMECASE);   /* Case translation wanted on name. */
 306                pt += *(nm = pt);        /* Save name while pointing past it. */
 307                continue;
 308
 309           case AVD:           /* Delimited value found. */
 310           case AVDA:          /* Delimited value found (alternate delimiter). */
 311                /* Find position (adn) of saved attribute name in list. */
 312                adn = anmget((int)ADN(al), nm);
 313                parselit(pt,
 314                         (adn == 0 || ADTYPE(al, adn) == ACHARS)
 315                         ? &pcblitr
 316                         : &pcblitt,
 317                         LITLEN,
 318                         (pcbstag.action==AVD) ? lex.d.lit : lex.d.lita);
 319                if (adn == 0) {
 320                     /* Error: unrecognized attribute name. */
 321                     sgmlerr(13, &pcbstag, nm+1, pt);
 322                     continue;
 323                }
 324                /* Tokenize and validate value; let it default if an error. */
 325                /* Put value in list and bump ptr by the normalized length
 326                   (which is always >= the actual length). */
 327                if (!attval(1, pt, adn, adl)) pt += ADLEN(al,adn);
 328                continue;
 329           case AVU:           /* Attribute value found: undelimited. */
 330                if (!sd.shorttag) sgmlerr(196, &pcbstag, (UNCH *)0, (UNCH *)0);
 331                parsetkn(pt, NMC, LITLEN);
 332                /* Find position (adn) of saved attribute name in list. */
 333                if ((adn = anmget((int)ADN(al), nm))==0) {
 334                     /* Error: unrecognized attribute name. */
 335                     sgmlerr(13, &pcbstag, nm+1, pt);
 336                     continue;
 337                }
 338                /* Tokenize and validate value; let it default if an error. */
 339                /* Put value in list and bump ptr by the normalized length
 340                   (which is always >= the actual length). */
 341                if (!attval(1, pt, adn, adl)) pt += ADLEN(al,adn);
 342                continue;
 343
 344           case NASV:          /* Saved NVS was really an NTV. */
 345                REPEATCC;           /* Put back next token starter. */
 346                pt = nm;            /* Back up to NVS. */
 347           case NTV:           /* Name token value found. */
 348                if (!sd.shorttag) sgmlerr(195, &pcbstag, (UNCH *)0, (UNCH *)0);
 349                if (pcbstag.action==NTV) parsenm(pt, NAMECASE);
 350                if ((adn = antvget((int)ADN(al), pt, &antvptr))==0) {
 351                     /* Error: unrecognized name token value. */
 352                     sgmlerr(74, &pcbstag, pt+1, (UNCH *)0);
 353                     continue;
 354                }
 355                /* Validate value; let it default if an error. */
 356                /* Put value in list and bump ptr by the normalized length
 357                   (which is always >= the actual length). */
 358                if (!attval(0, antvptr+1, adn, adl)) pt += ADLEN(al,adn);
 359                continue;
 360
 361           default:            /* All attributes have been parsed. */
 362                REPEATCC;      /* Put next char back for tag close parse. */
 363                break;
 364           }
 365           break;
 366      }
 367      if (pt>tbuflim) synerr(75, &pcbstag);
 368      if (es!=mdessv) synerr(37, &pcbstag);
 369      if (adn<0) return((struct ad *)0); /* List was empty. */
 370      TRACEADL(al);
 371      return al;
 372 }
 373 /* ATTVAL: Validate a specified attribute value.  Issue a message if it is
 374            the wrong type (or otherwise is not up to spec), and use the default.
 375            Call PARSEVAL to tokenize the value, unless it is a CDATA string.
 376            If the attribute is a group, the value is a string.
 377            For other types, the token count is set by PARSEVAL if the value
 378            is syntactically correct.  If incorrect (or if CDATA) the token
 379            count is zero (i.e., the value is a string).
 380            The length of a token does not include the length byte, and
 381            there is no EOS.  A string length (as always) includes both
 382            the length byte and the EOS.
 383            If it is a CONREF attribute, set a switch for STAG().
 384            If it is a CURRENT attribute, store the value as the new default.
 385 */
 386 #define DEFVAL adl[adn].addef /* Default value of current attribute. */
 387 #define DEFNUM adl[adn].adnum /* Default group size of current attribute. */
 388 #define DEFLEN adl[adn].adlen /* Length of default value of current attribute.*/
 389 int attval(mtvsw, adval, adn, adl)
 390 int mtvsw;                    /* Must tokenize value: 1=yes; 0=no. */
 391 UNCH *adval;                  /* Untokenized attribute value. */
 392 int adn;                      /* Attribute's position in list. */
 393 struct ad *adl;               /* Element's master att def list. */
 394 {
 395      int errcode;             /* Value/declaration conflict error code. */
 396
 397      if (GET(ADFLAGS(al,adn), ASPEC))      /* Can't respecify same attribute. */
 398           {sgmlerr(73, &pcbstag, ADNAME(al,adn), adval); return(1);}
 399      SET(ADFLAGS(al,adn), ASPEC);          /* Indicate att was specified. */
 400      if (GET(ADFLAGS(al,adn), ACONREF))    /* If attribute is content reference: */
 401           conrefsw = TAGREF;            /* Set switch for STAG(). */
 402      if (mtvsw && ADTYPE(al,adn)!=ACHARS) {
 403           /* If no syntax errors, check for proper group membership. */
 404           if ( ((errcode = parseval(adval, ADTYPE(al,adn), lbuf))==0)
 405             && GET(ADFLAGS(al,adn), AGROUP)
 406             && !amemget(&al[adn], ADNUM(al,adn), lbuf) ) errcode = 18;
 407           /* If syntax or group membership error, send message and exit. */
 408           if (errcode) {
 409                sgmlerr(errcode, &pcbstag, ADNAME(al,adn), adval);
 410                SET(ADFLAGS(al,adn), AERROR);
 411                return(1);
 412           }
 413           /* Replace specified value in adval with tokenized in lbuf. */
 414           ustrcpy(adval, lbuf);
 415           if (BITOFF(ADFLAGS(al,adn), AGROUP)) ADNUM(al,adn) = (UNCH)tokencnt;
 416      }
 417      if (!mtvsw)
 418           adval--;
 419      /* If attribute is FIXED, specified value must equal default. */
 420      if (BITON(ADFLAGS(al,adn), AFIXED) && ustrcmp(adval, DEFVAL)) {
 421           /* Since the value has been tokenized, don't use it in the
 422              error message. */
 423           sgmlerr(67, &pcbstag, ADNAME(al,adn), (UNCH *)0);
 424           SET(ADFLAGS(al,adn), AERROR);
 425           return(1);
 426      }
 427      ADLEN(al,adn) = vallen(ADTYPE(al,adn), ADNUM(al,adn), adval);
 428      if (ADLEN(al,adn) > LITLEN) {
 429           sgmlerr(224, &pcbstag, ADNAME(al,adn), (UNCH *)0);
 430           SET(ADFLAGS(al,adn), AERROR);
 431           return 1;
 432      }
 433      ADVAL(al,adn) = adval;
 434      /* If attribute is CURRENT, value is new default.*/
 435      if (GET(ADFLAGS(al,adn), ACURRENT)) {
 436           if (ADLEN(al,adn)>DEFLEN) {
 437                ds.attdef += (ADLEN(al,adn) - DEFLEN);
 438                DEFLEN = ADLEN(al,adn);
 439           }
 440           DEFVAL = replace(DEFVAL, ADVAL(al,adn));
 441           DEFNUM = ADNUM(al,adn);
 442      }
 443      return(0);                   /* Indicate value was valid. */
 444 }
 445 /* ADLVAL: Validate the completed attribute definition list (defaults plus
 446            specified values).  Issue a message if an
 447            attribute is required or current and its value is NULL.
 448 */
 449 VOID adlval(adsz, newetd)
 450 int adsz;                     /* Size of list. */
 451 struct etd *newetd;           /* Element type definition for this element. */
 452 {
 453      int adn = 1;             /* Position in list. */
 454      UNCH *npt, *pt;          /* Ptr save areas. */
 455      UNCH nptsv;              /* Save area for ptr value (length?). */
 456      struct dcncb *dpt;       /* Save area for dcncb ptr. */
 457
 458      aentctr = 0;             /* Number of AENTITY tokens in this att list. */
 459      idrctr = 0;              /* Number of IDREF tokens in this att list. */
 460      do {
 461           if (ADVAL(al,adn)==NULL) {                      /* NULL value */
 462                if (GET(ADFLAGS(al,adn), AREQ+ACURRENT)) { /*Error if REQ, CURRENT*/
 463                     sgmlerr(19, &pcbstag, ADNAME(al,adn), (UNCH *)0);
 464                     SET(ADFLAGS(al,adn), AINVALID);
 465                }
 466           }
 467           else switch (ADTYPE(al,adn)) {
 468           case AENTITY:       /* Return data ecb pointer if valid entity. */
 469                aenttst(adn, ADVAL(al,adn));
 470                break;
 471           case AENTITYS:      /* Return data ecb pointers if valid entities. */
 472                pt = ADVAL(al,adn);
 473                tokencnt = (int)ADNUM(al,adn);
 474                while (tokencnt--) {
 475                     nptsv = *(npt = pt + *pt+1);
 476                     *pt += 2; *npt = EOS;
 477                     aenttst(adn, pt);
 478                     *pt -= 2; *(pt = npt) = nptsv;
 479                }
 480                break;
 481           case AID:
 482                /* Define ID; msg if it already exists. */
 483                if (iddef(ADVAL(al,adn))) {
 484                     sgmlerr(71, &pcbstag, ADNAME(al,adn), ADVAL(al,adn)+1);
 485                     SET(ADFLAGS(al,adn), AINVALID);
 486                     continue;
 487                }
 488                ++ds.idcnt;
 489                break;
 490           case AIDREF:
 491                idreftst(adn, ADVAL(al,adn));
 492                break;
 493           case AIDREFS:
 494                pt = ADVAL(al,adn);
 495                tokencnt = (int)ADNUM(al,adn);
 496                while (tokencnt--) {
 497                     nptsv = *(npt = pt + *pt+1);
 498                     *pt += 2; *npt = EOS;
 499                     idreftst(adn, pt);
 500                     *pt -= 2; *(pt = npt) = nptsv;
 501                }
 502                break;
 503           case ANOTEGRP:      /* Return notation identifier. */
 504                if (GET(ADFLAGS(al,adn), ASPEC)) notadn = adn;/*NOTATION specified*/
 505                if ((dpt = dcnfind(ADVAL(al,adn)))==0) {
 506                     sgmlerr(77, &pcbstag, ADNAME(al,adn), ADVAL(al,adn)+1);
 507                     SET(ADFLAGS(al,adn), AINVALID);
 508                }
 509                else ADDATA(al,adn).x = dpt;
 510                break;
 511           }
 512           if (!sd.shorttag && !sd.omittag && ADVAL(al,adn)!=NULL
 513               && !GET(ADFLAGS(al,adn), ASPEC+AINVALID))
 514                sgmlerr(197, &pcbstag, ADNAME(al,adn), (UNCH *)0);
 515      } while ((adn+=BITON(ADFLAGS(al,adn),AGROUP) ? (int)ADNUM(al,adn)+1 : 1)<=adsz);
 516
 517      /* Error if NOTATION specified with CONREF attribute or EMPTY element. */
 518      if (notadn && (conrefsw
 519                     || (newetd && GET(newetd->etdmod->ttype, MNONE)))) {
 520           sgmlerr((UNS)(conrefsw ? 84 : 76), &pcbstag,
 521                ADNAME(al,notadn), ADVAL(al,notadn)+1);
 522           SET(ADFLAGS(al,notadn), AINVALID);
 523      }
 524 }
 525 /* AENTTST: Validate an individual ENTITY token in AENTITY or AENTITYS value.
 526 */
 527 VOID aenttst(adn, pt)
 528 int adn;                      /* Position in list. */
 529 UNCH *pt;                     /* Ptr to current ENTITY token in value. */
 530 {
 531      struct entity *ept;      /* Save area for ecb ptr. */
 532
 533      if (++aentctr>GRPCNT) {
 534           sgmlerr(136, &pcbstag, ADNAME(al,adn), pt+1);
 535           SET(ADFLAGS(al,adn), AINVALID);
 536           return;
 537      }
 538      if ( (ept = entfind(pt))==0
 539        && (ecbdeflt==0 || (ept = usedef(pt))==0) ) {
 540           sgmlerr(ecbdeflt ? 151 : 72, &pcbstag, ADNAME(al,adn), pt+1);
 541           SET(ADFLAGS(al,adn), AINVALID);
 542           return;
 543      }
 544      if (ept->estore==ESX || ept->estore==ESC || ept->estore==ESN) {
 545           /* Error if DCN has no notation identifier. */
 546           if (ept->estore==ESN && NEXTYPE(ept->etx.n)!=ESNSUB
 547               && !NEDCNDEFINED(ept->etx.n)) {
 548                sgmlerr(78, &pcbstag, NEDCN(ept->etx.n)+1,
 549                            pt+1);
 550                SET(ADFLAGS(al,adn), AINVALID);
 551           }
 552      }
 553      else {
 554           sgmlerr(86, &pcbstag, ADNAME(al,adn), pt+1);
 555           SET(ADFLAGS(al,adn), AINVALID);
 556      }
 557 }
 558 /* IDREFTST: Validate an individual IDREF token in an IDREF or IDREFS value.
 559 */
 560 VOID idreftst(adn, pt)
 561 int adn;                      /* Position in list. */
 562 UNCH *pt;                     /* Ptr to current IDREF token in value. */
 563 {
 564      struct fwdref *rp;
 565      if (++idrctr>GRPCNT) {
 566           sgmlerr(70, &pcbstag, ADNAME(al,adn), pt+1);
 567           SET(ADFLAGS(al,adn), AINVALID);
 568           return;
 569      }
 570      /* Note IDREF; indicate if ID exists. */
 571      if ((rp = idref(pt)) != 0)
 572           rp->msg = saverr(69, &pcbstag, ADNAME(al,adn), pt+1);
 573      ++ds.idrcnt;
 574 }
 575 /* ANMGET: Locate an attribute name in an attribute definition list.
 576 */
 577 int anmget(adsz, nm)
 578 int adsz;                     /* Size of list. */
 579 UNCH *nm;                     /* Value to be found (with length byte). */
 580 {
 581      int adn = 0;             /* Position in list. */
 582
 583      while (++adn <= adsz && ustrcmp(nm+1, ADNAME(al,adn))) {
 584           if (BITON(ADFLAGS(al,adn), AGROUP)) adn += (int)ADNUM(al,adn);
 585      }
 586      return (adn > adsz) ? 0 : adn;
 587 }
 588 /* ANTVGET: Find the position of a name token value in an attribute list.
 589             Return the position of the attribute definition, or zero
 590             if none was found.  Set pp to the value, if non-NULL.
 591 */
 592 int antvget(adsz, nm, pp)
 593 int adsz;                     /* Size of list. */
 594 UNCH *nm;                     /* Value to be found (with length byte). */
 595 UNCH **pp;                    /* Store value here */
 596 {
 597      int adn = 0;             /* Position in list. */
 598
 599      while (++adn<=adsz) {
 600           /* Test only name group members. */
 601           if (BITON(ADFLAGS(al,adn), AGROUP)) {
 602                int advn;      /* Position of value in sub-list. */
 603                if ((advn = amemget(&al[adn], (int)ADNUM(al,adn), nm))!=0) {
 604                     if (pp)
 605                          *pp = al[adn+advn].adname;
 606                     return adn;
 607                }
 608                adn += (int)ADNUM(al,adn);
 609           }
 610      }
 611      return 0;
 612 }
 613 /* AMEMGET: Get the position of a member in an attribute name token group.
 614             Returns the position, or zero if not found.
 615             The length byte is ignored in the comparison so that final
 616             form tokens from ATTVAL can be compared to group members.
 617 */
 618 int amemget(anmtgrp, adsz, nm)
 619 struct ad anmtgrp[];          /* Name token group. */
 620 int adsz;                     /* Size of group. */
 621 UNCH *nm;                     /* Name to be found (with length byte). */
 622 {
 623      int adn = 0;             /* Position in group. */
 624
 625      while ( ++adn<=adsz && ustrncmp(nm+1, anmtgrp[adn].adname+1, (UNS)*nm-1)) ;
 626      return (adn>adsz) ? 0 : adn;
 627 }
 628 /* VALLEN: Returns the length of an attribute value for capacity
 629            calculations.  Normally, the length is NORMSEP plus the number
 630            of characters.  For tokenized lists, it is NORMSEP,
 631            plus the number of characters in the tokens, plus
 632            NORMSEP for each token.
 633            ACHARS and tokenized lists don't have a length byte.
 634
 635 */
 636 UNS vallen(type, num, def)
 637 int type;                     /* ADTYPE(al,adn) */
 638 int num;                      /* ADNUM(al,adn) */
 639 UNCH *def;                    /* ADVAL(al,adn) */
 640 {
 641      if (type == ACHARS)
 642           return ustrlen(def) + NORMSEP;
 643      if (type < ATKNLIST)
 644           return *def - 2 + NORMSEP;
 645      return ustrlen(def) + num * (NORMSEP - 1) + NORMSEP;
 646 }
 647 /* PARSEGRP: Parse GI names, get their etds, and form an array of pointers
 648              to them.  The array is terminated by a NULL pointer.
 649              The number of pointers (including the NULL) is returned.
 650              The grp buffer must have room for GRPCNT+1 etds.
 651 */
 652 UNS parsegrp(grp, pcb, tbuf)
 653 struct etd *grp[];            /* Buffer for building the group. */
 654 struct parse *pcb;            /* Current parse control block. */
 655 UNCH *tbuf;
 656 {
 657      int grpcnt = 0;          /* Number of etds in the group. */
 658      int i;
 659      int essv = es;           /* Entity stack level when grp started. */
 660
 661      while (parse(pcb)!=GRPE && grpcnt<GRPCNT) {
 662           switch (pcb->action) {
 663           case NAS_:          /* GI name: get its etd for the group. */
 664                grp[grpcnt] = etddef(parsenm(tbuf, NAMECASE));
 665                for (i = 0; i < grpcnt; i++)
 666                     if (grp[i] == grp[grpcnt]) {
 667                          mderr(98, ntoa(grpcnt + 1), grp[grpcnt]->etdgi + 1);
 668                          break;
 669                     }
 670                if (i == grpcnt)
 671                     grpcnt++;
 672                continue;
 673
 674           case EE_:           /* Entity ended (correctly or incorrectly). */
 675                if (es<essv) {synerr(37, pcb); essv = es;}
 676                continue;
 677
 678           case PIE_:          /* PI entity reference (invalid). */
 679                entpisw = 0;   /* Reset PI entity indicator. */
 680                synerr(59, pcb);
 681                continue;
 682
 683           default:
 684                break;
 685           }
 686           break;
 687      }
 688      grp[grpcnt++] = 0;       /* NULL pointer indicates end of group. */
 689      if (es!=essv) synerr(37, pcb);
 690      return grpcnt;           /* Return number of ptrs in group. */
 691 }
 692 /* PARSNGRP: Parse notation names, get their dcncbs, and form an array of
 693              pointers to them.  The array is terminated by a NULL pointer.
 694              The number of pointers (including the NULL) is returned.
 695              The grp buffer must have room for GRPCNT+1 members.
 696 */
 697 UNS parsngrp(grp, pcb, tbuf)
 698 struct dcncb *grp[];          /* Buffer for building the group. */
 699 struct parse  *pcb;           /* Current parse control block. */
 700 UNCH *tbuf;
 701 {
 702      int grpcnt = 0;          /* Number of members in the group. */
 703      int i;
 704      int essv = es;           /* Entity stack level when grp started. */
 705
 706      while (parse(pcb)!=GRPE && grpcnt<GRPCNT) {
 707           switch (pcb->action) {
 708           case NAS_:          /* Member name: get its control block. */
 709                grp[grpcnt] = dcndef(parsenm(tbuf, NAMECASE));
 710                for (i = 0; i < grpcnt; i++)
 711                     if (grp[i] == grp[grpcnt]) {
 712                          mderr(98, ntoa(grpcnt + 1), grp[grpcnt]->ename + 1);
 713                          break;
 714                     }
 715                if (i == grpcnt)
 716                     grpcnt++;
 717                continue;
 718
 719           case EE_:           /* Entity ended (correctly or incorrectly). */
 720                if (es<essv) {synerr(37, pcb); essv = es;}
 721                continue;
 722
 723           case PIE_:          /* PI entity reference (invalid). */
 724                entpisw = 0;   /* Reset PI entity indicator. */
 725                synerr(59, pcb);
 726                continue;
 727
 728           default:
 729                break;
 730           }
 731           break;
 732      }
 733      grp[grpcnt++] = 0;       /* NULL pointer indicates end of group. */
 734      if (es!=essv) synerr(37, pcb);
 735      return grpcnt;           /* Return number of ptrs in group. */
 736 }
 737 /* COPYGRP: Allocate storage for a group and copy the group into it.
 738 */
 739 PETD *copygrp(pg, grpsz)
 740 PETD pg[];                    /* Pointer to a group (array of etd ptrs). */
 741 UNS grpsz;                    /* Number of ptrs in grp, including final NULL. */
 742 {
 743      UNS glen;                /* Group length in characters. */
 744      PETD *gnm;               /* Ptr to permanent name group. */
 745
 746      if (pg==0) return (PETD *)0;
 747      glen = grpsz * sizeof(struct etd *);
 748      memcpy( (UNIV)(gnm = (struct etd **)rmalloc(glen)) , (UNIV)pg, glen );
 749      return gnm;
 750 }
 751 /* INGRP: Locate an etd in a name group and return its index+1 (or zero
 752           if not found).
 753 */
 754 int ingrp(pg, ketd)
 755 PETD pg[];                    /* Array of pointers to etds. */
 756 PETD ketd;                    /* Pointer to etd to be found in group. */
 757 {
 758      int i = 0;               /* Array index. */
 759
 760      while (pg[i]) if (pg[i++]==ketd) return i;
 761      return 0;
 762 }
 763 /* PARSELIT: Parse a delimited string and collect it into a token.
 764              Caller supplies buffer, which must be 1 longer than
 765              maximum string allowed.
 766              Caller also supplies character that delimits the string.
 767              TODO: Return 1 if CDATA, SDATA or NONSGML occurred.
 768 */
 769 #ifdef USE_PROTOTYPES
 770 VOID parselit(UNCH *tbuf, struct parse *pcb, UNS maxlen, UNCH del)
 771 #else
 772 VOID parselit(tbuf, pcb, maxlen, del)
 773 UNCH *tbuf;                   /* Work area for tokenization (parmlen+1). */
 774 struct parse *pcb;            /* Current parse control block. */
 775 UNS maxlen;                   /* Maximum length of token. */
 776 UNCH del;                     /* Literal delimiter: LIT LITA PIC EOS */
 777 #endif
 778 {
 779      UNCH *pt = tbuf;         /* Current pointer into tbuf. */
 780      UNCH lexsv = lexlms[del];/* Saved lexlms value of delimiter. */
 781      int essv = es;           /* Entity stack level when literal started. */
 782      UNCH datadel;            /* Delimiter for CDATA/SDATA entity. */
 783      int parmlen = (int)maxlen;  /* Working limit (to be decremented). */
 784
 785      lexlms[del] = lex.l.litc;   /* Set delimiter to act as literal close. */
 786      do {
 787           switch (parse(pcb)) {
 788                case LP2_:          /* Move 2nd char back to buffer; redo prev.*/
 789                     REPEATCC;
 790                case LPR_:          /* Move previous char to buffer; REPEATCC; */
 791                     REPEATCC;
 792                case MLA_:          /* Move character to buffer. */
 793                     *pt++ = *FPOS; --parmlen;
 794                     continue;
 795
 796                case FUN_:          /* Function char found; replace with space.*/
 797                     *pt++ = ' '; --parmlen;
 798                     continue;
 799
 800                case RSM_:          /* Record start: ccnt=0; ++rcnt.*/
 801                     ++RCNT; CTRSET(RSCC); *pt++ = *FPOS; --parmlen;
 802                     continue;
 803
 804                case ERX_:          /* Entity reference: cancel LITC delim. */
 805                case PEX_:          /* Parameter entity ref: cancel LITC delim.*/
 806                     lexlms[del] = lexsv;
 807                     continue;
 808
 809                case EE_:
 810                     if (es<essv) {
 811                          synerr(37, pcb);
 812                          essv = es;
 813                     }
 814                     /* If back at top level, re-enable the LITC delimiter. */
 815                     if (es==essv) lexlms[del] = lex.l.litc;
 816                     continue;
 817
 818                case MLE_:          /* Char not allowed in minimum literal. */
 819                     synerr(63, pcb);
 820                     continue;
 821
 822                case DEF_:          /* Data entity: add it to buffer. */
 823                     if (pcb == &pcblitt) {
 824                          int parmlensv = parmlen;
 825                          entdatsw = 0;
 826                          parmlen = tokdata(pt, parmlen);
 827                          if (parmlen < 0)
 828                               break;
 829                          pt += parmlensv - parmlen;
 830                          continue;
 831                     }
 832                     if ((parmlen -= (int)datalen+2)<0) {entdatsw = 0; break;}
 833                     *pt++ = datadel =
 834                          BITON(entdatsw, CDECONT) ? DELCDATA : DELSDATA;
 835                     entdatsw = 0;
 836                     memcpy( pt , data, datalen );
 837                     pt += datalen;
 838                     *pt++ = datadel;
 839                     continue;
 840
 841                case NON_:          /* Non-SGML char (delimited and shifted). */
 842                     if ((parmlen -= 2)<0) break;
 843                     memcpy( pt , nonchbuf, 2 );
 844                     pt += 2;
 845                     continue;
 846
 847                case RPR_:          /* Remove character from buffer. */
 848                     --pt; ++parmlen;
 849                     break;
 850
 851                case EOD_:
 852                     exiterr(92, pcb);
 853
 854                default:
 855                     break;
 856           }
 857           break;
 858      } while (parmlen>=0 && pcb->action!=TER_);
 859
 860      if (parmlen<0) {--pt; sgmlerr(134, pcb, ntoa((int)maxlen),(UNCH *)0); REPEATCC;}
 861      datalen = (UNS)(pt-tbuf);/* To return PI string to text processor. */
 862      *pt++ = EOS;
 863      lexlms[del] = lexsv;     /* Restore normal delimiter handling. */
 864      if (es!=essv) synerr(37, pcb);
 865      return;
 866 }
 867
 868 /* Handle a data entity in a tokenized attribute value literal.
 869 Parmlen is amount of space left.  Return new parmlen. If there's not
 870 enough space return -1, and copy up to parmlen + 1 characters. */
 871
 872 int tokdata(pt, parmlen)
 873 UNCH *pt;
 874 int parmlen;
 875 {
 876      int skip = (pcblitt.newstate == 0);
 877      int i;
 878
 879      for (i = 0; parmlen >= 0 && i < datalen; i++) {
 880           switch (data[i]) {
 881           case RSCHAR:
 882                /* ignore it */
 883                break;
 884           case RECHAR:
 885           case TABCHAR:
 886           case SPCCHAR:
 887                if (!skip) {
 888                     *pt++ = data[i];
 889                     parmlen--;
 890                     skip = 1;
 891                }
 892                break;
 893           default:
 894                if (data[i] == DELNONCH) {
 895                     assert(i + 1 < datalen);
 896                     if ((parmlen -= 2) < 0)
 897                          break;
 898                     *pt++ = DELNONCH;
 899                     *pt++ = data[++i];
 900                     skip = 0;
 901                }
 902                else {
 903                     *pt++ = data[i];
 904                     parmlen--;
 905                     skip = 0;
 906                }
 907                break;
 908           }
 909      }
 910      pcblitt.newstate = skip ? 0 : pcblittda;
 911      return parmlen;
 912 }
 913
 914
 915 /* PARSEMD: Parser for markup declarations.
 916             It returns a token each time it is called.
 917
 918 */
 919 int parsemd(pt, namecase, lpcb, tokenlen)
 920 UNCH *pt;                     /* Token buffer: >=tokenlen+2. */
 921 int namecase;                 /* Case translation: ENTCASE NAMECASE AVALCASE. */
 922 struct parse *lpcb;           /* Parse control block for literal parse. */
 923 UNS tokenlen;                 /* Max length of expected token: NAMELEN LITLEN */
 924 {
 925      struct parse *pcb;       /* Current parse control block. */
 926
 927      pcb = (lpcb) ? &pcbmd : &pcbmdc;  /* If no literal pcb, dcl is comment. */
 928
 929      doparse: while (parse(pcb)==EE_)
 930           if (es<mdessv) {synerr(37, pcb); mdessv = es;}
 931      if (pcb->action==PIE_) { /* PI entity reference not allowed. */
 932           entpisw = 0;        /* Reset PI entity indicator. */
 933           synerr(59, pcb);
 934           goto doparse;
 935      }
 936      ++parmno;           /* Increment parameter counter. */
 937      switch (pcb->action) {
 938      case CDR:           /* COM[1] (MINUS) occurred previously. */
 939           REPEATCC;
 940           return (int)pcb->action;
 941      case LIT:           /* Literal: CDATA with LIT delimiter. */
 942           parselit(pt, lpcb, tokenlen, lex.d.lit);
 943           return (int)pcb->action;
 944      case LITE:          /* Literal: CDATA with LITA delimiter. */
 945           parselit(pt, lpcb, tokenlen, lex.d.lita);
 946           return((int)(pcb->action = LIT));
 947      case RNS:           /* Reserved name started (after RNI). */
 948           parsenm(pt, NAMECASE);
 949           return (int)pcb->action;
 950      case NAS:           /* Name started. */
 951           if (namecase!=AVALCASE) {
 952                parsenm(pt, namecase);
 953                return (int)pcb->action;
 954           }
 955           /* Treat attribute value as name character string. */
 956      case NMT:           /* Name token string. */
 957           parsetkn(pt, NMC, (int)tokenlen);  /* Get undelimited value. */
 958           return (int)pcb->action;
 959      case NUM:           /* Number or number token string. */
 960           parsetkn(pt, (UNCH)((int)tokenlen<=NAMELEN ? NU:NMC), (int)tokenlen);
 961           return (int)pcb->action;
 962      case PENR:
 963           REPEATCC;
 964           return (pcb->action = PEN);
 965      case EOD_:
 966           exiterr(133, pcb);
 967           /* EXIT */
 968      default:            /* End of declaration. */
 969           return (int)pcb->action; /* EMD GRPS MGRP PEN PGRP */
 970      }
 971 }
 972 /* PARSEMOD: If the declared content was a keyword, the token count is zero
 973              and it is only necessary to save the type.  Otherwise,
 974              collect the outermost token count and model type bytes for a model.
 975              The count includes tokens found in nested groups also.
 976              After building the model, parse for its occurrence indicator.
 977 */
 978 struct thdr *parsemod(dctype)
 979 int dctype;                        /* Content type (0=model). */
 980 {
 981      gbuf[0].ttype = (UNCH)dctype; /* Initialize content flags byte. */
 982      if (dctype) {gbuf[0].tu.tnum = 0; return gbuf;} /* Return if not model. */
 983
 984      gbuf[0].tu.tnum = 0;          /* Don't count 1st group or model header. */
 985      gbuf[1].ttype = 0;            /* Initialize 1st group type ... */
 986      gbuf[1].tu.tnum = 0;          /* and count. */
 987      grplvl = 1;                   /* Content model is 1st level group. */
 988      pcbgrcm.newstate = 0;         /* Go parse the model group. */
 989      /* Empty group is trapped during syntax parse; other errors return NULL. */
 990      if (!parsegcm(&pcbgrcm, &gbuf[1], &gbuf[0])) return (struct thdr *)0;
 991      parse(&pcbgrcs);             /* Get the model suffix, if there is one. */
 992      switch(pcbgrcs.action) {
 993      case OPT:                     /* OPT occurrence indicator for model. */
 994           SET(gbuf[1].ttype, TOPT|TXOPT);
 995           break;
 996      case REP:                     /* REP occurrence indicator for model. */
 997           SET(gbuf[1].ttype, TREP|TXREP);
 998           break;
 999      case OREP:                    /* OREP occurrence indicator for model. */
1000           SET(gbuf[1].ttype, TOREP|TXOREP);
1001           break;
1002      default:                      /* RCR_: Repeat char and return. */
1003           break;
1004      }
1005      if (sw.swambig) ambig();      /* Check content model for ambiguity. */
1006      return gbuf;
1007 }
1008 /* PARSEGCM: Collect token headers (struct thdr) into a group (array).
1009              An etd is defined for each GI (if none exists) and its pointer is
1010              stored in the header.  The function is called recursively.
1011 */
1012 struct thdr *parsegcm(pcb, pgh, gbuf)
1013 struct parse *pcb;                 /* Current parse control block. */
1014 struct thdr *pgh;                  /* Current group header in group buffer. */
1015 struct thdr *gbuf;                 /* Header for outermost group (model). */
1016 {
1017 #define MCON gbuf->ttype           /* Model type (content attributes). */
1018      struct thdr *pg=pgh;          /* Current group token. */
1019      struct thdr *pgsv=pgh;        /* Saved current token for occ indicator. */
1020      int optcnt = 0;               /* Count of optional tokens in group. */
1021      int essv = es;                /* Entity stack level when grp started. */
1022
1023     while (gbuf->tu.tnum<=GRPGTCNT && pgh->tu.tnum<=GRPCNT && parse(pcb)!=GRPE)
1024      switch (pcb->action) {
1025
1026      case NAS_:          /* GI name: get its etd and store it. */
1027           ++gbuf->tu.tnum; ++pgh->tu.tnum;
1028           (pgsv = ++pg)->ttype = TTETD;
1029           pg->tu.thetd = etddef(parsenm(tbuf, NAMECASE));
1030           SET(MCON, MGI);
1031           continue;
1032
1033      case RNS_:          /* Reserved name started (#PCDATA). */
1034           parsenm(tbuf, NAMECASE);
1035           if (ustrcmp(tbuf+1, key[KPCDATA])) {
1036                mderr(116, ntoa(gbuf->tu.tnum), tbuf+1);
1037                return (struct thdr *)0;
1038           }
1039           /* If #PCDATA is the first non-group token, model is a phrase. */
1040           if (!MCON) SET(MCON, MPHRASE);
1041      case DTAG:          /* Data tag template ignored; treat as #PCDATA. */
1042           if (pcb->action==DTAG) SET(pgh->ttype, TTSEQ); /* DTAG is SEQ grp. */
1043           ++gbuf->tu.tnum; ++pgh->tu.tnum;
1044           (++pg)->ttype = TTCHARS+TOREP;/* #PCDATA is OPT and REP. */
1045           pg->tu.thetd = ETDCDATA;
1046           ++optcnt;                     /* Ct opt tokens to see if grp is opt.*/
1047           SET(MCON, MCHARS);
1048           continue;
1049
1050      case GRP_:          /* Group started. */
1051           ++gbuf->tu.tnum; ++pgh->tu.tnum;
1052           (pgsv = ++pg)->ttype = 0;     /* Type will be set by connector. */
1053           pg->tu.tnum = 0;              /* Group has number instead of etd. */
1054           if (++grplvl>GRPLVL) {
1055                mderr(115, ntoa(gbuf->tu.tnum), (UNCH *)0);
1056                return (struct thdr *)0;
1057           }
1058           pg = parsegcm(pcb, pg, gbuf);
1059           if (!pg) return (struct thdr *)0;
1060           if (GET(pgsv->ttype, TOPT)) ++optcnt;  /* Indicate nested opt grp. */
1061           --grplvl;
1062           continue;
1063
1064      case OREP:          /* OREP occurrence indicator for current token.*/
1065           SET(pgsv->ttype, TREP|TXREP);
1066                          /* Now treat like OPT. */
1067      case OPT:           /* OPT occurrence indicator for current token. */
1068           SET(pgsv->ttype, TXOPT);
1069           if (GET(pgsv->ttype, TOPT)) continue;  /* Exit if nested opt grp. */
1070           SET(pgsv->ttype, TOPT);
1071           ++optcnt;      /* Count opt tokens to see if grp is optional. */
1072           continue;
1073      case REP:           /* REP occurrence indicator for current token. */
1074           SET(pgsv->ttype, TREP|TXREP);
1075           continue;
1076
1077      case OR:            /* OR connector found. */
1078           if BITOFF(pgh->ttype, TTAND) SET(pgh->ttype, TTOR);
1079           else if (GET(pgh->ttype, TTAND)!=TTOR)
1080                mderr(55, ntoa(gbuf->tu.tnum), (UNCH *)0);
1081           continue;
1082      case AND:           /* AND connector found. */
1083           if BITOFF(pgh->ttype, TTAND) SET(pgh->ttype, TTAND);
1084           else if (GET(pgh->ttype, TTAND)!=TTAND)
1085                mderr(55, ntoa(gbuf->tu.tnum), (UNCH *)0);
1086           continue;
1087      case SEQ:           /* SEQ connector found. */
1088           if BITOFF(pgh->ttype, TTAND) SET(pgh->ttype, TTSEQ);
1089           else if (GET(pgh->ttype, TTAND)!=TTSEQ)
1090                mderr(55, ntoa(gbuf->tu.tnum), (UNCH *)0);
1091           continue;
1092
1093      case EE_:           /* Entity ended (correctly or incorrectly). */
1094           if (es<essv) {synerr(37, pcb); essv = es;}
1095           continue;
1096
1097      case PIE_:          /* PI entity reference (not permitted). */
1098           entpisw = 0;   /* Reset PI entity indicator. */
1099           synerr(59, pcb);
1100           continue;
1101
1102      default:            /* Syntax errors return in disgrace. */
1103           synerr(37, pcb);
1104           return (struct thdr *)0;
1105      }
1106      if (pgh->tu.tnum>GRPCNT) {
1107           mderr(113, ntoa(gbuf->tu.tnum), (UNCH *)0);
1108           return (struct thdr *)0;
1109      }
1110      if (gbuf->tu.tnum>GRPGTCNT) {
1111           mderr(114, ntoa(gbuf->tu.tnum), (UNCH *)0);
1112           return (struct thdr *)0;
1113      }
1114      if (pgh->tu.tnum==1) SET(pgh->ttype, TTSEQ); /* Unit grp is SEQ. */
1115      /* An optional token in an OR group makes the group optional. */
1116      if (GET(pgh->ttype, TTMASK)==TTOR && optcnt) SET(pgh->ttype, TOPT);
1117      /* If all tokens in any group are optional, so is the group. */
1118      if (pgh->tu.tnum<=optcnt) SET(pgh->ttype, TOPT);
1119
1120      if (es!=essv) synerr(37, pcb);
1121      return pg;                             /* Return pointer to GRPS token. */
1122 }
1123 /* PARSENM: Parser for SGML names, which can be translated with LEXTRAN.
1124             The input is read from the entity stack.  CC is 1st char of name.
1125             Returns a pointer to the parsed name.
1126 */
1127 UNCH *parsenm(tbuf, nc)
1128 UNCH *tbuf;                   /* Buffer for name: >=NAMELEN+2. */
1129 int nc;                       /* Namecase translation: 1=yes; 0=no. */
1130 {
1131      UNCH   len;              /* Length of name (incl EOS & length byte). */
1132
1133      *(tbuf + (len = 1) ) = nc ? lextran[*FPOS] : *FPOS;
1134      while ((NEWCC, (int)lextoke[*FPOS]>=NMC) && (len<NAMELEN)) {
1135           TRACETKN(NMC, lextoke);
1136           if (lextoke[*(tbuf + ++len) = (nc ? lextran[*FPOS] : *FPOS)]==EOB) {
1137                --len;
1138                entget();
1139           }
1140      }
1141      REPEATCC;                       /* Put back the non-token character. */
1142      *(tbuf + ++len) = EOS;          /* Terminate name with standard EOS. */
1143      *tbuf = ++len;                  /* Store length ahead of name. */
1144      return tbuf;
1145 }
1146 /* PARSETKN: Parser for start-tag attribute value tokens.
1147              First character of token is already in *FPOS.
1148              Returns a pointer to the parsed token.
1149              Parsed token has EOS but no length byte.
1150 */
1151 #ifdef USE_PROTOTYPES
1152 UNCH *parsetkn(UNCH *tbuf, UNCH scope, int maxlen)
1153 #else
1154 UNCH *parsetkn(tbuf, scope, maxlen)
1155 UNCH *tbuf;                   /* Buffer for token: >=maxlen+1. */
1156 UNCH scope;                   /* Minimum lexical class allowed. */
1157 int maxlen;                   /* Maximum length of a token. */
1158 #endif
1159 {
1160      int i = 1;
1161      tbuf[0] = *FPOS;
1162      while (i < maxlen) {
1163           NEWCC;
1164           if (lextoke[*FPOS] < scope) {
1165                REPEATCC;
1166                break;
1167           }
1168           TRACETKN(scope, lextoke);
1169           if (*FPOS == EOBCHAR)
1170                entget();
1171           else
1172                tbuf[i++] = *FPOS;
1173      }
1174      tbuf[i] = EOS;
1175      return tbuf;
1176 }
1177 /* PARSESEQ: Parser for blank sequences (i.e., space and TAB characters ).
1178              First character of sequence is already in *FPOS.
1179 */
1180 VOID parseseq(tbuf, maxlen)
1181 UNCH *tbuf;                   /* Buffer for storing found sequence. */
1182 int maxlen;                   /* Maximum length of a blank sequence. */
1183 {
1184      tbuf[0] = *FPOS;
1185      datalen = 1;
1186      for (;;) {
1187           NEWCC;
1188           if (*FPOS == EOBCHAR) {
1189                entget();
1190                continue;
1191           }
1192           if ((lextoke[*FPOS] != SEP && *FPOS != SPCCHAR)
1193               || datalen >= maxlen)
1194                break;
1195           tbuf[datalen++] = *FPOS;
1196           TRACETKN(SEP, lextoke);
1197      }
1198 }
1199 /* S2VALNM: Parser for attribute values that are tokenized like names.
1200             The input is read from a string (hence S ("string") 2 ("to") VALNM).
1201             It stops at the first bad character.
1202             Returns a pointer to the created name.
1203 */
1204 #ifdef USE_PROTOTYPES
1205 UNCH *s2valnm(UNCH *nm, UNCH *s, UNCH scope, int translate)
1206 #else
1207 UNCH *s2valnm(nm, s, scope, translate)
1208 UNCH *nm;                     /* Name to be created. */
1209 UNCH *s;                      /* Source string to be parsed as name. */
1210 UNCH scope;                   /* Minimum lexical class allowed. */
1211 int translate;                /* Namecase translation: 1=yes; 0=no. */
1212 #endif
1213 {
1214      UNCH len = 0;            /* Length of name (incl EOS and length). */
1215
1216      for (; (int)lextoke[*s] >= scope && len < NAMELEN; s++)
1217           nm[++len] = translate ? lextran[*s] : *s;
1218      nm[++len] = EOS;         /* Terminate name with standard EOS. */
1219      *nm = ++len;             /* Store length ahead of name. */
1220      return nm;
1221 }
1222 /* PARSEVAL: Parser for attribute values.
1223              The input is read from a string and tokenized in a buffer.
1224              The input is terminated by EOS.
1225              Each token is preceded by its actual length; there is no EOS.
1226              If an error occurs while parsing, or
1227              if a token doesn't conform, set the token count to 0 to show that
1228              value was not tokenized and return the error code.
1229              After successful parse, return buffer length and 0 error code.
1230              The number of tokens found is set in external variable tokencnt.
1231 */
1232 int parseval(s, atype, tbuf)
1233 UNCH *s;                      /* Source string to be parsed as token list. */
1234 UNS atype;                    /* Type of token list expected. */
1235 UNCH *tbuf;                   /* Work area for tokenization. */
1236 {
1237      int t;
1238      UNCH *pt = tbuf;
1239
1240      pcbval.newstate = 0; tokencnt = 0;
1241      while (1) {
1242           for (;;) {
1243                pcbval.input = lextoke[*s];
1244                pcbval.state = pcbval.newstate;
1245                pcbval.newstate = (*(pcbval.ptab + pcbval.state)) [pcbval.input];
1246                pcbval.action = (*(pcbval.ptab + pcbval.state+1)) [pcbval.input];
1247                TRACEVAL(&pcbval, atype, s, tokencnt);
1248                if (pcbval.action != NOPA)
1249                     break;
1250                s++;
1251           }
1252
1253
1254           switch (pcbval.action) {
1255           case INVA:          /* Invalid character; terminate parse. */
1256                if (*s == '\0') goto alldone;  /* Normal termination. */
1257                tokencnt = 0;  /* Value was not tokenized. */
1258                return(14);
1259           case LENA:          /* Length limit of token exceeded; end parse. */
1260                tokencnt = 0;  /* Value was not tokenized. */
1261                return(15);
1262           default:            /* Token begun: NUMA, NASA, or NMTA. */
1263                break;
1264           }
1265
1266           ++tokencnt;         /* One token per iteration. */
1267           switch (atype) {
1268           case AENTITY:
1269                if (tokencnt>1) {tokencnt = 0; return(16);}
1270           case AENTITYS:
1271                if (pcbval.action!=NASA) {tokencnt = 0; return(17);}
1272                s2valnm(pt, s, NMC, ENTCASE);
1273                break;
1274
1275           case AID:
1276           case AIDREF:
1277           case ANAME:
1278           case ANOTEGRP:
1279                if (tokencnt>1) {tokencnt = 0; return(16);}
1280           case AIDREFS:
1281           case ANAMES:
1282                if (pcbval.action!=NASA) {tokencnt = 0; return(17);}
1283                s2valnm(pt, s, NMC, NAMECASE);
1284                break;
1285
1286           case ANMTGRP:
1287           case ANMTOKE:
1288                if (tokencnt>1) {tokencnt = 0; return(16);}
1289           case ANMTOKES:
1290                /* No test needed because NMTA, NUMA and NASA are all valid. */
1291                s2valnm(pt, s, NMC, NAMECASE);
1292                break;
1293
1294           case ANUMBER:
1295                if (tokencnt>1) {tokencnt = 0; return(16);}
1296           case ANUMBERS:
1297                if (pcbval.action!=NUMA) {tokencnt = 0; return(17);}
1298                s2valnm(pt, s, NU, NAMECASE);
1299                t = lextoke[s[*pt - 2]];
1300                if (t == NMS || t == NMC) {tokencnt = 0; return(17);}
1301                break;
1302
1303           case ANUTOKE:
1304                if (tokencnt>1) {tokencnt = 0; return(16);}
1305           case ANUTOKES:
1306                if (pcbval.action!=NUMA) {tokencnt = 0; return(17);}
1307                s2valnm(pt, s, NMC, NAMECASE);
1308                break;
1309           }
1310           *pt -= 2;
1311           s += *pt;
1312           pt += *pt + 1;
1313      }
1314  alldone:
1315      *pt++ = EOS;
1316      if (*tbuf == '\0')
1317           return 25;
1318      if (atype < ATKNLIST)
1319           *tbuf += 2;         /* include length and EOS */
1320      return 0;
1321 }
1322 /*
1323 Local Variables:
1324 c-indent-level: 5
1325 c-continued-statement-offset: 5
1326 c-brace-offset: -5
1327 c-argdecl-indent: 0
1328 c-label-offset: -5
1329 comment-column: 30
1330 End:
1331 */