nsgmls: remove register keyword
[oweals/cde.git] / cde / programs / nsgmls / parseInstance.C
1 /*
2  * CDE - Common Desktop Environment
3  *
4  * Copyright (c) 1993-2012, The Open Group. All rights reserved.
5  *
6  * These libraries and programs are free software; you can
7  * redistribute them and/or modify them under the terms of the GNU
8  * Lesser General Public License as published by the Free Software
9  * Foundation; either version 2 of the License, or (at your option)
10  * any later version.
11  *
12  * These libraries and programs are distributed in the hope that
13  * they will be useful, but WITHOUT ANY WARRANTY; without even the
14  * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15  * PURPOSE. See the GNU Lesser General Public License for more
16  * details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with these libraries and programs; if not, write
20  * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21  * Floor, Boston, MA 02110-1301 USA
22  */
23 /* $XConsortium: parseInstance.C /main/2 1996/08/12 14:05:40 mgreess $ */
24 // Copyright (c) 1994 James Clark
25 // See the file COPYING for copying permission.
26
27 #include "splib.h"
28 #include "Parser.h"
29 #include "ParserMessages.C"
30 #include "MessageArg.h"
31 #include "TokenMessageArg.h"
32 #include "StringVectorMessageArg.h"
33 #include "token.h"
34 #include "macros.h"
35
36 #ifdef SP_NAMESPACE
37 namespace SP_NAMESPACE {
38 #endif
39
40 void Parser::doInstanceStart()
41 {
42   if (cancelled()) {
43     allDone();
44     return;
45   }
46   // FIXME check here that we have a valid dtd
47   compileInstanceModes();
48   setPhase(contentPhase);
49   Token token = getToken(currentMode());
50   switch (token) {
51   case tokenEe:
52   case tokenStagoNameStart:
53   case tokenStagoTagc:
54   case tokenStagoGrpo:
55   case tokenEtagoNameStart:
56   case tokenEtagoTagc:
57   case tokenEtagoGrpo:
58     break;
59   default:
60     if (sd().omittag()) {
61       unsigned startImpliedCount = 0;
62       unsigned attributeListIndex = 0;
63       IList<Undo> undoList;
64       IList<Event> eventList;
65       if (!tryImplyTag(currentLocation(),
66                        startImpliedCount,
67                        attributeListIndex,
68                        undoList,
69                        eventList))
70         CANNOT_HAPPEN();
71       queueElementEvents(eventList);
72     }
73     else
74       message(ParserMessages::instanceStartOmittag);
75   }
76   currentInput()->ungetToken();
77 }
78
79 void Parser::endInstance()
80 {
81   // Do checking before popping entity stack so that there's a
82   // current location for error messages.
83   endAllElements();
84   while (markedSectionLevel() > 0) {
85     message(ParserMessages::unclosedMarkedSection,
86             currentMarkedSectionStartLocation());
87     endMarkedSection();
88   }
89   checkIdrefs();
90   popInputStack();
91   allDone();
92 }
93
94 void Parser::checkIdrefs()
95 {
96   IdTableIter iter(idTableIter());
97   Id *id;
98   while ((id = iter.next()) != 0) {
99     for (size_t i = 0; i < id->pendingRefs().size(); i++) {
100       Messenger::setNextLocation(id->pendingRefs()[i]);
101       message(ParserMessages::missingId, StringMessageArg(id->name()));
102     }
103   }
104 }
105
106 void Parser::doContent()
107 {
108   do {
109     if (cancelled()) {
110       allDone();
111       return;
112     }
113     Token token = getToken(currentMode());
114     switch (token) {
115     case tokenEe:
116       if (inputLevel() == 1) {
117         endInstance();
118         return;
119       }
120       if (inputLevel() == specialParseInputLevel()) {
121         // FIXME have separate messages for each type of special parse
122         // perhaps force end of marked section or element
123         message(ParserMessages::specialParseEntityEnd);
124       }
125       if (eventsWanted().wantInstanceMarkup())
126         eventHandler().entityEnd(new (eventAllocator())
127                                  EntityEndEvent(currentLocation()));
128       if (afterDocumentElement())
129         message(ParserMessages::afterDocumentElementEntityEnd);
130       popInputStack();
131       break;
132     case tokenCroDigit:
133       {
134         if (afterDocumentElement())
135           message(ParserMessages::characterReferenceAfterDocumentElement);
136         Char ch;
137         Location loc;
138         if (parseNumericCharRef(ch, loc)) {
139           acceptPcdata(loc);
140           noteData();
141           eventHandler().data(new (eventAllocator())
142                               ImmediateDataEvent(Event::characterData,
143                                                  &ch, 1, loc, 1));
144           break;
145         }
146       }
147       break;
148     case tokenCroNameStart:
149       if (afterDocumentElement())
150           message(ParserMessages::characterReferenceAfterDocumentElement);
151       parseNamedCharRef();
152       break;
153     case tokenEroGrpo:
154     case tokenEroNameStart:
155       {
156         if (afterDocumentElement())
157           message(ParserMessages::entityReferenceAfterDocumentElement);
158         ConstPtr<Entity> entity;
159         Ptr<EntityOrigin> origin;
160         if (parseEntityReference(0, token == tokenEroGrpo, entity, origin)) {
161           if (!entity.isNull()) {
162             if (entity->isCharacterData())
163               acceptPcdata(Location(origin.pointer(), 0));
164             if (inputLevel() == specialParseInputLevel())
165               entity->rcdataReference(*this, origin);
166             else
167               entity->contentReference(*this, origin);
168           }
169         }
170         noteMarkup();
171       }
172       break;
173     case tokenEtagoNameStart:
174       parseEndTag();
175       break;
176     case tokenEtagoTagc:
177       parseEmptyEndTag();
178       break;
179     case tokenEtagoGrpo:
180       parseGroupEndTag();
181       break;
182     case tokenMdoNameStart:
183       if (startMarkup(eventsWanted().wantInstanceMarkup(), currentLocation()))
184         currentMarkup()->addDelim(Syntax::dMDO);
185       Syntax::ReservedName name;
186       Boolean result;
187       unsigned startLevel;
188       startLevel = inputLevel();
189       if (parseDeclarationName(&name)) {
190         switch (name) {
191         case Syntax::rUSEMAP:
192           if (afterDocumentElement())
193             message(ParserMessages::declarationAfterDocumentElement,
194                     StringMessageArg(syntax().reservedName(name)));
195           result = parseUsemapDecl();
196           break;
197         case Syntax::rUSELINK:
198           if (afterDocumentElement())
199             message(ParserMessages::declarationAfterDocumentElement,
200                     StringMessageArg(syntax().reservedName(name)));
201           result = parseUselinkDecl();
202           break;
203         case Syntax::rDOCTYPE:
204         case Syntax::rLINKTYPE:
205         case Syntax::rELEMENT:
206         case Syntax::rATTLIST:
207         case Syntax::rENTITY:
208         case Syntax::rNOTATION:
209         case Syntax::rSHORTREF:
210         case Syntax::rLINK:
211         case Syntax::rIDLINK:
212           message(ParserMessages::instanceDeclaration,
213                   StringMessageArg(syntax().reservedName(name)));
214           result = 0;
215           break;
216         default:
217           message(ParserMessages::noSuchDeclarationType,
218                   StringMessageArg(syntax().reservedName(name)));
219           result = 0;
220           break;
221         }
222       }
223       else
224         result = 0;
225       if (!result)
226         skipDeclaration(startLevel);
227       noteMarkup();
228       break;
229     case tokenMdoMdc:
230       // empty comment
231       emptyCommentDecl();
232       noteMarkup();
233       break;
234     case tokenMdoCom:
235       parseCommentDecl();
236       noteMarkup();
237       break;
238     case tokenMdoDso:
239       if (afterDocumentElement())
240         message(ParserMessages::markedSectionAfterDocumentElement);
241       parseMarkedSectionDeclStart();
242       noteMarkup();
243       break;
244     case tokenMscMdc:
245       handleMarkedSectionEnd();
246       noteMarkup();
247       break;
248     case tokenNet:
249       parseNullEndTag();
250       break;
251     case tokenPio:
252       parseProcessingInstruction();
253       break;
254     case tokenStagoNameStart:
255       parseStartTag();
256       break;
257     case tokenStagoTagc:
258       parseEmptyStartTag();
259       break;
260     case tokenStagoGrpo:
261       parseGroupStartTag();
262       break;
263     case tokenRe:
264       acceptPcdata(currentLocation());
265       queueRe(currentLocation());
266       break;
267     case tokenRs:
268       acceptPcdata(currentLocation());
269       noteRs();
270       if (eventsWanted().wantInstanceMarkup())
271         eventHandler().ignoredRs(new (eventAllocator())
272                                  IgnoredRsEvent(currentChar(),
273                                                 currentLocation()));
274       break;
275     case tokenS:
276       extendContentS();
277       if (eventsWanted().wantInstanceMarkup())
278         eventHandler().sSep(new (eventAllocator())
279                             SSepEvent(currentInput()->currentTokenStart(),
280                                       currentInput()->currentTokenLength(),
281                                       currentLocation(),
282                                       0));
283       break;
284     case tokenIgnoredChar:
285       extendData();
286       if (eventsWanted().wantMarkedSections())
287         eventHandler().ignoredChars(new (eventAllocator())
288                                     IgnoredCharsEvent(currentInput()->currentTokenStart(),
289                                                       currentInput()->currentTokenLength(),
290                                                       currentLocation(),
291                                                       0));
292       break;
293     case tokenUnrecognized:
294       reportNonSgmlCharacter();
295       // fall through
296     case tokenChar:
297       parsePcdata();
298       break;
299     default:
300       ASSERT(token >= tokenFirstShortref);
301       handleShortref(token - tokenFirstShortref);
302       break;
303     }
304   } while (eventQueueEmpty());
305 }
306
307 void Parser::skipDeclaration(unsigned startLevel)
308 {
309   const unsigned skipMax = 250;
310   unsigned skipCount = 0;
311   for (;;) {
312     Token token = getToken(mdMode);
313     if (inputLevel() == startLevel)
314       skipCount++;
315     switch (token) {
316     case tokenUnrecognized:
317       (void)getChar();
318       break;
319     case tokenEe:
320       if (inputLevel() <= startLevel)
321         return;
322       popInputStack();
323       return;
324     case tokenMdc:
325       if (inputLevel() == startLevel)
326         return;
327       break;
328     case tokenS:
329       if (inputLevel() == startLevel && skipCount >= skipMax
330           && currentChar() == syntax().standardFunction(Syntax::fRE))
331         return;
332       break;
333     default:
334       break;
335     }
336   }
337 }
338
339 void Parser::handleShortref(int index)
340 {
341   const ConstPtr<Entity> &entity
342     = currentElement().map()->entity(index);
343   if (!entity.isNull()) {
344     Owner<Markup> markupPtr;
345     if (eventsWanted().wantInstanceMarkup()) {
346       markupPtr = new Markup;
347       markupPtr->addShortref(currentInput());
348     }
349     Ptr<EntityOrigin> origin
350       = new (internalAllocator())
351           EntityOrigin(entity,
352                        currentLocation(),
353                        currentInput()->currentTokenLength(),
354                        markupPtr);
355     entity->contentReference(*this, origin);
356     return;
357   }
358   InputSource *in = currentInput();
359   size_t length = in->currentTokenLength();
360   const Char *s = in->currentTokenStart();
361   size_t i = 0;
362   if (currentMode() == econMode || currentMode() == econnetMode) {
363     // FIXME do this in advance (what about B sequence?)
364     for (i = 0; i < length && syntax().isS(s[i]); i++)
365       ;
366     if (i > 0 && eventsWanted().wantInstanceMarkup())
367       eventHandler().sSep(new (eventAllocator())
368                           SSepEvent(s, i, currentLocation(), 0));
369   }
370   if (i < length) {
371     Location location(currentLocation());
372     location += i;
373     s += i;
374     length -= i;
375     acceptPcdata(location);
376     // FIXME speed this up
377     for (; length > 0; location += 1, length--, s++) {
378       if (*s == syntax().standardFunction(Syntax::fRS)) {
379         noteRs();
380         if (eventsWanted().wantInstanceMarkup())
381           eventHandler().ignoredRs(new (eventAllocator())
382                                    IgnoredRsEvent(*s, location));
383       }
384       else if (*s == syntax().standardFunction(Syntax::fRE))
385         queueRe(location);
386       else {
387         noteData();
388         eventHandler().data(new (eventAllocator())
389                             ImmediateDataEvent(Event::characterData, s, 1,
390                                                location, 0));
391       }
392     }
393   }
394 }
395
396 void Parser::parsePcdata()
397 {
398   extendData();
399   acceptPcdata(currentLocation());
400   noteData();
401   eventHandler().data(new (eventAllocator())
402                       ImmediateDataEvent(Event::characterData,
403                                          currentInput()->currentTokenStart(),
404                                          currentInput()->currentTokenLength(),
405                                          currentLocation(),
406                                          0));
407 }
408
409 void Parser::parseStartTag()
410 {
411   InputSource *in = currentInput();
412   Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(),
413                                in->currentLocation());
414   in->discardInitial();
415   extendNameToken(syntax().namelen(), ParserMessages::nameLength);
416   if (markup) {
417     markup->addDelim(Syntax::dSTAGO);
418     markup->addName(in);
419   }
420   StringC &name = nameBuffer();
421   getCurrentToken(syntax().generalSubstTable(), name);
422   const ElementType *e = currentDtd().lookupElementType(name);
423   if (sd().rank()) {
424     if (!e)
425       e = completeRankStem(name);
426     else if (e->isRankedElement())
427       handleRankedElement(e);
428   }
429   if (!e) 
430     e = lookupCreateUndefinedElement(name, currentLocation());
431   Boolean netEnabling;
432   AttributeList *attributes = allocAttributeList(e->attributeDef(), 0);
433   Token closeToken = getToken(tagMode);
434   if (closeToken == tokenTagc) {
435     if (name.size() > syntax().taglen())
436       checkTaglen(markupLocation().index());
437     attributes->finish(*this);
438     netEnabling = 0;
439     if (markup)
440       markup->addDelim(Syntax::dTAGC);
441   }
442   else {
443     in->ungetToken();
444     if (parseAttributeSpec(0, *attributes, netEnabling)) {
445       // The difference between the indices will be the difference
446       // in offsets plus 1 for each named character reference.
447       if (in->currentLocation().index() - markupLocation().index()
448           > syntax().taglen())
449         checkTaglen(markupLocation().index());
450     }
451     else
452       netEnabling = 0;
453   }
454   acceptStartTag(e,
455                  new (eventAllocator())
456                  StartElementEvent(e,
457                                    currentDtdPointer(),
458                                    attributes,
459                                    markupLocation(),
460                                    markup),
461                  netEnabling);
462 }
463
464 const ElementType *Parser::completeRankStem(const StringC &name)
465 {
466   const RankStem *rankStem = currentDtd().lookupRankStem(name);
467   if (rankStem) {
468     StringC name(rankStem->name());
469     if (!appendCurrentRank(name, rankStem))
470       message(ParserMessages::noCurrentRank, StringMessageArg(name));
471     else
472       return currentDtd().lookupElementType(name);
473   }
474   return 0;
475 }
476
477 void Parser::handleRankedElement(const ElementType *e)
478 {
479   StringC rankSuffix(e->definition()->rankSuffix());
480   const RankStem *rankStem = e->rankedElementRankStem();
481   for (size_t i = 0; i < rankStem->nDefinitions(); i++) {
482     const ElementDefinition *def = rankStem->definition(i);
483     for (size_t j = 0; j < def->nRankStems(); j++)
484       setCurrentRank(def->rankStem(j), rankSuffix);
485   }
486 }
487
488 void Parser::checkTaglen(Index tagStartIndex)
489 {
490   const InputSourceOrigin *origin
491     = currentLocation().origin()->asInputSourceOrigin();
492   ASSERT(origin != 0);
493   if (origin->startOffset(currentLocation().index())
494       - origin->startOffset(tagStartIndex
495                             + syntax().delimGeneral(Syntax::dSTAGO).size())
496       > syntax().taglen())
497     message(ParserMessages::taglen, NumberMessageArg(syntax().taglen()));
498 }
499
500 void Parser::parseEmptyStartTag()
501 {
502   if (options().warnEmptyTag)
503     message(ParserMessages::emptyStartTag);
504   // FIXME error if not in base.
505   const ElementType *e = 0;
506   if (!sd().omittag()) 
507     e = lastEndedElementType();
508   else if (tagLevel() > 0)
509     e = currentElement().type();
510   if (!e)
511     e = currentDtd().documentElementType();
512   AttributeList *attributes = allocAttributeList(e->attributeDef(), 0);
513   attributes->finish(*this);
514   Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(),
515                                currentLocation());
516   if (markup) {
517     markup->addDelim(Syntax::dSTAGO);
518     markup->addDelim(Syntax::dTAGC);
519   }
520   acceptStartTag(e,
521                  new (eventAllocator())
522                    StartElementEvent(e,
523                                      currentDtdPointer(),
524                                      attributes,
525                                      markupLocation(),
526                                      markup),
527                  0);
528 }
529
530 void Parser::parseGroupStartTag()
531 {
532   if (startMarkup(eventsWanted().wantInstanceMarkup(), currentLocation())) {
533     currentMarkup()->addDelim(Syntax::dSTAGO);
534     currentMarkup()->addDelim(Syntax::dGRPO);
535   }
536   Boolean active;
537   if (!parseTagNameGroup(active))
538     return;
539   InputSource *in = currentInput();
540   // Location startLocation = in->currentLocation();
541   in->startToken();
542   Xchar c = in->tokenChar(messenger());
543   if (!syntax().isNameStartCharacter(c)) {
544     message(ParserMessages::startTagMissingName);
545     return;
546   }
547   in->discardInitial();
548   extendNameToken(syntax().namelen(), ParserMessages::nameLength);
549   if (currentMarkup())
550     currentMarkup()->addName(currentInput());
551   skipAttributeSpec();
552   if (currentMarkup())
553     eventHandler().ignoredMarkup(new (eventAllocator())
554                                  IgnoredMarkupEvent(markupLocation(),
555                                                     currentMarkup()));
556   noteMarkup();
557 }
558
559 void Parser::parseGroupEndTag()
560 {
561   if (startMarkup(eventsWanted().wantInstanceMarkup(), currentLocation())) {
562     currentMarkup()->addDelim(Syntax::dSTAGO);
563     currentMarkup()->addDelim(Syntax::dGRPO);
564   }
565   Boolean active;
566   if (!parseTagNameGroup(active))
567     return;
568   InputSource *in = currentInput();
569   // Location startLocation = in->currentLocation();
570   in->startToken();
571   Xchar c = in->tokenChar(messenger());
572   if (!syntax().isNameStartCharacter(c)) {
573     message(ParserMessages::endTagMissingName);
574     return;
575   }
576   in->discardInitial();
577   extendNameToken(syntax().namelen(), ParserMessages::nameLength);
578   if (currentMarkup())
579     currentMarkup()->addName(currentInput());
580   parseEndTagClose();
581   if (currentMarkup())
582     eventHandler().ignoredMarkup(new (eventAllocator())
583                                  IgnoredMarkupEvent(markupLocation(),
584                                                     currentMarkup()));
585   noteMarkup();
586 }
587
588 void Parser::acceptPcdata(const Location &startLocation)
589 {
590   if (currentElement().tryTransitionPcdata())
591     return;
592   // Need to test here since implying tags may turn off pcdataRecovering.
593   if (pcdataRecovering())
594     return;
595   IList<Undo> undoList;
596   IList<Event> eventList;
597   unsigned startImpliedCount = 0;
598   unsigned attributeListIndex = 0;
599   keepMessages();
600   while (tryImplyTag(startLocation, startImpliedCount, attributeListIndex,
601                      undoList, eventList))
602     if (currentElement().tryTransitionPcdata()) {
603       queueElementEvents(eventList);
604       return;
605     }
606   discardKeptMessages();
607   undo(undoList);
608   message(ParserMessages::pcdataNotAllowed);
609   pcdataRecover();
610 }
611
612 void Parser::acceptStartTag(const ElementType *e,
613                             StartElementEvent *event,
614                             Boolean netEnabling)
615 {
616   if (e->definition()->undefined()) {
617     message(ParserMessages::undefinedElement, StringMessageArg(e->name()));
618     pushElementCheck(e, event, netEnabling);
619     return;
620   }
621   if (elementIsExcluded(e)) {
622     keepMessages();
623     checkExclusion(e);
624   }
625   else {
626     if (currentElement().tryTransition(e)) {
627       pushElementCheck(e, event, netEnabling);
628       return;
629     }
630     if (elementIsIncluded(e)) {
631       event->setIncluded();
632       pushElementCheck(e, event, netEnabling);
633       return;
634     }
635     keepMessages();
636   }
637   IList<Undo> undoList;
638   IList<Event> eventList;
639   unsigned startImpliedCount = 0;
640   unsigned attributeListIndex = 1;
641   while (tryImplyTag(event->location(), startImpliedCount,
642                      attributeListIndex, undoList, eventList))
643     if (tryStartTag(e, event, netEnabling, eventList))
644       return;
645   discardKeptMessages();
646   undo(undoList);
647   handleBadStartTag(e, event, netEnabling);
648 }
649
650 void Parser::undo(IList<Undo> &undoList)
651 {
652   while (!undoList.empty()) {
653     Undo *p = undoList.get();
654     p->undo(this);
655     delete p;
656   }
657 }
658
659 void Parser::queueElementEvents(IList<Event> &events)
660 {
661   releaseKeptMessages();
662   // FIXME provide IList<T>::reverse function
663   // reverse it
664   IList<Event> tem;
665   while (!events.empty())
666     tem.insert(events.get());
667   while (!tem.empty()) {
668     Event *e = tem.get();
669     if (e->type() == Event::startElement) {
670       noteStartElement(((StartElementEvent *)e)->included());
671       eventHandler().startElement((StartElementEvent *)e);
672     }
673     else {
674       noteEndElement(((EndElementEvent *)e)->included());
675       eventHandler().endElement((EndElementEvent *)e);
676     }
677   }
678
679 }
680
681 void Parser::checkExclusion(const ElementType *e)
682 {
683   const LeafContentToken *token = currentElement().invalidExclusion(e);
684   if (token)
685     message(ParserMessages::invalidExclusion,
686             OrdinalMessageArg(token->typeIndex() + 1),
687             StringMessageArg(token->elementType()->name()),
688             StringMessageArg(currentElement().type()->name()));
689 }
690
691 Boolean Parser::tryStartTag(const ElementType *e,
692                             StartElementEvent *event,
693                             Boolean netEnabling,
694                             IList<Event> &impliedEvents)
695 {
696   if (elementIsExcluded(e)) {
697     checkExclusion(e);
698     return 0;
699   }
700   if (currentElement().tryTransition(e)) {
701     queueElementEvents(impliedEvents);
702     pushElementCheck(e, event, netEnabling);
703     return 1;
704   }
705   if (elementIsIncluded(e)) {
706     queueElementEvents(impliedEvents);
707     event->setIncluded();
708     pushElementCheck(e, event, netEnabling);
709     return 1;
710   }
711   return 0;
712 }
713
714 Boolean Parser::tryImplyTag(const Location &loc,
715                             unsigned &startImpliedCount,
716                             unsigned &attributeListIndex,
717                             IList<Undo> &undo,
718                             IList<Event> &eventList)
719 {
720   if (!sd().omittag())
721     return 0;
722   if (currentElement().isFinished()) {
723     if (tagLevel() == 0)
724       return 0;
725 #if 1
726     const ElementDefinition *def = currentElement().type()->definition();
727     if (def && !def->canOmitEndTag())
728       return 0;
729 #endif
730     // imply an end tag
731     if (startImpliedCount > 0) {
732       message(ParserMessages::startTagEmptyElement,
733               StringMessageArg(currentElement().type()->name()));
734       startImpliedCount--;
735     }
736 #if 0
737     const ElementDefinition *def = currentElement().type()->definition();
738     if (def && !def->canOmitEndTag())
739       message(ParserMessages::omitEndTagDeclare,
740               StringMessageArg(currentElement().type()->name()),
741               currentElement().startLocation());
742 #endif
743     EndElementEvent *event
744       = new (eventAllocator()) EndElementEvent(currentElement().type(),
745                                                currentDtdPointer(),
746                                                loc,
747                                                0);
748     eventList.insert(event);
749     undo.insert(new (internalAllocator()) UndoEndTag(popSaveElement()));
750     return 1;
751   }
752   const LeafContentToken *token = currentElement().impliedStartTag();
753   if (!token)
754     return 0;
755   const ElementType *e = token->elementType();
756   if (elementIsExcluded(e))
757     message(ParserMessages::requiredElementExcluded,
758             OrdinalMessageArg(token->typeIndex() + 1),
759             StringMessageArg(e->name()),
760             StringMessageArg(currentElement().type()->name()));
761   if (tagLevel() != 0)
762     undo.insert(new (internalAllocator())
763                      UndoTransition(currentElement().matchState()));
764   currentElement().doRequiredTransition();
765   const ElementDefinition *def = e->definition();
766   if (def->declaredContent() != ElementDefinition::modelGroup
767       && def->declaredContent() != ElementDefinition::any)
768     message(ParserMessages::omitStartTagDeclaredContent,
769             StringMessageArg(e->name()));
770   if (def->undefined())
771     message(ParserMessages::undefinedElement, StringMessageArg(e->name()));
772   else if (!def->canOmitStartTag())
773     message(ParserMessages::omitStartTagDeclare, StringMessageArg(e->name()));
774   AttributeList *attributes
775     = allocAttributeList(e->attributeDef(),
776                          attributeListIndex++);
777   // this will give an error if the element has a required attribute
778   attributes->finish(*this);
779   startImpliedCount++;
780   StartElementEvent *event
781     = new (eventAllocator()) StartElementEvent(e,
782                                                currentDtdPointer(),
783                                                attributes,
784                                                loc,
785                                                0);
786   pushElementCheck(e, event, undo, eventList);
787   const int implyCheckLimit = 30; // this is fairly arbitrary
788   if (startImpliedCount > implyCheckLimit
789       && !checkImplyLoop(startImpliedCount))
790     return 0;
791   return 1;
792 }
793
794 void Parser::pushElementCheck(const ElementType *e, StartElementEvent *event,
795                               Boolean netEnabling)
796 {
797   if (tagLevel() == syntax().taglvl())
798     message(ParserMessages::taglvlOpenElements, NumberMessageArg(syntax().taglvl()));
799   noteStartElement(event->included());
800   if (event->mustOmitEnd()) {
801     EndElementEvent *end
802       = new (eventAllocator()) EndElementEvent(e,
803                                                currentDtdPointer(),
804                                                event->location(),
805                                                0);
806     if (event->included()) {
807       end->setIncluded();
808       noteEndElement(1);
809     }
810     else
811       noteEndElement(0);
812     eventHandler().startElement(event);
813     eventHandler().endElement(end);
814   }
815   else {
816     const ShortReferenceMap *map = e->map();
817     if (!map)
818       map = currentElement().map();
819     pushElement(new (internalAllocator()) OpenElement(e,
820                                                       netEnabling,
821                                                       event->included(),
822                                                       map,
823                                                       event->location()));
824     // Can't access event after it's passed to the event handler.
825     eventHandler().startElement(event);
826   }
827 }
828
829 void Parser::pushElementCheck(const ElementType *e, StartElementEvent *event,
830                               IList<Undo> &undoList,
831                               IList<Event> &eventList)
832 {
833   if (tagLevel() == syntax().taglvl())
834     message(ParserMessages::taglvlOpenElements, NumberMessageArg(syntax().taglvl()));
835   eventList.insert(event);
836   if (event->mustOmitEnd()) {
837     EndElementEvent *end
838       = new (eventAllocator()) EndElementEvent(e,
839                                                currentDtdPointer(),
840                                                event->location(),
841                                                0);
842     if (event->included())
843       end->setIncluded();
844     eventList.insert(end);
845   }
846   else {
847     undoList.insert(new (internalAllocator()) UndoStartTag);
848     const ShortReferenceMap *map = e->map();
849     if (!map)
850       map = currentElement().map();
851     pushElement(new (internalAllocator()) OpenElement(e,
852                                                       0,
853                                                       event->included(),
854                                                       map,
855                                                       event->location()));
856   }
857 }
858
859 void Parser::parseEndTag()
860 {
861   Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(),
862                                currentLocation());
863   currentInput()->discardInitial();
864   extendNameToken(syntax().namelen(), ParserMessages::nameLength);
865   if (markup) {
866     markup->addDelim(Syntax::dETAGO);
867     markup->addName(currentInput());
868   }
869   StringC &name = nameBuffer();
870   getCurrentToken(syntax().generalSubstTable(), name);
871   const ElementType *e = currentDtd().lookupElementType(name);
872   if (sd().rank()) {
873     if (!e)
874       e = completeRankStem(name);
875   }
876   if (!e) 
877     e = lookupCreateUndefinedElement(name, currentLocation());
878   parseEndTagClose();
879   acceptEndTag(e,
880                new (eventAllocator())
881                EndElementEvent(e,
882                                currentDtdPointer(),
883                                markupLocation(),
884                                markup));
885 }
886
887 void Parser::parseEndTagClose()
888 {
889   for (;;) {
890     Token token = getToken(tagMode);
891     switch (token) {
892     case tokenUnrecognized:
893       if (!reportNonSgmlCharacter())
894         message(ParserMessages::endTagCharacter, StringMessageArg(currentToken()));
895       return;
896     case tokenEe:
897       message(ParserMessages::endTagEntityEnd);
898       return;
899     case tokenEtago:
900     case tokenStago:
901       if (!sd().shorttag())
902         message(ParserMessages::minimizedEndTag);
903       else if (options().warnUnclosedTag)
904         message(ParserMessages::unclosedEndTag);
905       currentInput()->ungetToken();
906       return;
907     case tokenTagc:
908       if (currentMarkup())
909         currentMarkup()->addDelim(Syntax::dTAGC);
910       return;
911     case tokenS:
912       if (currentMarkup())
913         currentMarkup()->addS(currentChar());
914       break;
915     default:
916       message(ParserMessages::endTagInvalidToken,
917               TokenMessageArg(token, tagMode, syntaxPointer(), sdPointer()));
918       return;
919     }
920   }
921 }
922
923 void Parser::parseEmptyEndTag()
924 {
925   if (options().warnEmptyTag)
926     message(ParserMessages::emptyEndTag);
927   // FIXME what to do if not in base
928   if (tagLevel() == 0)
929     message(ParserMessages::emptyEndTagNoOpenElements);
930   else {
931     Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(),
932                                  currentLocation());
933     if (markup) {
934       markup->addDelim(Syntax::dETAGO);
935       markup->addDelim(Syntax::dTAGC);
936     }
937     acceptEndTag(currentElement().type(),
938                  new (eventAllocator()) EndElementEvent(currentElement().type(),
939                                                         currentDtdPointer(),
940                                                         currentLocation(),
941                                                         markup));
942   }
943 }
944
945 void Parser::parseNullEndTag()
946 {
947   if (options().warnNet)
948     message(ParserMessages::nullEndTag);
949   // If a null end tag was recognized, then there must be a net enabling
950   // element on the stack.
951   for (;;) {
952     ASSERT(tagLevel() > 0);
953     if (currentElement().netEnabling())
954       break;
955     if (!currentElement().isFinished())
956       message(ParserMessages::elementNotFinished,
957               StringMessageArg(currentElement().type()->name()));
958     implyCurrentElementEnd(currentLocation());
959   }
960   if (!currentElement().isFinished())
961     message(ParserMessages::elementEndTagNotFinished,
962             StringMessageArg(currentElement().type()->name()));
963   Markup *markup = startMarkup(eventsWanted().wantInstanceMarkup(),
964                                currentLocation());
965   if (markup)
966     markup->addDelim(Syntax::dNET);
967   acceptEndTag(currentElement().type(),
968                new (eventAllocator()) EndElementEvent(currentElement().type(),
969                                                       currentDtdPointer(),
970                                                       currentLocation(),
971                                                       markup));
972 }
973
974 void Parser::endAllElements()
975 {
976   while (tagLevel() > 0) {
977     if (!currentElement().isFinished())
978       message(ParserMessages::elementNotFinishedDocumentEnd,
979               StringMessageArg(currentElement().type()->name()));
980     implyCurrentElementEnd(currentLocation());
981   }
982   if (!currentElement().isFinished())
983     message(ParserMessages::noDocumentElement);
984 }
985
986 void Parser::acceptEndTag(const ElementType *e,
987                           EndElementEvent *event)
988 {
989   if (!elementIsOpen(e)) {
990     message(ParserMessages::elementNotOpen, StringMessageArg(e->name()));
991     delete event;
992     return;
993   }
994   for (;;){
995     if (currentElement().type() == e)
996       break;
997     if (!currentElement().isFinished())
998       message(ParserMessages::elementNotFinished,
999               StringMessageArg(currentElement().type()->name()));
1000     implyCurrentElementEnd(event->location());
1001   }
1002   if (!currentElement().isFinished())
1003     message(ParserMessages::elementEndTagNotFinished,
1004             StringMessageArg(currentElement().type()->name()));
1005   if (currentElement().included())
1006     event->setIncluded();
1007   noteEndElement(event->included());
1008   eventHandler().endElement(event);
1009   popElement();
1010 }
1011
1012 void Parser::implyCurrentElementEnd(const Location &loc)
1013 {
1014   if (!sd().omittag())
1015     message(ParserMessages::omitEndTagOmittag,
1016             StringMessageArg(currentElement().type()->name()),
1017             currentElement().startLocation());
1018   else {
1019     const ElementDefinition *def = currentElement().type()->definition();
1020     if (def && !def->canOmitEndTag())
1021       message(ParserMessages::omitEndTagDeclare,
1022               StringMessageArg(currentElement().type()->name()),
1023               currentElement().startLocation());
1024   }
1025   EndElementEvent *event
1026     = new (eventAllocator()) EndElementEvent(currentElement().type(),
1027                                              currentDtdPointer(),
1028                                              loc,
1029                                              0);
1030   if (currentElement().included())
1031     event->setIncluded();
1032   noteEndElement(event->included());
1033   eventHandler().endElement(event);
1034   popElement();
1035 }
1036
1037 void Parser::extendData()
1038 {
1039   XcharMap<PackedBoolean> isNormal(normalMap());
1040   InputSource *in = currentInput();
1041   size_t length = in->currentTokenLength();
1042   // This is one of the parser's inner loops, so it needs to be fast.
1043   while (isNormal[in->tokenChar(messenger())])
1044     length++;
1045   in->endToken(length);
1046 }
1047
1048 void Parser::extendContentS()
1049 {
1050   InputSource *in = currentInput();
1051   size_t length = in->currentTokenLength();
1052   XcharMap<PackedBoolean> isNormal(normalMap());
1053   for (;;) {
1054     Xchar ch = in->tokenChar(messenger());
1055     if (!syntax().isS(ch) || !isNormal[ch])
1056       break;
1057     length++;
1058   }
1059   in->endToken(length);
1060 }
1061
1062 void Parser::handleBadStartTag(const ElementType *e,
1063                                StartElementEvent *event,
1064                                Boolean netEnabling)
1065 {
1066   IList<Undo> undoList;
1067   IList<Event> eventList;
1068   keepMessages();
1069   for (;;) {
1070     Vector<const ElementType *> missing;
1071     findMissingTag(e, missing);
1072     if (missing.size() == 1) {
1073       queueElementEvents(eventList);
1074       const ElementType *m = missing[0];
1075       message(ParserMessages::missingElementInferred,
1076               StringMessageArg(e->name()),
1077               StringMessageArg(m->name()));
1078       AttributeList *attributes
1079         = allocAttributeList(m->attributeDef(), 1);
1080       // this will give an error if the element has a required attribute
1081       attributes->finish(*this);
1082       StartElementEvent *inferEvent
1083         = new (eventAllocator()) StartElementEvent(m,
1084                                                    currentDtdPointer(),
1085                                                    attributes,
1086                                                    event->location(),
1087                                                    0);
1088       if (!currentElement().tryTransition(m))
1089         inferEvent->setIncluded();
1090       pushElementCheck(m, inferEvent, 0);
1091       if (!currentElement().tryTransition(e))
1092         event->setIncluded();
1093       pushElementCheck(e, event, netEnabling);
1094       return;
1095     }
1096     if (missing.size() > 0) {
1097       queueElementEvents(eventList);
1098       Vector<StringC> missingNames;
1099       for (size_t i = 0; i < missing.size(); i++)
1100         missingNames.push_back(missing[i]->name());
1101       message(ParserMessages::missingElementMultiple,
1102               StringMessageArg(e->name()),
1103               StringVectorMessageArg(missingNames));
1104       pushElementCheck(e, event, netEnabling);
1105       return;
1106     }
1107     if (!sd().omittag()
1108         || !currentElement().isFinished()
1109         || tagLevel() == 0
1110         || !currentElement().type()->definition()->canOmitEndTag())
1111       break;
1112     EndElementEvent *endEvent
1113       = new (eventAllocator()) EndElementEvent(currentElement().type(),
1114                                                currentDtdPointer(),
1115                                                event->location(),
1116                                                0);
1117     eventList.insert(endEvent);
1118     undoList.insert(new (internalAllocator()) UndoEndTag(popSaveElement()));
1119   }
1120   discardKeptMessages();
1121   undo(undoList);
1122   message(ParserMessages::elementNotAllowed, StringMessageArg(e->name()));
1123   // If element couldn't occur because it was excluded, then
1124   // do the transition here.
1125   (void)currentElement().tryTransition(e);
1126   pushElementCheck(e, event, netEnabling);
1127 }
1128
1129 void Parser::findMissingTag(const ElementType *e,
1130                             Vector<const ElementType *> &v)
1131 {
1132   size_t i;
1133
1134   if (!currentElement().currentPosition()) {
1135     if (!e)
1136       v.push_back((const ElementType *)0);
1137     return;
1138   }
1139   if (elementIsExcluded(e))
1140     return;
1141   size_t newSize = 0;
1142   currentElement().matchState().possibleTransitions(v);
1143   // FIXME also get currentInclusions
1144   for (i = 0; i < v.size(); i++) {
1145     if (v[i] && !elementIsExcluded(v[i])) {
1146       Boolean success = 0;
1147       switch (v[i]->definition()->declaredContent()) {
1148       case ElementDefinition::modelGroup:
1149         {
1150           const CompiledModelGroup *grp
1151             = v[i]->definition()->compiledModelGroup();
1152           MatchState state(grp);
1153           if (!e) {
1154             if (state.tryTransitionPcdata())
1155               success = 1;
1156           }
1157           else {
1158             if (state.tryTransition(e))
1159               success = 1;
1160             if (!success) {
1161               for (size_t j = 0; j < v[i]->definition()->nInclusions(); j++)
1162                 if (v[i]->definition()->inclusion(j) == e) {
1163                   success = 1;
1164                   break;
1165                 }
1166             }
1167             if (success) {
1168               for (size_t j = 0; j < v[i]->definition()->nExclusions(); j++)
1169                 if (v[i]->definition()->exclusion(j) == e) {
1170                   success = 0;
1171                   break;
1172                 }
1173             }
1174           }
1175         }
1176         break;
1177 #if 0
1178       case ElementDefinition::any:
1179         success = 1;
1180         break;
1181 #endif
1182       case ElementDefinition::cdata:
1183       case ElementDefinition::rcdata:
1184         if (e == 0)
1185           success = 1;
1186         break;
1187       default:
1188         break;
1189       }
1190       if (success)
1191         v[newSize++] = v[i];
1192     }
1193   }
1194   v.resize(newSize);
1195   // Sort them according to the order of their occurrence in the DTD.
1196   // Do an insertion sort.
1197   for (i = 1; i < v.size(); i++) {
1198     const ElementType *tem = v[i];
1199     size_t j;
1200     for (j = i; j > 0 && v[j - 1]->index() > tem->index(); j--)
1201       v[j] = v[j - 1];
1202     v[j] = tem;
1203   }
1204 }
1205
#if 0
// This produces messages that are too verbose
// This doesn't try to be very efficient.
// 0 for #pcdata

// Disabled code: fills v with the element types allowed at the current
// position, tentatively implying end tags and start tags (all undone
// before returning) when OMITTAG is on.
void Parser::getAllowedElementTypes(Vector<const ElementType *> &v)
{
  v.clear();
  // FIXME get a list of all inclusions first
  // getCurrentInclusions(v);
  // excluded says whether each element of v was excluded
  Vector<PackedBoolean> excluded(v.size(), 0);
  unsigned impliedStarts = 0;
  IList<Undo> undoList;
  for (;;) {
    if (!currentElement().currentPosition()) {
      // must be allowed #pcdata
      v.push_back((const ElementType *)0);
      excluded.push_back((PackedBoolean)0);
      break;
    }
    // have a model group
    const size_t firstNew = v.size();
    currentElement().matchState().possibleTransitions(v);
    excluded.resize(v.size());
    for (size_t k = firstNew; k < v.size(); k++)
      excluded[k] = (v[k] && elementIsExcluded(v[k]));
    if (!sd().omittag())
      break;
    // Try to imply a tag
    if (currentElement().isFinished()) {
      if (tagLevel() == 0 || impliedStarts)
        break;
      const ElementDefinition *def = currentElement().type()->definition();
      if (!def || !def->canOmitEndTag())
        break;
      undoList.insert(new (internalAllocator())
                      UndoEndTag(popSaveElement()));
      continue;
    }
    const LeafContentToken *token = currentElement().impliedStartTag();
    if (!token)
      break;
    const ElementType *e = token->elementType();
    if (elementIsExcluded(e))
      break;
    const ElementDefinition *def = e->definition();
    if (!def || def->undefined())
      break;
    if (def->declaredContent() != ElementDefinition::modelGroup
        && def->declaredContent() != ElementDefinition::any)
      break;
    if (!def->canOmitStartTag())
      break;
    undoList.insert(new (internalAllocator()) UndoStartTag);
    impliedStarts++;
    pushElement(new (internalAllocator()) OpenElement(e,
                                                      0,
                                                      0,
                                                      0,
                                                      Location()));
    if (checkImplyLoop(impliedStarts))
      break;
    for (size_t k = 0; k < def->nInclusions(); k++) {
      if (elementIsExcluded(def->inclusion(k)))
        continue;
      v.push_back(def->inclusion(k));
      excluded.push_back(0);
    }
  }
  undo(undoList);
  // Remove exclusions and duplicates and undefined
  size_t kept = 0;
  for (size_t i = 0; i < v.size(); i++) {
    if (excluded[i])
      continue;
    if (v[i] && v[i]->definition()->undefined())
      continue;
    Boolean seen = 0;
    for (size_t j = 0; j < kept && !seen; j++)
      if (v[i] == v[j])
        seen = 1;
    if (!seen)
      v[kept++] = v[i];
  }
  v.resize(kept);
}
#endif
1297
1298 #ifdef SP_NAMESPACE
1299 }
1300 #endif