1 /* $XConsortium: parseSd.C /main/2 1996/08/12 15:47:30 mgreess $ */
2 // Copyright (c) 1994, 1995 James Clark
3 // See the file COPYING for copying permission.
8 #include "SdFormalError.h"
9 #include "MessageBuilder.h"
10 #include "ParserMessages.h"
11 #include "MessageArg.h"
12 #include "CharsetRegistry.h"
15 #include "TokenMessageArg.h"
18 #include "NumericCharRefOrigin.h"
21 namespace SP_NAMESPACE {
// Character-switch table: member declarations (the enclosing class header is
// not visible in this view).  Callers in this file add pairs with addSwitch()
// and iterate the accessors over the index range [0, nSwitches()).
27 void addSwitch(WideChar from, WideChar to);
28 SyntaxChar subst(WideChar c);
29 size_t nSwitches() const;
30 Boolean switchUsed(size_t i) const;
31 WideChar switchFrom(size_t i) const;
32 WideChar switchTo(size_t i) const;
// NOTE(review): presumably one "used" flag per switch, plus the stored
// switch characters themselves -- confirm against the member definitions.
34 Vector<PackedBoolean> switchUsed_;
35 Vector<WideChar> switches_;
38 // Information about the SGML declaration being built.
// NOTE(review): presumably appends to formalErrorList below; the definition
// is not visible here.
42 void addFormalError(const Location &, const MessageType1 &, const StringC &);
// Declaration and description of the syntax reference character set; both
// are filled in by Parser::sdParseSyntaxCharset().
45 CharsetDecl syntaxCharsetDecl;
46 CharsetInfo syntaxCharset;
// Switches collected from the SWITCHES parameter by Parser::sdParseSyntax().
47 CharSwitcher switcher;
// Set to 1 by Parser::sdParseSyntax() when the public concrete syntax was
// resolved to an external entity.
48 Boolean externalSyntax;
// Drained by Parser::parseSgmlDecl() when sd->formal() is true.
50 IList<SdFormalError> formalErrorList;
// Message argument built from a set of character numbers.  Used throughout
// this file to pass an ISet<WideChar> (e.g. missing or invalid characters)
// to message().
53 class CharsetMessageArg : public MessageArg {
55 CharsetMessageArg(const ISet<WideChar> &set);
56 MessageArg *copy() const;
57 void append(MessageBuilder &) const;
// Parameter-type tag.  Values are small integers, and reserved-name types
// are formed arithmetically (see reservedName below).
63 typedef unsigned char Type;
75 referenceReservedName,
// Callers write e.g. SdParam::reservedName + Sd::rCHARSET to name the
// parameter type for one specific reserved name.
77 reservedName // Sd::ReservedName is added to this
// Value slots read by the sdParse* functions after a successful
// parseSdParam(); which slot is read depends on the returned type.
82 String<SyntaxChar> paramLiteralText;
85 Sd::Capacity capacityIndex;
86 Syntax::Quantity quantityIndex;
87 Syntax::ReservedName reservedNameIndex;
88 Syntax::DelimGeneral delimGeneralIndex;
// The set of parameter types acceptable at one point in the SGML
// declaration.  The constructor takes between one and six types; omitted
// positions default to SdParam::invalid.
92 class AllowedSdParams {
94 AllowedSdParams(SdParam::Type,
95 SdParam::Type = SdParam::invalid,
96 SdParam::Type = SdParam::invalid,
97 SdParam::Type = SdParam::invalid,
98 SdParam::Type = SdParam::invalid,
99 SdParam::Type = SdParam::invalid);
100 Boolean param(SdParam::Type) const;
101 SdParam::Type get(int i) const;
// Capacity of allow_; matches the six constructor parameters above.
103 enum { maxAllow = 6 };
104 SdParam::Type allow_[maxAllow];
// Message argument wrapping an AllowedSdParams set.  It is constructed with
// the Sd as well as the allowed set.
// NOTE(review): presumably used to list the expected parameters in an error
// message, with the Sd supplying reserved-name spellings -- confirm in
// append().
107 class AllowedSdParamsMessageArg : public MessageArg {
109 AllowedSdParamsMessageArg(const AllowedSdParams &allow,
110 const ConstPtr<Sd> &sd);
111 MessageArg *copy() const;
112 void append(MessageBuilder &) const;
// Stored by value, so the argument does not refer back to the caller's set.
114 AllowedSdParams allow_;
// Static description of a built-in concrete syntax, consumed by
// Parser::setStandardSyntax().
118 struct StandardSyntaxSpec {
// One added function character.  The initializer for coreFunctions supplies
// three values (name, function class, character number); the name member's
// declaration is not visible in this view.
119 struct AddedFunction {
121 Syntax::FunctionClass functionClass;
122 SyntaxChar syntaxChar;
// Array of added functions and its element count.
124 const AddedFunction *addedFunction;
125 size_t nAddedFunction;
// Added function characters shared by the built-in syntaxes:
// { name, function class, character number }.
129 static StandardSyntaxSpec::AddedFunction coreFunctions[] = {
130 { "TAB", Syntax::cSEPCHAR, 9 },
// The core and reference syntaxes use the same function table and differ
// only in the final initializer (0 vs. 1).
// NOTE(review): that last member is presumably the "has short references"
// flag -- its declaration is not visible here; confirm.
133 static StandardSyntaxSpec coreSyntax = {
134 coreFunctions, SIZEOF(coreFunctions), 0
137 static StandardSyntaxSpec refSyntax = {
138 coreFunctions, SIZEOF(coreFunctions), 1
// Initial parser phase: establish the Sd and syntaxes, either by parsing an
// SGML declaration (from the document entity or from a catalog-supplied
// default) or by implying one, then switch to the prolog phase.
141 void Parser::doInit()
147 // When document entity doesn't exist, don't give any errors
148 // other than the cannot open error.
149 if (currentInput()->get(messenger()) == InputSource::eE) {
150 if (currentInput()->accessError()) {
// Push back the character consumed by the end-of-entity probe above.
156 currentInput()->ungetToken();
// Before any declaration is seen, the document character set of the
// current Sd is used to read the input.
157 const CharsetInfo &initCharset = sd().docCharset();
158 ISet<WideChar> missing;
159 findMissingMinimum(initCharset, missing);
160 if (!missing.isEmpty()) {
161 message(ParserMessages::sdMissingCharacters, CharsetMessageArg(missing));
// First look for a declaration at the start of the document entity itself.
167 if (scanForSgmlDecl(initCharset))
170 currentInput()->ungetToken();
// Otherwise ask the entity catalog for a default SGML declaration and scan
// that entity instead.
171 if (entityCatalog().sgmlDecl(initCharset, messenger(), systemId)) {
172 InputSource *in = entityManager().open(systemId,
174 new InputSourceOrigin,
179 if (scanForSgmlDecl(initCharset))
182 message(ParserMessages::badDefaultSgmlDecl);
189 if (startMarkup(eventsWanted().wantPrologMarkup(), currentLocation())) {
// scanForSgmlDecl() leaves a token made of any leading separator characters
// followed by the six characters "<!SGML", so length - 6 is the number of
// leading separators and length - 4 is where the name "SGML" starts.
190 size_t nS = currentInput()->currentTokenLength() - 6;
191 for (size_t i = 0; i < nS; i++)
192 currentMarkup()->addS(currentInput()->currentTokenStart()[i]);
193 currentMarkup()->addDelim(Syntax::dMDO);
194 currentMarkup()->addSdReservedName(Sd::rSGML,
195 currentInput()->currentTokenStart()
196 + (currentInput()->currentTokenLength() - 4),
// The declaration itself is parsed using the reference concrete syntax,
// built here with an empty switch table.
199 Syntax *syntaxp = new Syntax(sd());
200 CharSwitcher switcher;
201 if (!setStandardSyntax(*syntaxp, refSyntax, sd().docCharset(),
206 syntaxp->implySgmlChar(sd().docCharset());
// Capture the Sd and syntax in effect before parseSgmlDecl() replaces them.
// (This local refSyntax shadows the file-level static of the same name.)
209 ConstPtr<Sd> refSd(sdPointer());
210 ConstPtr<Syntax> refSyntax(syntaxPointer());
211 if (!parseSgmlDecl()) {
215 // queue an SGML declaration event
216 eventHandler().sgmlDecl(new (eventAllocator())
217 SgmlDeclEvent(sdPointer(),
219 instanceSyntaxPointer(),
222 currentInput()->nextIndex(),
226 if (inputLevel() == 2) {
227 // FIXME perhaps check for junk after SGML declaration
// No explicit declaration was found: imply one from the parser options.
232 if (!implySgmlDecl()) {
236 // queue an SGML declaration event
237 eventHandler().sgmlDecl(new (eventAllocator())
238 SgmlDeclEvent(sdPointer(),
242 // Now we have sd and syntax set up, prepare to parse the prolog.
243 compilePrologModes();
244 setPhase(prologPhase);
// Builds the syntax used when the document has no SGML declaration: a
// standard syntax selected by options().shortref, with quantities taken
// from options().quantity.
247 Boolean Parser::implySgmlDecl()
249 Syntax *syntaxp = new Syntax(sd());
250 const StandardSyntaxSpec *spec;
// NOTE(review): the assignments to spec are not visible in this view;
// presumably refSyntax when shortref is set and coreSyntax otherwise.
251 if (options().shortref)
// An empty switch table: an implied declaration has no SWITCHES.
255 CharSwitcher switcher;
256 if (!setStandardSyntax(*syntaxp, *spec, sd().docCharset(), switcher))
258 syntaxp->implySgmlChar(sd().docCharset());
259 for (int i = 0; i < Syntax::nQuantity; i++)
260 syntaxp->setQuantity(i, options().quantity[i]);
// Populates `syn` from a built-in syntax specification.  Character numbers
// in `spec` and in the tables below are expressed in a fixed syntax
// reference character set and are mapped through `switcher` into
// `docCharset` before being installed.
265 Boolean Parser::setStandardSyntax(Syntax &syn,
266 const StandardSyntaxSpec &spec,
267 const CharsetInfo &docCharset,
268 CharSwitcher &switcher)
// Function-local statics: the syntax reference character set is built once
// and reused on every call.
270 static UnivCharsetDesc::Range syntaxCharsetRanges[] = {
273 static UnivCharsetDesc syntaxCharsetDesc(syntaxCharsetRanges,
274 SIZEOF(syntaxCharsetRanges));
275 static CharsetInfo syntaxCharset(syntaxCharsetDesc);
278 if (!checkSwitches(switcher, syntaxCharset))
// A switch target of 128 or above is reported as not being in the
// character set.
281 for (i = 0; i < switcher.nSwitches(); i++)
282 if (switcher.switchTo(i) >= 128)
283 message(ParserMessages::switchNotInCharset,
284 NumberMessageArg(switcher.switchTo(i)));
// Shunned characters: the listed numbers plus the "controls" class.
285 static const Char shunchar[] = {
286 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
287 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
291 for (i = 0; i < SIZEOF(shunchar); i++)
292 syn.addShunchar(shunchar[i]);
293 syn.setShuncharControls();
// The three standard functions and their character numbers
// (RE = 13, RS = 10, SPACE = 32); the two arrays are parallel.
294 static Syntax::StandardFunction standardFunctions[3] = {
295 Syntax::fRE, Syntax::fRS, Syntax::fSPACE
297 static SyntaxChar functionChars[3] = { 13, 10, 32 };
298 for (i = 0; i < 3; i++) {
300 if (translateSyntax(switcher,
305 && checkNotFunction(syn, docChar))
306 syn.setStandardFunction(standardFunctions[i], docChar);
// Added function characters come from the spec.
310 for (i = 0; i < spec.nAddedFunction; i++) {
312 if (translateSyntax(switcher,
315 spec.addedFunction[i].syntaxChar,
317 && checkNotFunction(syn, docChar))
318 syn.addFunctionChar(docCharset.execToDesc(spec.addedFunction[i].name),
319 spec.addedFunction[i].functionClass,
// Additional name characters.
325 static SyntaxChar nameChars[2] = { 45, 46 }; // '-' '.'
326 ISet<Char> nameCharSet;
327 for (i = 0; i < 2; i++) {
329 if (translateSyntax(switcher,
334 nameCharSet.add(docChar);
338 if (!checkNmchars(nameCharSet, syn))
341 syn.addNameCharacters(nameCharSet);
// NAMECASE: GENERAL set to 1, ENTITY set to 0.
342 syn.setNamecaseGeneral(1);
343 syn.setNamecaseEntity(0);
344 if (!setRefDelimGeneral(syn, syntaxCharset, docCharset, switcher))
346 setRefNames(syn, docCharset);
347 syn.enterStandardFunctionNames();
// NOTE(review): the first half of this condition is not visible here;
// presumably short reference delimiters are added only when the spec
// asks for them.
349 && !addRefDelimShortref(syn, syntaxCharset, docCharset, switcher))
// Installs the reference general delimiters into `syntax` for every
// delimiter role that is still empty.  Each delimiter character is
// translated via translateUniv() and mapped into `docCharset`; the `missing`
// set is reported once at the end with missingSignificant646.
354 Boolean Parser::setRefDelimGeneral(Syntax &syntax,
355 const CharsetInfo &syntaxCharset,
356 const CharsetInfo &docCharset,
357 CharSwitcher &switcher)
359 // Column 3 from Figure 3
// Each entry holds at most two characters; shorter delimiters are
// terminated by '\0'.
360 static const char delims[][2] = {
394 ISet<WideChar> missing;
395 for (int i = 0; i < Syntax::nDelimGeneral; i++)
// Only fill in delimiters that have not already been set.
396 if (syntax.delimGeneral(i).size() == 0) {
399 for (j = 0; j < 2 && delims[i][j] != '\0'; j++) {
400 UnivChar univChar = translateUniv(delims[i][j], switcher,
403 if (univToDescCheck(docCharset, univChar, c))
// The delimiter is installed only when delim holds as many characters as
// were examined, i.e. its length equals j.
410 if (delim.size() == j) {
411 if (checkGeneralDelim(syntax, delim))
412 syntax.setDelimGeneral(i, delim);
417 if (!missing.isEmpty())
418 message(ParserMessages::missingSignificant646, CharsetMessageArg(missing));
// Installs the reference reserved names into `syntax`, converted to the
// document character set.  A reference name that already resolves to a
// reserved name in `syntax` is reported with nameReferenceReservedName.
// Only reserved-name slots that are still empty are set.
422 void Parser::setRefNames(Syntax &syntax, const CharsetInfo &docCharset)
// Indexed by Syntax::ReservedName.
424 static const char *const referenceNames[] = {
483 for (i = 0; i < Syntax::nNames; i++) {
484 StringC docName(docCharset.execToDesc(referenceNames[i]));
485 Syntax::ReservedName tem;
486 if (syntax.lookupReservedName(docName, &tem))
487 message(ParserMessages::nameReferenceReservedName,
488 StringMessageArg(docName));
// The empty-slot check below is independent of the lookup above.
489 if (syntax.reservedName(Syntax::ReservedName(i)).size() == 0)
490 syntax.setName(i, docName);
// Adds the reference short reference delimiters to `syntax`.  Each
// delimiter character is translated via translateUniv() and mapped into
// `docCharset`; the `missing` set is reported once at the end with
// missingSignificant646.
494 Boolean Parser::addRefDelimShortref(Syntax &syntax,
495 const CharsetInfo &syntaxCharset,
496 const CharsetInfo &docCharset,
497 CharSwitcher &switcher)
499 // Column 2 from Figure 4
// Each entry holds at most three characters; shorter delimiters are
// terminated by '\0'.
500 static const char delimShortref[][3] = {
534 ISet<WideChar> missing;
536 for (size_t i = 0; i < SIZEOF(delimShortref); i++) {
540 for (j = 0; j < 3 && delimShortref[i][j] != '\0'; j++) {
542 UnivChar univChar = translateUniv(delimShortref[i][j], switcher,
544 if (univToDescCheck(docCharset, univChar, c))
549 if (delim.size() == j) {
// The duplicate check is made only when the switch table is non-empty.
550 if (switcher.nSwitches() > 0 && syntax.isValidShortref(delim))
551 message(ParserMessages::duplicateDelimShortref,
552 StringMessageArg(delim));
554 syntax.addDelimShortref(delim, docCharset);
557 if (!missing.isEmpty())
558 message(ParserMessages::missingSignificant646, CharsetMessageArg(missing));
562 // Determine whether the document starts with an SGML declaration.
563 // There is no current syntax at this point.
// Because no syntax exists yet, the scan is hand-coded against
// `initCharset`: optional RS/RE/SPACE/TAB characters, then "<!" followed by
// the letters S, G, M, L in either case, not followed by a name character.
565 Boolean Parser::scanForSgmlDecl(const CharsetInfo &initCharset)
// Map the four separator characters into initCharset.
568 if (!univToDescCheck(initCharset, UnivCharsetDesc::rs, rs))
571 if (!univToDescCheck(initCharset, UnivCharsetDesc::re, re))
574 if (!univToDescCheck(initCharset, UnivCharsetDesc::space, space))
577 if (!univToDescCheck(initCharset, UnivCharsetDesc::tab, tab))
579 InputSource *in = currentInput();
580 Xchar c = in->get(messenger());
// Skip leading separators; they stay part of the current token.
581 while (c == rs || c == space || c == re || c == tab)
582 c = in->tokenChar(messenger());
583 if (c != initCharset.execToDesc('<'))
585 if (in->tokenChar(messenger()) != initCharset.execToDesc('!'))
587 c = in->tokenChar(messenger());
588 if (c != initCharset.execToDesc('S')
589 && c != initCharset.execToDesc('s'))
591 c = in->tokenChar(messenger());
592 if (c != initCharset.execToDesc('G')
593 && c != initCharset.execToDesc('g'))
595 c = in->tokenChar(messenger());
596 if (c != initCharset.execToDesc('M')
597 && c != initCharset.execToDesc('m'))
599 c = in->tokenChar(messenger());
600 if (c != initCharset.execToDesc('L')
601 && c != initCharset.execToDesc('l'))
// Read one character beyond "SGML" to check what follows the name.
603 c = in->tokenChar(messenger());
604 // Don't recognize this if SGML is followed by a name character.
605 if (c == InputSource::eE)
// Shorten the token by one so the lookahead character is not part of it.
607 in->endToken(in->currentTokenLength() - 1);
// Name characters checked for here: '-', '.', letters and digits.
608 if (c == initCharset.execToDesc('-'))
610 if (c == initCharset.execToDesc('.'))
613 if (!initCharset.descToUniv(c, univ))
615 if (UnivCharsetDesc::a <= univ && univ < UnivCharsetDesc::a + 26)
617 if (UnivCharsetDesc::A <= univ && univ < UnivCharsetDesc::A + 26)
619 if (UnivCharsetDesc::zero <= univ && univ < UnivCharsetDesc::zero + 10)
// Adds to `missing` every character from a fixed list that
// univToDescCheck() cannot map into `charset`: the 26 upper-case and 26
// lower-case letters, the ten digits, and the characters in special[].
// Values added to `missing` are universal character numbers.
624 void Parser::findMissingMinimum(const CharsetInfo &charset,
625 ISet<WideChar> &missing)
629 for (i = 0; i < 26; i++) {
630 if (!univToDescCheck(charset, UnivCharsetDesc::A + i, to))
631 missing += UnivCharsetDesc::A + i;
632 if (!univToDescCheck(charset, UnivCharsetDesc::a + i, to))
633 missing += UnivCharsetDesc::a + i;
635 for (i = 0; i < 10; i++) {
637 if (!univToDescCheck(charset, UnivCharsetDesc::zero + i, to))
638 missing += UnivCharsetDesc::zero + i;
// In ISO 646 these numbers are ' ( ) + , - . / : = ?
640 static const UnivChar special[] = {
641 39, 40, 41, 43, 44, 45, 46, 47, 58, 61, 63
644 for (i = 0; i < SIZEOF(special); i++)
645 if (!univToDescCheck(charset, special[i], to))
646 missing += special[i];
// Parses the body of an SGML declaration (after "<!SGML"): the version
// literal, each section in order via the parsers[] table, and the closing
// MDC.  On success the new Sd and syntax(es) are installed on the parser.
650 Boolean Parser::parseSgmlDecl()
655 if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))
// A version literal other than "ISO 8879:1986" is reported with the
// standardVersion message.
657 StringC version(sd().execToDoc("ISO 8879:1986"));
658 if (parm.literalText.string() != version)
659 message(ParserMessages::standardVersion,
660 StringMessageArg(parm.literalText.string()));
661 sdBuilder.sd = new Sd;
// Section parsers, called in this order.
662 typedef Boolean (Parser::*SdParser)(SdBuilder &, SdParam &);
663 static SdParser parsers[] = {
664 &Parser::sdParseDocumentCharset,
665 &Parser::sdParseCapacity,
666 &Parser::sdParseScope,
667 &Parser::sdParseSyntax,
668 &Parser::sdParseFeatures,
669 &Parser::sdParseAppinfo,
671 for (size_t i = 0; i < SIZEOF(parsers); i++) {
672 if (!(this->*(parsers[i]))(sdBuilder, parm))
// A section parser may succeed syntactically yet clear sdBuilder.valid.
674 if (!sdBuilder.valid)
677 if (!parseSdParam(AllowedSdParams(SdParam::mdc), parm))
// Queued formal errors are processed only when sd->formal() is true.
679 if (sdBuilder.sd->formal()) {
680 while (!sdBuilder.formalErrorList.empty()) {
681 SdFormalError *p = sdBuilder.formalErrorList.get();
682 ParserState *state = this; // work around lcc 3.0 bug
687 setSd(sdBuilder.sd.pointer());
// With SCOPE INSTANCE a separate prolog syntax is built from the reference
// concrete syntax and installed together with the declared syntax via
// setSyntaxes().
688 if (sdBuilder.sd->scopeInstance()) {
689 Syntax *proSyntax = new Syntax(sd());
690 CharSwitcher switcher;
// The result of setStandardSyntax() is not checked here.
691 setStandardSyntax(*proSyntax, refSyntax, sd().docCharset(), switcher);
692 proSyntax->setSgmlChar(*sdBuilder.syntax->charSet(Syntax::sgmlChar));
693 ISet<WideChar> invalidSgmlChar;
694 proSyntax->checkSgmlChar(sdBuilder.sd->docCharset(),
695 sdBuilder.syntax.pointer(),
697 sdBuilder.syntax->checkSgmlChar(sdBuilder.sd->docCharset(),
700 if (!invalidSgmlChar.isEmpty())
701 message(ParserMessages::invalidSgmlChar, CharsetMessageArg(invalidSgmlChar));
702 setSyntaxes(proSyntax, sdBuilder.syntax.pointer());
// Otherwise the declared syntax is installed with setSyntax().
705 setSyntax(sdBuilder.syntax.pointer());
706 if (syntax().multicode())
707 currentInput()->setMarkupScanTable(syntax().markupScanTable());
// Parses the CHARSET section (document character set) and stores the
// resulting description and declaration in sdBuilder.sd.  Also creates
// sdBuilder.syntax, which the later section parsers fill in.
711 Boolean Parser::sdParseDocumentCharset(SdBuilder &sdBuilder, SdParam &parm)
713 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rCHARSET),
716 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rBASESET),
720 UnivCharsetDesc desc;
// Third argument 1 selects document-character-set mode in sdParseCharset().
721 if (!sdParseCharset(sdBuilder, parm, 1, decl, desc))
723 ISet<WideChar> missing;
724 findMissingMinimum(desc, missing);
725 if (!missing.isEmpty()) {
726 message(ParserMessages::missingMinimumChars,
727 CharsetMessageArg(missing));
// The set obtained from decl.usedSet() becomes the syntax's SGML
// character set.
731 decl.usedSet(sgmlChar);
732 sdBuilder.sd->setDocCharsetDesc(desc);
733 sdBuilder.sd->setDocCharsetDecl(decl);
// The Syntax is constructed from the Sd only after the charset is set.
734 sdBuilder.syntax = new Syntax(*sdBuilder.sd);
735 sdBuilder.syntax->setSgmlChar(sgmlChar);
// Parses one character set description: one or more BASESET/DESCSET pairs.
// Used for both the document character set and the syntax reference
// character set; the isDocument flag selects which keyword may follow
// (CAPACITY or FUNCTION).  Declared ranges are recorded in `decl` and the
// resolved mapping is accumulated in `desc`.
739 Boolean Parser::sdParseCharset(SdBuilder &sdBuilder,
743 UnivCharsetDesc &desc)
// Characters declared more than once across all DESCSETs; reported after
// the loops.
746 ISet<WideChar> multiplyDeclared;
747 // This is for checking whether the syntax reference character set
748 // is ISO 646 when SCOPE is INSTANCE.
749 Boolean maybeISO646 = 1;
// BASESET: a minimum literal holding the base set's public identifier.
751 if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))
753 UnivCharsetDesc baseDesc;
756 PublicId::TextClass textClass;
757 const MessageType1 *err;
// Problems with the public identifier are queued as formal errors rather
// than reported immediately.
758 if (!id.init(parm.literalText, sd().docCharset(), syntax().space(), err))
759 sdBuilder.addFormalError(currentLocation(),
762 else if (id.getTextClass(textClass)
763 && textClass != PublicId::CHARSET)
764 sdBuilder.addFormalError(currentLocation(),
765 ParserMessages::basesetTextClass,
// Resolve the base set: first as an external entity, then through the
// built-in charset registry.
768 if (referencePublic(id, PublicId::CHARSET, givenError))
769 found = sdParseExternalCharset(*sdBuilder.sd, baseDesc);
770 else if (!givenError) {
771 found = CharsetRegistry::findCharset(id, sd().docCharset(), baseDesc);
772 if (!found && options().warnSgmlDecl)
773 message(ParserMessages::unknownBaseset, StringMessageArg(id.string()));
780 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rDESCSET),
// Each DESCSET entry: described character number, count, then a target
// (base number, minimum literal, or UNUSED).
783 if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
786 WideChar min = parm.n;
787 if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
789 Number count = parm.n;
791 if (options().warnSgmlDecl && count == 0)
792 message(ParserMessages::zeroNumberOfCharacters);
793 decl.rangeDeclared(min, count, multiplyDeclared);
// adjCount is the count clipped so the range does not extend past charMax.
// NOTE(review): the first half of this condition is not visible here.
796 && (min > charMax || count - 1 > charMax - min)) {
797 message(ParserMessages::documentCharMax, NumberMessageArg(charMax));
798 adjCount = min > charMax ? 0 : 1 + (charMax - min);
804 if (!parseSdParam(AllowedSdParams(SdParam::number,
805 SdParam::minimumLiteral,
806 SdParam::reservedName + Sd::rUNUSED),
// Target is a number in the base set: map the (clipped) range through
// baseDesc.  The declaration records the full, unclipped count.
810 case SdParam::number:
811 decl.addRange(min, count, parm.n);
812 if (found && adjCount > 0) {
813 ISet<WideChar> baseMissing;
814 desc.addBaseRange(baseDesc, min, min + (adjCount - 1), parm.n,
816 if (!baseMissing.isEmpty() && options().warnSgmlDecl)
817 message(ParserMessages::basesetCharsMissing,
818 CharsetMessageArg(baseMissing));
// Target is UNUSED: declared, but nothing is added to desc.
821 case SdParam::reservedName + Sd::rUNUSED:
822 decl.addRange(min, count);
// Target is a minimum literal naming a character: every position in the
// range maps to the same universal character c.
824 case SdParam::minimumLiteral:
826 UnivChar c = sdBuilder.sd->nameToUniv(parm.literalText.string());
827 if (adjCount > 256) {
828 message(ParserMessages::tooManyCharsMinimumLiteral);
831 for (Number i = 0; i < adjCount; i++)
832 desc.addRange(min + i, min + i, c);
835 decl.addRange(min, count, parm.literalText.string());
// The keyword that ends the description depends on which charset this is.
840 SdParam::Type follow = (isDocument
841 ? SdParam::reservedName + Sd::rCAPACITY
842 : SdParam::reservedName + Sd::rFUNCTION);
843 if (!parseSdParam(AllowedSdParams(SdParam::number,
844 SdParam::reservedName + Sd::rBASESET,
// Inner loop: more entries in this DESCSET.  Outer loop: another BASESET.
849 } while (parm.type == SdParam::number);
850 } while (parm.type == SdParam::reservedName + Sd::rBASESET);
851 if (!multiplyDeclared.isEmpty())
852 message(ParserMessages::duplicateCharNumbers,
853 CharsetMessageArg(multiplyDeclared));
// Report gaps between consecutive declared ranges ("holes").
854 ISet<WideChar> declaredSet;
855 decl.declaredSet(declaredSet);
856 ISetIter<WideChar> iter(declaredSet);
857 WideChar min, max, lastMax;
858 if (iter.next(min, max)) {
859 ISet<WideChar> holes;
861 while (iter.next(min, max)) {
862 if (min - lastMax > 1)
863 holes.addRange(lastMax + 1, min - 1);
866 if (!holes.isEmpty())
867 message(ParserMessages::codeSetHoles, CharsetMessageArg(holes));
869 if (!isDocument && sdBuilder.sd->scopeInstance()) {
870 // If scope is INSTANCE, syntax reference character set
871 // must be same as reference.
872 UnivCharsetDescIter iter(desc);
873 WideChar descMin, descMax;
875 if (!iter.next(descMin, descMax, univMin)
880 message(ParserMessages::scopeInstanceSyntaxCharset);
// Reads a character set description from an external entity (opened by
// referencePublic()) into `desc`.  The entity is a sequence of
// "number count target" triples terminated by end of entity, where target
// is a number, a minimum literal, or UNUSED.
885 Boolean Parser::sdParseExternalCharset(Sd &sd, UnivCharsetDesc &desc)
889 if (!parseSdParam(AllowedSdParams(SdParam::number, SdParam::eE),
// End of entity ends the description.
892 if (parm.type == SdParam::eE)
894 WideChar min = parm.n;
895 if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
897 Number count = parm.n;
898 if (!parseSdParam(AllowedSdParams(SdParam::number,
899 SdParam::minimumLiteral,
900 SdParam::reservedName + Sd::rUNUSED),
// NOTE(review): the guard on the line before addRange() is not visible
// here; count - 1 would wrap if count could be 0 at this point -- confirm.
903 if (parm.type == SdParam::number) {
905 desc.addRange(min, min + (count - 1), parm.n);
// A minimum literal maps every position in the range to one character.
907 else if (parm.type == SdParam::minimumLiteral) {
908 UnivChar c = sd.nameToUniv(parm.literalText.string());
910 message(ParserMessages::tooManyCharsMinimumLiteral);
913 for (Number i = 0; i < count; i++)
914 desc.addRange(min + i, min + i, c);
// Parses the CAPACITY section: either PUBLIC with a capacity-set public
// identifier, or SGMLREF followed by name/number pairs.  Each capacity may
// be given once; later duplicates are ignored (with a warning when
// options().warnSgmlDecl is set).  Any capacity exceeding TOTALCAP
// (capacity 0) is reported.  The section ends at the SCOPE keyword.
921 Boolean Parser::sdParseCapacity(SdBuilder &sdBuilder, SdParam &parm)
923 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rPUBLIC,
924 SdParam::reservedName + Sd::rSGMLREF),
928 if (parm.type == SdParam::reservedName + Sd::rPUBLIC) {
929 if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))
932 PublicId::TextClass textClass;
933 const MessageType1 *err;
// Problems with the public identifier are queued as formal errors rather
// than reported immediately.
934 if (!id.init(parm.literalText, sd().docCharset(), syntax().space(), err))
935 sdBuilder.addFormalError(currentLocation(),
938 else if (id.getTextClass(textClass)
939 && textClass != PublicId::CAPACITY)
940 sdBuilder.addFormalError(currentLocation(),
941 ParserMessages::capacityTextClass,
// The reference capacity set (either spelling of the owner) needs no
// lookup; any other identifier is resolved as an external entity.
943 const StringC &str = id.string();
944 if (str != sd().execToDoc("ISO 8879-1986//CAPACITY Reference//EN")
945 && str != sd().execToDoc("ISO 8879:1986//CAPACITY Reference//EN")) {
947 if (referencePublic(id, PublicId::CAPACITY, givenError))
949 else if (!givenError)
950 message(ParserMessages::unknownCapacitySet, StringMessageArg(str));
953 return parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSCOPE),
// Tracks which capacities have already been given in this section.
957 PackedBoolean capacitySpecified[Sd::nCapacity];
959 for (i = 0; i < Sd::nCapacity; i++)
960 capacitySpecified[i] = 0;
961 if (!parseSdParam(AllowedSdParams(SdParam::capacityName), parm))
// Save the index now: the next parseSdParam() call reuses parm.
964 Sd::Capacity capacityIndex = parm.capacityIndex;
965 if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
968 if (!capacitySpecified[capacityIndex]) {
969 sdBuilder.sd->setCapacity(capacityIndex, parm.n);
970 capacitySpecified[capacityIndex] = 1;
// Name the capacity that was duplicated.  The loop counter i is left at
// Sd::nCapacity by the flag-clearing loop above, so it must not be used as
// the index here.
972 else if (options().warnSgmlDecl)
973 message(ParserMessages::duplicateCapacity,
974 StringMessageArg(sd().capacityName(capacityIndex)));
// Inside a pushed external capacity entity the list ends at end of entity;
// otherwise it ends at SCOPE.
975 int final = pushed ? int(SdParam::eE) : SdParam::reservedName + Sd::rSCOPE;
976 if (!parseSdParam(AllowedSdParams(SdParam::capacityName, final),
979 } while (parm.type == SdParam::capacityName);
// Capacity 0 is TOTALCAP; no individual capacity may exceed it.
980 Number totalcap = sdBuilder.sd->capacity(0);
981 for (i = 1; i < Sd::nCapacity; i++)
982 if (sdBuilder.sd->capacity(i) > totalcap)
983 message(ParserMessages::capacityExceedsTotalcap,
984 StringMessageArg(sd().capacityName(i)));
986 return parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSCOPE),
// Tries to resolve a public identifier used in the SGML declaration through
// the entity catalog.  When the lookup succeeds, an SgmlDeclEntityEvent is
// sent to the event handler, the entity start is recorded in the current
// markup (if markup is being collected), and the entity is opened through
// the entity manager.
991 Boolean Parser::referencePublic(const PublicId &id,
992 PublicId::TextClass entityType,
997 if (entityCatalog().lookupPublic(id.string(),
// Capture the location before the new input is opened.
1001 Location loc = currentLocation();
1002 eventHandler().sgmlDeclEntity(new (eventAllocator())
1003 SgmlDeclEntityEvent(id,
1007 Ptr<EntityOrigin> origin(new EntityOrigin(loc));
1008 if (currentMarkup())
1009 currentMarkup()->addEntityStart(origin);
1010 InputSource *in = entityManager().open(sysid,
// Parses the value of the SCOPE section: INSTANCE or DOCUMENT.  INSTANCE is
// recorded on the Sd with setScopeInstance(); no call is made for DOCUMENT
// in the lines visible here.
1025 Boolean Parser::sdParseScope(SdBuilder &sdBuilder, SdParam &parm)
1027 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rINSTANCE,
1028 SdParam::reservedName + Sd::rDOCUMENT),
1031 if (parm.type == SdParam::reservedName + Sd::rINSTANCE)
1032 sdBuilder.sd->setScopeInstance();
// Parses the SYNTAX section.  Two forms are accepted:
//   - PUBLIC "id" [SWITCHES n n ...]: a known standard syntax is applied
//     directly; otherwise the identifier is resolved as an external entity
//     holding an explicit syntax.
//   - an explicit syntax starting at SHUNCHAR.
// Afterwards the SGML character set (unless SCOPE is INSTANCE), NAMELEN and
// the switches are checked.
1036 Boolean Parser::sdParseSyntax(SdBuilder &sdBuilder, SdParam &parm)
1038 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSYNTAX),
1041 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSHUNCHAR,
1042 SdParam::reservedName + Sd::rPUBLIC),
1046 if (parm.type == SdParam::reservedName + Sd::rPUBLIC) {
1047 if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))
1050 const MessageType1 *err;
1051 PublicId::TextClass textClass;
// Problems with the public identifier are queued as formal errors rather
// than reported immediately.
1052 if (!id.init(parm.literalText, sd().docCharset(), syntax().space(), err))
1053 sdBuilder.addFormalError(currentLocation(),
1056 else if (id.getTextClass(textClass)
1057 && textClass != PublicId::SYNTAX)
1058 sdBuilder.addFormalError(currentLocation(),
1059 ParserMessages::syntaxTextClass,
1061 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rFEATURES,
1062 SdParam::reservedName + Sd::rSWITCHES),
// NOTE(review): charSwitches is not used in the lines visible here.
1065 Vector<UnivChar> charSwitches;
// SWITCHES: pairs of numbers, each pair recorded in sdBuilder.switcher.
1066 if (parm.type == SdParam::reservedName + Sd::rSWITCHES) {
1067 if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
1070 SyntaxChar c = parm.n;
1071 if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
1073 sdBuilder.switcher.addSwitch(c, parm.n);
1074 if (!parseSdParam(AllowedSdParams(SdParam::number,
1075 SdParam::reservedName
1079 if (parm.type != SdParam::number)
// Built-in (Reference/Core) syntaxes are applied without opening any entity.
1083 const StandardSyntaxSpec *spec = lookupSyntax(id);
1085 if (!setStandardSyntax(*sdBuilder.syntax,
1087 sdBuilder.sd->docCharset(),
1088 sdBuilder.switcher))
1089 sdBuilder.valid = 0;
// Otherwise try to read the syntax from an external entity.
1093 if (referencePublic(id, PublicId::SYNTAX, givenError)) {
1094 sdBuilder.externalSyntax = 1;
1096 if (!parseSdParam(AllowedSdParams(SdParam::reservedName
1100 if (!sdParseExplicitSyntax(sdBuilder, parm2))
1105 message(ParserMessages::unknownPublicSyntax,
1106 StringMessageArg(id.string()));
1107 sdBuilder.valid = 0;
// Explicit syntax given inline in the declaration.
1112 if (!sdParseExplicitSyntax(sdBuilder, parm))
1115 if (!sdBuilder.sd->scopeInstance()) {
1116 // we know the significant chars now
1117 ISet<WideChar> invalidSgmlChar;
1118 sdBuilder.syntax->checkSgmlChar(sdBuilder.sd->docCharset(),
1121 if (!invalidSgmlChar.isEmpty())
1122 message(ParserMessages::invalidSgmlChar, CharsetMessageArg(invalidSgmlChar));
1124 checkSyntaxNamelen(*sdBuilder.syntax);
1125 checkSwitchesMarkup(sdBuilder.switcher);
// Parses an explicitly specified concrete syntax by running each
// sub-section parser in the table below, in order.
1129 Boolean Parser::sdParseExplicitSyntax(SdBuilder &sdBuilder,
1132 typedef Boolean (Parser::*SdParser)(SdBuilder &, SdParam &);
// Order matters: each parser consumes input up to the keyword that starts
// the next sub-section.
1133 static SdParser parsers[] = {
1134 &Parser::sdParseShunchar,
1135 &Parser::sdParseSyntaxCharset,
1136 &Parser::sdParseFunction,
1137 &Parser::sdParseNaming,
1138 &Parser::sdParseDelim,
1139 &Parser::sdParseNames,
1140 &Parser::sdParseQuantity
1142 for (size_t i = 0; i < SIZEOF(parsers); i++)
1143 if (!(this->*(parsers[i]))(sdBuilder, parm))
// Maps a public identifier to one of the built-in syntax specifications.
// The identifier must have an ISO owner spelled "ISO 8879:1986" or
// "ISO 8879-1986", text class SYNTAX, and description "Reference" or "Core".
// NOTE(review): the return statements are not visible in this view;
// presumably &refSyntax / &coreSyntax on a match and a null pointer
// otherwise.
1148 const StandardSyntaxSpec *Parser::lookupSyntax(const PublicId &id)
1150 PublicId::OwnerType ownerType;
1151 if (!id.getOwnerType(ownerType) || ownerType != PublicId::ISO)
1154 if (!id.getOwner(str))
1156 if (str != sd().execToDoc("ISO 8879:1986")
1157 && str != sd().execToDoc("ISO 8879-1986"))
1159 PublicId::TextClass textClass;
1160 if (!id.getTextClass(textClass) || textClass != PublicId::SYNTAX)
// str is reused below to hold the description component.
1162 if (!id.getDescription(str))
1164 if (str == sd().execToDoc("Reference"))
1166 if (str == sd().execToDoc("Core"))
// Parses the syntax reference character set of an explicit syntax and
// stores it in sdBuilder.syntaxCharset / syntaxCharsetDecl.  Then validates
// the previously collected switches against it and reports any missing
// minimum characters.
1171 Boolean Parser::sdParseSyntaxCharset(SdBuilder &sdBuilder, SdParam &parm)
1173 UnivCharsetDesc desc;
// Third argument 0 selects syntax-reference-charset mode in sdParseCharset().
1174 if (!sdParseCharset(sdBuilder, parm, 0, sdBuilder.syntaxCharsetDecl, desc))
1176 sdBuilder.syntaxCharset.set(desc);
// The result of checkSwitches() is not checked here.
1177 checkSwitches(sdBuilder.switcher, sdBuilder.syntaxCharset);
// Every switch target must be declared in the syntax reference charset.
1178 for (size_t i = 0; i < sdBuilder.switcher.nSwitches(); i++)
1179 if (!sdBuilder.syntaxCharsetDecl.charDeclared(sdBuilder.switcher.switchTo(i)))
1180 message(ParserMessages::switchNotInCharset,
1181 NumberMessageArg(sdBuilder.switcher.switchTo(i)));
1182 ISet<WideChar> missing;
1183 findMissingMinimum(sdBuilder.syntaxCharset, missing);
1184 if (!missing.isEmpty())
1185 message(ParserMessages::missingMinimumChars,
1186 CharsetMessageArg(missing));
// Parses the SHUNCHAR value: NONE, or a list whose first item is CONTROLS
// or a number and whose remaining items are numbers.  The list ends at the
// BASESET keyword that begins the syntax reference character set.
1190 Boolean Parser::sdParseShunchar(SdBuilder &sdBuilder, SdParam &parm)
1192 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNONE,
1193 SdParam::reservedName + Sd::rCONTROLS,
1194 SdParam::number), parm))
// NONE must be followed directly by BASESET.
1196 if (parm.type == SdParam::reservedName + Sd::rNONE) {
1197 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rBASESET),
1202 if (parm.type == SdParam::reservedName + Sd::rCONTROLS)
1203 sdBuilder.syntax->setShuncharControls();
// Numbers above charMax are not added.
1205 if (parm.n <= charMax)
1206 sdBuilder.syntax->addShunchar(Char(parm.n));
// Remaining items: numbers until BASESET.
1209 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rBASESET,
1210 SdParam::number), parm))
1212 if (parm.type != SdParam::number)
1214 if (parm.n <= charMax)
1215 sdBuilder.syntax->addShunchar(Char(parm.n));
// Parses the FUNCTION section: the three standard function characters
// (RE, RS, SPACE), then any added functions as "name class number"
// triples.  The list of added functions ends when LCNMSTRT is seen in the
// class position.
1220 Boolean Parser::sdParseFunction(SdBuilder &sdBuilder, SdParam &parm)
1222 static Sd::ReservedName standardNames[3] = {
1223 Sd::rRE, Sd::rRS, Sd::rSPACE
1225 for (int i = 0; i < 3; i++) {
1226 if (!parseSdParam(AllowedSdParams(SdParam::reservedName
1227 + standardNames[i]),
1230 if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
// NOTE(review): the cast below assumes the Syntax::StandardFunction
// enumerators are ordered RE, RS, SPACE starting at 0 -- confirm.
1233 if (translateSyntax(sdBuilder, parm.n, c)) {
1234 if (checkNotFunction(*sdBuilder.syntax, c))
1235 sdBuilder.syntax->setStandardFunction(Syntax::StandardFunction(i), c);
1237 sdBuilder.valid = 0;
// Track MSICHAR/MSOCHAR so the dependency can be checked at the end.
1240 Boolean haveMsichar = 0;
1241 Boolean haveMsochar = 0;
// A function name may be given as a parameter literal only in an external
// syntax.
1243 if (!parseSdParam(sdBuilder.externalSyntax
1244 ? AllowedSdParams(SdParam::name, SdParam::paramLiteral)
1245 : AllowedSdParams(SdParam::name),
1248 Boolean nameWasLiteral;
1249 Boolean invalidName = 0;
1251 if (parm.type == SdParam::paramLiteral) {
1253 if (!translateSyntax(sdBuilder, parm.paramLiteralText, name))
1257 parm.token.swap(name);
// LCNMSTRT is allowed here only when the name was not a literal.
1260 if (!parseSdParam(nameWasLiteral
1261 ? AllowedSdParams(SdParam::reservedName + Sd::rFUNCHAR,
1262 SdParam::reservedName + Sd::rMSICHAR,
1263 SdParam::reservedName + Sd::rMSOCHAR,
1264 SdParam::reservedName + Sd::rMSSCHAR,
1265 SdParam::reservedName + Sd::rSEPCHAR)
1266 : AllowedSdParams(SdParam::reservedName + Sd::rFUNCHAR,
1267 SdParam::reservedName + Sd::rMSICHAR,
1268 SdParam::reservedName + Sd::rMSOCHAR,
1269 SdParam::reservedName + Sd::rMSSCHAR,
1270 SdParam::reservedName + Sd::rSEPCHAR,
1271 SdParam::reservedName + Sd::rLCNMSTRT),
// When LCNMSTRT follows, the preceding name is expected to be NAMING;
// anything else is reported with namingBeforeLcnmstrt.
1274 if (parm.type == SdParam::reservedName + Sd::rLCNMSTRT) {
1275 if (name != sd().reservedName(Sd::rNAMING))
1276 message(ParserMessages::namingBeforeLcnmstrt,
1277 StringMessageArg(name));
1280 if (!nameWasLiteral) {
1283 if (!translateName(sdBuilder, tem, name))
// Map the class keyword to a Syntax::FunctionClass.
1286 Syntax::FunctionClass functionClass;
1287 switch (parm.type) {
1288 case SdParam::reservedName + Sd::rFUNCHAR:
1289 functionClass = Syntax::cFUNCHAR;
1291 case SdParam::reservedName + Sd::rMSICHAR:
1293 functionClass = Syntax::cMSICHAR;
1295 case SdParam::reservedName + Sd::rMSOCHAR:
1297 functionClass = Syntax::cMSOCHAR;
1299 case SdParam::reservedName + Sd::rMSSCHAR:
1300 functionClass = Syntax::cMSSCHAR;
1302 case SdParam::reservedName + Sd::rSEPCHAR:
1303 functionClass = Syntax::cSEPCHAR;
1308 if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
// The function is added only when the character translates and is not
// already a function character; a name that is already in use is reported
// with duplicateFunctionName.
1311 if (translateSyntax(sdBuilder, parm.n, c)
1312 && checkNotFunction(*sdBuilder.syntax, c)
1315 if (sdBuilder.syntax->lookupFunctionChar(name, &tem))
1316 message(ParserMessages::duplicateFunctionName, StringMessageArg(name));
1318 sdBuilder.syntax->addFunctionChar(name, functionClass, c);
// MSOCHAR without any MSICHAR is reported.
1321 if (haveMsochar && !haveMsichar)
1322 message(ParserMessages::msocharRequiresMsichar);
// Parses the NAMING section.  The do/while on isNamechar runs the same
// logic twice: once for name start characters (LCNMSTRT/UCNMSTRT) and once
// for name characters (LCNMCHAR/UCNMCHAR).  Lower-case characters are
// collected in `lc`; the upper-case list is then paired with them
// position by position, and addSubst() is called for each pair whose
// translated characters differ.  NAMECASE GENERAL and ENTITY are parsed
// last.
// NOTE(review): large parts of the body are not visible in this view; the
// inline notes below describe only the visible statements.
1326 Boolean Parser::sdParseNaming(SdBuilder &sdBuilder, SdParam &parm)
// Keywords that terminate each of the four lists, indexed by
// isNamechar * 2 (+ 1 for the upper-case list).
1328 static Sd::ReservedName keys[4] = {
1329 Sd::rUCNMSTRT, Sd::rLCNMCHAR, Sd::rUCNMCHAR, Sd::rNAMECASE
1332 ISet<Char> nameStartChar;
1333 ISet<Char> nameChar;
// lc: the lower-case characters; rangeIndex: positions in lc at which a
// range begins.
1335 String<SyntaxChar> lc;
1336 Vector<size_t> rangeIndex;
1338 Boolean allowThrough = 0;
// ---- lower-case list ----
1340 if (!parseSdParam(sdBuilder.externalSyntax
1341 ? AllowedSdParams(SdParam::reservedName
1342 + keys[isNamechar * 2],
1343 SdParam::paramLiteral,
1347 ? AllowedSdParams(SdParam::paramLiteral)
1348 : AllowedSdParams(SdParam::reservedName
1349 + keys[isNamechar * 2])),
1353 Boolean wasRange = 0;
1354 sdParamConvertToLiteral(parm);
// An ellipsis must be followed by a non-empty literal whose first
// character is the end of the range.
1355 if (parm.type == SdParam::ellipsis) {
1357 message(ParserMessages::sdInvalidEllipsis);
1358 if (!parseSdParam(AllowedSdParams(SdParam::paramLiteral,
1362 sdParamConvertToLiteral(parm);
1363 if (parm.paramLiteralText.size() == 0)
1364 message(ParserMessages::sdInvalidEllipsis);
1365 else if (allowThrough) {
1366 SyntaxChar n = parm.paramLiteralText[0];
// A range end below the last character is invalid; a range is recorded
// only when the end is more than one above the last character.
1367 if (n < lc[lc.size() - 1])
1368 message(ParserMessages::sdInvalidRange);
1369 else if (n > lc[lc.size() - 1] + 1)
1370 rangeIndex.push_back(lc.size() - 1);
1374 if (parm.type != SdParam::paramLiteral)
1376 lc += parm.paramLiteralText;
1377 allowThrough = (parm.paramLiteralText.size() - wasRange) > 0;
// ---- upper-case list, walked in step with lc ----
1380 size_t rangeIndexPos = 0;
1381 unsigned long rangeLeft = 0;
1382 SyntaxChar nextRangeChar;
1383 ISet<Char> &set = isNamechar ? nameChar : nameStartChar;
1384 String<SyntaxChar> chars;
1388 if (!parseSdParam(sdBuilder.externalSyntax
1389 ? AllowedSdParams(SdParam::reservedName
1390 + keys[isNamechar * 2 + 1],
1391 SdParam::paramLiteral,
1395 ? AllowedSdParams(SdParam::paramLiteral)
1396 : AllowedSdParams(SdParam::reservedName
1397 + keys[isNamechar * 2 + 1])),
1400 sdParamConvertToLiteral(parm);
1402 Boolean isRange = parm.type == SdParam::ellipsis;
1403 size_t nChars = chars.size();
// Pair each pending upper-case character with the next lower-case
// character, expanding recorded lower-case ranges as they are reached.
1406 for (size_t i = 0; i < nChars; i++) {
1408 && rangeIndexPos < rangeIndex.size()
1409 && rangeIndex[rangeIndexPos] == lcPos) {
1410 rangeLeft = 1 + lc[lcPos + 1] - lc[lcPos];
1411 nextRangeChar = lc[lcPos];
1416 if (rangeLeft > 0) {
1418 c = nextRangeChar++;
1420 else if (lcPos < lc.size())
1426 // map from c to chars[i]
1427 Char transLc, transUc;
1428 if (translateSyntax(sdBuilder, c, transLc)
1429 && translateSyntax(sdBuilder, chars[i], transUc)) {
1431 if (transLc != transUc) {
1433 sdBuilder.syntax->addSubst(transLc, transUc);
// Upper-case range: start is the last pending character, end is the first
// character of the literal after the ellipsis.
1438 if (!parseSdParam(AllowedSdParams(SdParam::paramLiteral,
1442 sdParamConvertToLiteral(parm);
1443 if (chars.size() == 0 || parm.paramLiteralText.size() == 0)
1444 message(ParserMessages::sdInvalidEllipsis);
1446 SyntaxChar start = chars[chars.size() - 1];
1447 SyntaxChar end = parm.paramLiteralText[0];
1449 message(ParserMessages::sdInvalidRange);
1451 size_t count = end + 1 - start;
1454 && rangeIndexPos < rangeIndex.size()
1455 && rangeIndex[rangeIndexPos] == lcPos) {
1456 rangeLeft = 1 + lc[lcPos + 1] - lc[lcPos];
1457 nextRangeChar = lc[lcPos];
1462 if (rangeLeft > 0) {
1464 c = nextRangeChar++;
1466 else if (lcPos < lc.size())
1472 if (c == start && count > 1 && (runOut || rangeLeft > 0)) {
1476 else if (rangeLeft < count)
1480 translateRange(sdBuilder, start, start + (count - 1), set);
1485 Char transLc, transUc;
1486 if (translateSyntax(sdBuilder, c, transLc)
1487 && translateSyntax(sdBuilder, start, transUc)) {
1489 if (transLc != transUc) {
1491 sdBuilder.syntax->addSubst(transLc, transUc);
1501 if (parm.type != SdParam::paramLiteral)
// The first character of the literal was consumed as the range end above,
// so only the remainder is kept as pending characters.
1503 chars.append(parm.paramLiteralText.data() + 1,
1504 parm.paramLiteralText.size() - 1);
1506 else if (parm.type == SdParam::paramLiteral)
1507 parm.paramLiteralText.swap(chars);
// Length-mismatch check between the lower- and upper-case lists; the
// message reported depends on which pass this is.
1511 if ((runOut && !sdBuilder.externalSyntax)
1512 || rangeLeft > 0 || lcPos < lc.size())
1514 ? ParserMessages::nmcharLength
1515 : ParserMessages::nmstrtLength);
1516 if (!checkNmchars(set, *sdBuilder.syntax))
1517 sdBuilder.valid = 0;
// Post-increment: the body runs once with isNamechar == 0 and once with 1.
1518 } while (!isNamechar++);
// A character may not be both a name start and a name character.
1520 intersectCharSets(nameStartChar, nameChar, bad);
1521 if (!bad.isEmpty()) {
1522 sdBuilder.valid = 0;
1523 message(ParserMessages::nmcharNmstrt, CharsetMessageArg(bad));
1525 sdBuilder.syntax->addNameStartCharacters(nameStartChar);
1526 sdBuilder.syntax->addNameCharacters(nameChar);
// NAMECASE GENERAL (NO|YES) ENTITY (NO|YES)
1527 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rGENERAL),
1530 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNO,
1531 SdParam::reservedName + Sd::rYES),
1534 sdBuilder.syntax->setNamecaseGeneral(parm.type
1535 == SdParam::reservedName + Sd::rYES);
1537 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rENTITY),
1540 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNO,
1541 SdParam::reservedName + Sd::rYES),
1544 sdBuilder.syntax->setNamecaseEntity(parm.type
1545 == SdParam::reservedName + Sd::rYES);
1549 Boolean Parser::checkNmchars(const ISet<Char> &set, const Syntax &syntax)
1553 intersectCharSets(set, *syntax.charSet(Syntax::nameStart), bad);
1554 if (!bad.isEmpty()) {
1555 message(ParserMessages::nmcharLetter, CharsetMessageArg(bad));
1559 intersectCharSets(set, *syntax.charSet(Syntax::digit), bad);
1560 if (!bad.isEmpty()) {
1561 message(ParserMessages::nmcharDigit, CharsetMessageArg(bad));
1566 if (syntax.getStandardFunction(Syntax::fRE, funChar)
1567 && set.contains(funChar)) {
1568 message(ParserMessages::nmcharRe, NumberMessageArg(funChar));
1571 if (syntax.getStandardFunction(Syntax::fRS, funChar)
1572 && set.contains(funChar)) {
1573 message(ParserMessages::nmcharRs, NumberMessageArg(funChar));
1576 if (syntax.getStandardFunction(Syntax::fSPACE, funChar)
1577 && set.contains(funChar)) {
1578 message(ParserMessages::nmcharSpace, NumberMessageArg(funChar));
1581 intersectCharSets(set, *syntax.charSet(Syntax::sepchar), bad);
1582 if (!bad.isEmpty()) {
1583 message(ParserMessages::nmcharSepchar, CharsetMessageArg(bad));
1589 // Result is a ISet<WideChar>, so it can be used with CharsetMessageArg.
1591 void Parser::intersectCharSets(const ISet<Char> &s1, const ISet<Char> &s2,
1592 ISet<WideChar> &inter)
1594 ISetIter<Char> i1(s1);
1595 ISetIter<Char> i2(s2);
1596 Char min1, max1, min2, max2;
1597 if (!i1.next(min1, max1))
1599 if (!i2.next(min2, max2))
1603 if (!i1.next(min1, max1))
1606 else if (max2 < min1) {
1607 if (!i2.next(min2, max2))
1613 Char min = min1 > min2 ? min1 : min2;
1614 Char max = max1 < max2 ? max1 : max2;
1615 inter.addRange(min, max);
1616 if (!i1.next(min1, max1))
1618 if (!i2.next(min2, max2))
// Parse the delimiter section of the SGML declaration: the keywords
// DELIM GENERAL SGMLREF, then general delimiter assignments up to
// SHORTREF, then SGMLREF or NONE followed by short reference delimiter
// literals (and, for an external syntax, numbers and "..." ranges) up to
// NAMES.  Problems found in the declared delimiters clear sdBuilder.valid.
// NOTE(review): this listing omits some short lines of the function (lone
// braces, returns); the comments describe only the statements shown.
1624 Boolean Parser::sdParseDelim(SdBuilder &sdBuilder, SdParam &parm)
1626 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rDELIM),
1629 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rGENERAL),
1632 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF),
// One flag per general delimiter role, set once the role has been
// assigned, so that a second assignment can be reported.
1635 PackedBoolean delimGeneralSpecified[Syntax::nDelimGeneral];
1636 for (int i = 0; i < Syntax::nDelimGeneral; i++)
1637 delimGeneralSpecified[i] = 0;
// Each assignment is a general delimiter name followed by its value;
// the keyword SHORTREF is the alternative to another name.
1639 if (!parseSdParam(AllowedSdParams(SdParam::generalDelimiterName,
1640 SdParam::reservedName + Sd::rSHORTREF),
1643 if (parm.type == SdParam::reservedName + Sd::rSHORTREF)
1645 Syntax::DelimGeneral delimGeneral = parm.delimGeneralIndex;
1646 if (delimGeneralSpecified[delimGeneral])
1647 message(ParserMessages::duplicateDelimGeneral,
1648 StringMessageArg(sd().generalDelimiterName(delimGeneral)));
// The value is a parameter literal; what else is allowed depends on
// sdBuilder.externalSyntax.
1649 if (!parseSdParam(sdBuilder.externalSyntax
1650 ? AllowedSdParams(SdParam::paramLiteral,
1652 : AllowedSdParams(SdParam::paramLiteral),
// A number parameter is turned into a one-character literal.
1655 sdParamConvertToLiteral(parm);
1657 if (parm.paramLiteralText.size() == 0)
1658 message(ParserMessages::sdEmptyDelimiter);
1659 else if (translateSyntax(sdBuilder, parm.paramLiteralText, str)) {
// Apply the general substitution table to every character of the
// translated delimiter.
1660 const SubstTable<Char> *table = sdBuilder.syntax->generalSubstTable();
1661 for (size_t i = 0; i < str.size(); i++)
1662 table->subst(str[i]);
// Only the first valid assignment for a role is stored.
1663 if (checkGeneralDelim(*sdBuilder.syntax, str)
1664 && !delimGeneralSpecified[delimGeneral])
1665 sdBuilder.syntax->setDelimGeneral(delimGeneral, str);
1667 sdBuilder.valid = 0;
1669 delimGeneralSpecified[delimGeneral] = 1;
// Let the syntax set up the reference general delimiters.
1671 if (!setRefDelimGeneral(*sdBuilder.syntax,
1672 sdBuilder.syntaxCharset,
1673 sdBuilder.sd->docCharset(),
1674 sdBuilder.switcher))
1675 sdBuilder.valid = 0;
// SHORTREF is followed by SGMLREF or NONE; with SGMLREF the reference
// short reference delimiters are added.
1676 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF,
1677 SdParam::reservedName + Sd::rNONE),
1680 if (parm.type == SdParam::reservedName + Sd::rSGMLREF) {
1681 if (!addRefDelimShortref(*sdBuilder.syntax,
1682 sdBuilder.syntaxCharset,
1683 sdBuilder.sd->docCharset(),
1684 sdBuilder.switcher))
1685 sdBuilder.valid = 0;
// Holds the most recent literal so that a following ellipsis can use it
// as the start of a range.
1687 String<SyntaxChar> lastLiteral;
1689 if (!parseSdParam(sdBuilder.externalSyntax
1690 ? AllowedSdParams(SdParam::paramLiteral,
1693 SdParam::reservedName + Sd::rNAMES)
1694 : AllowedSdParams(SdParam::paramLiteral,
1695 SdParam::reservedName + Sd::rNAMES),
1698 sdParamConvertToLiteral(parm);
// Ellipsis: the next parameter gives the end of a range of
// single-character short references.
1699 if (parm.type == SdParam::ellipsis) {
1700 if (!parseSdParam(AllowedSdParams(SdParam::paramLiteral,
1704 sdParamConvertToLiteral(parm);
// Both ends of the range must be exactly one character long and the end
// must not be below the start.
1705 if (parm.paramLiteralText.size() == 0)
1706 message(ParserMessages::sdEmptyDelimiter);
1707 else if (lastLiteral.size() != 1
1708 || parm.paramLiteralText.size() != 1)
1709 message(ParserMessages::sdInvalidEllipsis);
1710 else if (parm.paramLiteralText[0] < lastLiteral[0])
1711 message(ParserMessages::sdInvalidRange);
1712 else if (parm.paramLiteralText[0] != lastLiteral[0]) {
1713 ISet<Char> shortrefChars;
1714 translateRange(sdBuilder,
1716 parm.paramLiteralText[0],
// Collect the characters of the range that are already short
// references, either simple ones or one-character complex ones.
1718 ISet<WideChar> duplicates;
1719 intersectCharSets(shortrefChars,
1720 sdBuilder.syntax->delimShortrefSimple(),
1722 int nComplexShortrefs = sdBuilder.syntax->nDelimShortrefComplex();
1723 for (int i = 0; i < nComplexShortrefs; i++) {
1724 const StringC &delim = sdBuilder.syntax->delimShortrefComplex(i);
1725 if (delim.size() == 1 && shortrefChars.contains(delim[0]))
1726 duplicates.add(delim[0]);
1728 if (!duplicates.isEmpty())
1729 message(ParserMessages::duplicateDelimShortrefSet,
1730 CharsetMessageArg(duplicates));
1731 sdBuilder.syntax->addDelimShortrefs(shortrefChars,
1732 sdBuilder.sd->docCharset());
// A range has been consumed; the next ellipsis needs a fresh literal.
1734 lastLiteral.resize(0);
// Plain literal: remember it and add it as a short reference delimiter.
1736 else if (parm.type == SdParam::paramLiteral) {
1737 parm.paramLiteralText.swap(lastLiteral);
1739 if (lastLiteral.size() == 0)
1740 message(ParserMessages::sdEmptyDelimiter);
1741 else if (translateSyntax(sdBuilder, lastLiteral, str)) {
1742 const SubstTable<Char> *table = sdBuilder.syntax->generalSubstTable();
1743 for (size_t i = 0; i < str.size(); i++)
1744 table->subst(str[i]);
1746 || checkShortrefDelim(*sdBuilder.syntax,
1747 sdBuilder.sd->docCharset(),
// A delimiter that is already a valid short reference is reported as a
// duplicate.
1749 if (sdBuilder.syntax->isValidShortref(str))
1750 message(ParserMessages::duplicateDelimShortref,
1751 StringMessageArg(str));
1753 sdBuilder.syntax->addDelimShortref(str,
1754 sdBuilder.sd->docCharset());
// Parse the reserved name section of the SGML declaration: SGMLREF,
// then pairs of a reference reserved name and its replacement, up to the
// keyword QUANTITY.  Afterwards setRefNames() is called on the syntax
// being built and the names of the RE, RS and SPACE functions are checked
// against the declared function names.
// NOTE(review): this listing omits some short lines of the function (lone
// braces, returns); the comments describe only the statements shown.
1764 Boolean Parser::sdParseNames(SdBuilder &sdBuilder, SdParam &parm)
1766 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF),
1770 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rQUANTITY,
1771 SdParam::referenceReservedName),
1774 if (parm.type == SdParam::reservedName + Sd::rQUANTITY)
// Remember which reserved name is being replaced before parm is reused
// for the replacement.
1776 Syntax::ReservedName reservedName = parm.reservedNameIndex;
// The replacement is a name; an external syntax may also give it as a
// parameter literal.
1777 if (!parseSdParam(sdBuilder.externalSyntax
1778 ? AllowedSdParams(SdParam::name, SdParam::paramLiteral)
1779 : AllowedSdParams(SdParam::name),
// Translate the replacement into transName, by translateName() for a
// name and by translateSyntax() for a literal.
1783 if (parm.type == SdParam::name
1784 ? translateName(sdBuilder, parm.token, transName)
1785 : translateSyntax(sdBuilder, parm.paramLiteralText, transName)) {
// The replacement must not already be a reserved name of the syntax
// being built.
1786 Syntax::ReservedName tem;
1787 if (sdBuilder.syntax->lookupReservedName(transName, &tem))
1788 message(ParserMessages::ambiguousReservedName,
1789 StringMessageArg(transName));
// It must start with a name start character; on failure transName is
// emptied.
1791 if (transName.size() == 0
1792 || !sdBuilder.syntax->isNameStartCharacter(transName[0])) {
1793 message(ParserMessages::reservedNameSyntax,
1794 StringMessageArg(transName));
1795 transName.resize(0);
1798 // Check that its a valid name in the declared syntax
1799 // (- and . might not be name characters).
1800 for (i = 1; i < transName.size(); i++)
1801 if (!sdBuilder.syntax->isNameCharacter(transName[i])) {
1802 message(ParserMessages::reservedNameSyntax,
1803 StringMessageArg(transName));
1804 transName.resize(0);
// Apply the general substitution table to the replacement name.
1807 for (i = 0; i < transName.size(); i++)
1808 sdBuilder.syntax->generalSubstTable()->subst(transName[i]);
// A reserved name that already has a value in the syntax being built is
// a duplicate.  The name shown in the message is taken from syntax(),
// not from sdBuilder.syntax.
1809 if (sdBuilder.syntax->reservedName(reservedName).size() > 0)
1810 message(ParserMessages::duplicateReservedName,
1811 StringMessageArg(syntax().reservedName(reservedName)));
1812 else if (transName.size() > 0)
1813 sdBuilder.syntax->setName(reservedName, transName);
1815 sdBuilder.valid = 0;
1819 setRefNames(*sdBuilder.syntax, sdBuilder.sd->docCharset());
// The reserved names RE, RS and SPACE must not coincide with a function
// name known to lookupFunctionChar().
1820 static Syntax::ReservedName functionNameIndex[3] = {
1821 Syntax::rRE, Syntax::rRS, Syntax::rSPACE
1823 for (int i = 0; i < 3; i++) {
1824 const StringC &functionName
1825 = sdBuilder.syntax->reservedName(functionNameIndex[i]);
1827 if (sdBuilder.syntax->lookupFunctionChar(functionName, &tem))
1828 message(ParserMessages::duplicateFunctionName, StringMessageArg(functionName));
1830 sdBuilder.syntax->enterStandardFunctionNames();
1834 Boolean Parser::sdParseQuantity(SdBuilder &sdBuilder, SdParam &parm)
1836 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF),
1840 int final = (sdBuilder.externalSyntax
1842 : SdParam::reservedName + Sd::rFEATURES);
1843 if (!parseSdParam(AllowedSdParams(SdParam::quantityName, final), parm))
1845 if (parm.type != SdParam::quantityName)
1847 Syntax::Quantity quantity = parm.quantityIndex;
1848 if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
1850 sdBuilder.syntax->setQuantity(quantity, parm.n);
1852 if (sdBuilder.sd->scopeInstance()) {
1853 for (int i = 0; i < Syntax::nQuantity; i++)
1854 if (sdBuilder.syntax->quantity(Syntax::Quantity(i))
1855 < syntax().quantity(Syntax::Quantity(i)))
1856 message(ParserMessages::scopeInstanceQuantity,
1857 StringMessageArg(sd().quantityName(Syntax::Quantity(i))));
// Parse the feature use section of the SGML declaration.  The keywords
// are expected in the fixed order given by the features table; depending
// on the table entry a keyword is followed by nothing, by NO/YES, or by
// NO/YES with a number after YES.  Values are stored in sdBuilder.sd in
// table order, using separate running indexes for boolean and number
// features.
// NOTE(review): this listing omits some short lines of the function (the
// definition of the arg member, braces, returns); the comments describe
// only the statements shown.
1862 Boolean Parser::sdParseFeatures(SdBuilder &sdBuilder, SdParam &parm)
1864 struct FeatureInfo {
1865 Sd::ReservedName name;
// Keyword and kind of argument for each feature, in the order in which
// they are parsed.
1872 static FeatureInfo features[] = {
1873 { Sd::rMINIMIZE, FeatureInfo::__none },
1874 { Sd::rDATATAG, FeatureInfo::__boolean },
1875 { Sd::rOMITTAG, FeatureInfo::__boolean },
1876 { Sd::rRANK, FeatureInfo::__boolean },
1877 { Sd::rSHORTTAG, FeatureInfo::__boolean },
1878 { Sd::rLINK, FeatureInfo::__none },
1879 { Sd::rSIMPLE, FeatureInfo::__number },
1880 { Sd::rIMPLICIT, FeatureInfo::__boolean },
1881 { Sd::rEXPLICIT, FeatureInfo::__number },
1882 { Sd::rOTHER, FeatureInfo::__none },
1883 { Sd::rCONCUR, FeatureInfo::__number },
1884 { Sd::rSUBDOC, FeatureInfo::__number },
1885 { Sd::rFORMAL, FeatureInfo::__boolean }
// Index of the next Sd::BooleanFeature / Sd::NumberFeature to be set.
1887 int booleanFeature = 0;
1888 int numberFeature = 0;
1889 for (size_t i = 0; i < SIZEOF(features); i++) {
1890 if (!parseSdParam(AllowedSdParams(SdParam::reservedName
1891 + features[i].name), parm))
// Every feature other than the __none group keywords takes NO or YES.
1893 if (features[i].arg != FeatureInfo::__none) {
1894 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNO,
1895 SdParam::reservedName + Sd::rYES),
// NOTE(review): lines next to this DATATAG check are missing from the
// listing, so whether the check is compiled in cannot be told from here.
1899 if (features[i].name == Sd::rDATATAG
1900 && parm.type == (SdParam::reservedName + Sd::rYES))
1901 message(ParserMessages::datatagNotImplemented);
// Number features: YES is followed by the number to store.
1903 if (features[i].arg == FeatureInfo::__number) {
1904 if (parm.type == SdParam::reservedName + Sd::rYES) {
1905 if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
1907 sdBuilder.sd->setNumberFeature(Sd::NumberFeature(numberFeature++),
1911 sdBuilder.sd->setNumberFeature(Sd::NumberFeature(numberFeature++),
// Boolean features: the value stored depends on whether YES was given.
1915 sdBuilder.sd->setBooleanFeature(Sd::BooleanFeature(booleanFeature++),
1916 parm.type == (SdParam::reservedName
1923 Boolean Parser::sdParseAppinfo(SdBuilder &, SdParam &parm)
1925 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rAPPINFO),
1928 Location location(currentLocation());
1929 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNONE,
1930 SdParam::minimumLiteral),
1933 AppinfoEvent *event;
1934 if (parm.type == SdParam::minimumLiteral)
1935 event = new (eventAllocator()) AppinfoEvent(parm.literalText, location);
1937 event = new (eventAllocator()) AppinfoEvent(location);
1938 eventHandler().appinfo(event);
1942 Boolean Parser::translateSyntax(CharSwitcher &switcher,
1943 const CharsetInfo &syntaxCharset,
1944 const CharsetInfo &docCharset,
1945 WideChar syntaxChar,
1948 syntaxChar = switcher.subst(syntaxChar);
1950 if (syntaxCharset.descToUniv(syntaxChar, univChar)
1951 && univToDescCheck(docCharset, univChar, docChar))
1953 message(ParserMessages::translateSyntaxChar, NumberMessageArg(syntaxChar));
// Translate the syntax characters start..end to the document character
// set and add the results to chars.  The range is processed in pieces:
// a character that is the source of a switch is translated on its own
// with translateSyntax(), and stretches without a switch are translated
// with translateSyntaxNoSwitch() and added as a range.
// NOTE(review): this listing omits some short lines of the function
// (declarations, braces and possibly preprocessor lines around the first
// do/while loop); the comments describe only the statements shown.
1957 void Parser::translateRange(SdBuilder &sdBuilder, SyntaxChar start,
1958 SyntaxChar end, ISet<Char> &chars)
1963 if (!translateSyntax(sdBuilder, start, docChar))
1966 } while (start++ != end);
// doneUpTo is the last syntax character handled by the current step;
// it starts out as the whole remaining range.
1969 SyntaxChar doneUpTo = end;
// Find the lowest switched character inside start..end, if there is one.
1970 Boolean gotSwitch = 0;
1971 WideChar firstSwitch;
1972 for (size_t i = 0; i < sdBuilder.switcher.nSwitches(); i++) {
1973 WideChar c = sdBuilder.switcher.switchFrom(i);
1974 if (start <= c && c <= end) {
1979 else if (c < firstSwitch)
// The range begins with a switched character.
1983 if (gotSwitch && firstSwitch == start) {
1986 if (translateSyntax(sdBuilder, start, docChar))
// Otherwise stop just before the first switched character.
1991 doneUpTo = firstSwitch - 1;
// count limits how many consecutive characters the translation covers;
// shorten the step when it covers fewer than requested.
1994 if (translateSyntaxNoSwitch(sdBuilder, start, docChar, count)) {
1995 if (count - 1 < doneUpTo - start)
1996 doneUpTo = start + (count - 1);
1997 chars.addRange(docChar, docChar + (doneUpTo - start));
// Continue after the piece just handled until end has been reached.
2000 if (doneUpTo == end)
2002 start = doneUpTo + 1;
// Translate one syntax character to the document character set.  The
// character is first passed through sdBuilder.switcher.subst(); the
// result is handed to translateSyntaxNoSwitch(), whose return value is
// returned.
// NOTE(review): this listing omits some short lines of the function; the
// comments describe only the statements shown.
2006 Boolean Parser::translateSyntax(SdBuilder &sdBuilder,
2007 WideChar syntaxChar, Char &docChar)
2010 return translateSyntaxNoSwitch(sdBuilder,
2011 sdBuilder.switcher.subst(syntaxChar),
// Translate syntaxChar to the document character set without applying
// any character switch.  Information about the character is looked up in
// the syntax character set declaration and matched against the document
// character set declaration; a mapping through universal character
// numbers is also present.  When no translation is found sdBuilder.valid
// is cleared and translateSyntaxChar is reported.
// NOTE(review): this listing omits many short lines of the function
// (the last parameter, declarations, switch/case scaffolding, returns);
// the comments describe only the statements shown.
2016 Boolean Parser::translateSyntaxNoSwitch(SdBuilder &sdBuilder,
2017 WideChar syntaxChar, Char &docChar,
2022 CharsetDeclRange::Type type;
2024 if (sdBuilder.syntaxCharsetDecl.getCharInfo(syntaxChar,
// Document characters that match the declaration of syntaxChar.
2030 ISet<WideChar> docChars;
2032 case CharsetDeclRange::unused:
2034 case CharsetDeclRange::string:
2035 sdBuilder.sd->docCharsetDecl().stringToChar(str, docChars);
2037 case CharsetDeclRange::number:
2040 sdBuilder.sd->docCharsetDecl().numberToChar(id, n, docChars, count2);
2041 if (!docChars.isEmpty() && count2 < count)
2048 if (!docChars.isEmpty()) {
// More than one matching document character is reported only when
// warnSgmlDecl is set.
2049 if (!docChars.isSingleton() && options().warnSgmlDecl)
2050 message(ParserMessages::ambiguousDocCharacter,
2051 CharsetMessageArg(docChars));
// Use the first character delivered by the iterator, provided it fits
// in a Char.
2052 ISetIter<WideChar> iter(docChars);
2054 if (iter.next(min, max) && min <= charMax) {
2055 docChar = Char(min);
// Translation through the universal character number of syntaxChar.
2061 WideChar alsoMax, count2;
2062 if (sdBuilder.syntaxCharset.descToUniv(syntaxChar, univChar, alsoMax)
2063 && univToDescCheck(sdBuilder.sd->docCharset(), univChar, docChar,
2065 count = (alsoMax - syntaxChar) + 1;
2070 sdBuilder.valid = 0;
2071 message(ParserMessages::translateSyntaxChar, NumberMessageArg(syntaxChar));
2076 Boolean Parser::translateSyntax(SdBuilder &sdBuilder,
2077 const String<SyntaxChar> &syntaxString,
2080 docString.resize(0);
2082 for (size_t i = 0; i < syntaxString.size(); i++) {
2084 if (translateSyntax(sdBuilder, syntaxString[i], c))
// Translate name character by character into str, which is resized to
// the length of name.  Each character is mapped to its universal
// character number, passed through translateUniv() so that character
// switches are applied, and mapped back to the document character set
// of the declaration being built.  A character that cannot be mapped
// back is reported with translateDocChar and clears sdBuilder.valid.
// NOTE(review): this listing omits some short lines of the function (the
// last parameter, declarations, returns); the comments describe only the
// statements shown.
2092 Boolean Parser::translateName(SdBuilder &sdBuilder,
2093 const StringC &name,
2096 str.resize(name.size());
2097 for (size_t i = 0; i < name.size(); i++) {
// The source character is interpreted in the current document character
// set, sd().docCharset().
2099 Boolean ret = sd().docCharset().descToUniv(name[i], univChar);
2100 // Might switch hyphen or period.
2101 univChar = translateUniv(univChar, sdBuilder.switcher,
2102 sdBuilder.syntaxCharset);
// The result is expressed in the document character set of the
// declaration being built, sdBuilder.sd->docCharset().
2104 if (!univToDescCheck(sdBuilder.sd->docCharset(), univChar, str[i])) {
2105 message(ParserMessages::translateDocChar, NumberMessageArg(univChar));
2106 sdBuilder.valid = 0;
2113 UnivChar Parser::translateUniv(UnivChar univChar,
2114 CharSwitcher &switcher,
2115 const CharsetInfo &syntaxCharset)
2117 WideChar syntaxChar;
2118 ISet<WideChar> syntaxChars;
2119 if (syntaxCharset.univToDesc(univChar, syntaxChar, syntaxChars) != 1) {
2120 message(ParserMessages::missingSyntaxChar,
2121 NumberMessageArg(univChar));
2124 SyntaxChar tem = switcher.subst(syntaxChar);
2125 if (tem != syntaxChar && !syntaxCharset.descToUniv(tem, univChar))
2126 message(ParserMessages::translateSyntaxChar, NumberMessageArg(tem));
2130 Boolean Parser::checkNotFunction(const Syntax &syn, Char c)
2132 if (syn.charSet(Syntax::functionChar)->contains(c)) {
2133 message(ParserMessages::oneFunction, NumberMessageArg(c));
2141 // Check that it has at most one B sequence and that it
2142 // is not adjacent to a blank sequence.
2144 Boolean Parser::checkShortrefDelim(const Syntax &syn,
2145 const CharsetInfo &charset,
2146 const StringC &delim)
2149 Char letterB = charset.execToDesc('B');
2150 const ISet<Char> *bSet = syn.charSet(Syntax::blank);
2151 for (size_t i = 0; i < delim.size(); i++)
2152 if (delim[i] == letterB) {
2154 message(ParserMessages::multipleBSequence, StringMessageArg(delim));
2158 if (i > 0 && bSet->contains(delim[i - 1])) {
2159 message(ParserMessages::blankAdjacentBSequence,
2160 StringMessageArg(delim));
2163 while (i + 1 < delim.size() && delim[i + 1] == letterB)
2165 if (i < delim.size() - 1 && bSet->contains(delim[i + 1])) {
2166 message(ParserMessages::blankAdjacentBSequence,
2167 StringMessageArg(delim));
2174 Boolean Parser::checkGeneralDelim(const Syntax &syn, const StringC &delim)
2176 const ISet<Char> *functionSet = syn.charSet(Syntax::functionChar);
2177 if (delim.size() > 0) {
2178 Boolean allFunction = 1;
2179 for (size_t i = 0; i < delim.size(); i++)
2180 if (!functionSet->contains(delim[i]))
2183 message(ParserMessages::generalDelimAllFunction,
2184 StringMessageArg(delim));
2191 Boolean Parser::checkSwitches(CharSwitcher &switcher,
2192 const CharsetInfo &syntaxCharset)
2195 for (size_t i = 0; i < switcher.nSwitches(); i++) {
2197 c[0] = switcher.switchFrom(i);
2198 c[1] = switcher.switchTo(i);
2199 for (int j = 0; j < 2; j++) {
2201 if (syntaxCharset.descToUniv(c[j], univChar)) {
2202 // Check that it is not Digit Lcletter or Ucletter
2203 if ((UnivCharsetDesc::a <= univChar
2204 && univChar < UnivCharsetDesc::a + 26)
2205 || (UnivCharsetDesc::A <= univChar
2206 && univChar < UnivCharsetDesc::A + 26)
2207 || (UnivCharsetDesc::zero <= univChar
2208 && univChar < UnivCharsetDesc::zero + 10)) {
2209 message(ParserMessages::switchLetterDigit,
2210 NumberMessageArg(univChar));
2219 Boolean Parser::checkSwitchesMarkup(CharSwitcher &switcher)
2222 size_t nSwitches = switcher.nSwitches();
2223 for (size_t i = 0; i < nSwitches; i++)
2224 if (!switcher.switchUsed(i)) {
2225 // If the switch wasn't used,
2226 // then the character wasn't a markup character.
2227 message(ParserMessages::switchNotMarkup,
2228 NumberMessageArg(switcher.switchFrom(i)));
2234 void Parser::checkSyntaxNamelen(const Syntax &syn)
2236 size_t namelen = syn.namelen();
2238 for (i = 0; i < Syntax::nDelimGeneral; i++)
2239 if (syn.delimGeneral(i).size() > namelen)
2240 message(ParserMessages::delimiterLength,
2241 StringMessageArg(syn.delimGeneral(i)),
2242 NumberMessageArg(namelen));
2243 for (i = 0; i < syn.nDelimShortrefComplex(); i++)
2244 if (syn.delimShortrefComplex(i).size() > namelen)
2245 message(ParserMessages::delimiterLength,
2246 StringMessageArg(syn.delimShortrefComplex(i)),
2247 NumberMessageArg(namelen));
2248 for (i = 0; i < Syntax::nNames; i++)
2249 if (syn.reservedName(Syntax::ReservedName(i)).size() > namelen
2250 && options().warnSgmlDecl)
2251 message(ParserMessages::reservedNameLength,
2252 StringMessageArg(syn.reservedName(Syntax::ReservedName(i))),
2253 NumberMessageArg(namelen));
2256 Boolean Parser::univToDescCheck(const CharsetInfo &charset, UnivChar from,
2260 return univToDescCheck(charset, from, to, count);
2263 Boolean Parser::univToDescCheck(const CharsetInfo &charset, UnivChar from,
2264 Char &to, WideChar &count)
2267 ISet<WideChar> descSet;
2268 unsigned ret = charset.univToDesc(from, c, descSet, count);
2270 if (options().warnSgmlDecl)
2271 message(ParserMessages::ambiguousDocCharacter,
2272 CharsetMessageArg(descSet));
2275 if (ret && c <= charMax) {
// Read the next parameter of the SGML declaration.  Tokens are read in
// mdMode; when the input is one of the parameter kinds listed in allow,
// its kind is stored in parm.type and its value in the matching member
// of parm.  Input that is not allowed is reported with a message that
// lists the allowed parameters.
// NOTE(review): this listing omits many short lines of the function
// (several case labels, braces, returns and breaks); the comments
// describe only the statements shown.
2282 Boolean Parser::parseSdParam(const AllowedSdParams &allow,
2286 Token token = getToken(mdMode);
2288 case tokenUnrecognized:
2289 if (reportNonSgmlCharacter())
2292 message(ParserMessages::markupDeclarationCharacter,
2293 StringMessageArg(currentToken()),
2294 AllowedSdParamsMessageArg(allow, sdPointer()));
// Entity end is a parameter in its own right, but only when allowed.
2298 if (allow.param(SdParam::eE)) {
2299 parm.type = SdParam::eE;
2300 if (currentMarkup())
2301 currentMarkup()->addEntityEnd();
2305 message(ParserMessages::sdEntityEnd,
2306 AllowedSdParamsMessageArg(allow, sdPointer()));
// Record the current character in the markup being collected, if any.
2309 if (currentMarkup())
2310 currentMarkup()->addS(currentChar());
// Comments are parsed in sdcomMode.
2313 if (!parseComment(sdcomMode))
// Tokens that can never be a parameter here.
2318 case tokenMinusGrpo:
2321 case tokenPeroNameStart:
2323 sdParamInvalidToken(token, allow);
// A token starting with a name character is only meaningful when an
// ellipsis is allowed, and must then be exactly "...".
2325 case tokenLcUcNmchar:
2326 if (allow.param(SdParam::ellipsis)) {
2327 extendNameToken(syntax().namelen(), ParserMessages::nameLength);
2328 getCurrentToken(syntax().generalSubstTable(), parm.token);
2329 if (parm.token == sd().execToDoc("...")) {
2330 parm.type = SdParam::ellipsis;
2333 message(ParserMessages::sdInvalidNameToken,
2334 StringMessageArg(parm.token),
2335 AllowedSdParamsMessageArg(allow, sdPointer()));
2338 sdParamInvalidToken(token, allow);
// Literal: parsed as a minimum literal when one is allowed, otherwise
// as a parameter literal when that is allowed.
2344 Boolean lita = (token == tokenLita);
2345 if (allow.param(SdParam::minimumLiteral)) {
2346 if (!parseMinimumLiteral(lita, parm.literalText))
2348 parm.type = SdParam::minimumLiteral;
2349 if (currentMarkup())
2350 currentMarkup()->addLiteral(parm.literalText);
2352 else if (allow.param(SdParam::paramLiteral)) {
2353 if (!parseSdParamLiteral(lita, parm.paramLiteralText))
2355 parm.type = SdParam::paramLiteral;
2358 sdParamInvalidToken(token, allow);
// Markup declaration close.
2364 if (allow.param(SdParam::mdc)) {
2365 parm.type = SdParam::mdc;
2366 if (currentMarkup())
2367 currentMarkup()->addDelim(Syntax::dMDC);
2370 sdParamInvalidToken(tokenMdc, allow);
// Name: the token is extended and copied, with general substitution
// applied, into parm.token.  It is then tried in turn as a capacity
// name, a reference reserved name, a general delimiter name, a quantity
// name, one of the allowed reserved names of the SGML declaration, and
// finally as a plain name.
2372 case tokenNameStart:
2374 extendNameToken(syntax().namelen(), ParserMessages::nameLength);
2375 getCurrentToken(syntax().generalSubstTable(), parm.token);
2376 if (allow.param(SdParam::capacityName)) {
2377 if (sd().lookupCapacityName(parm.token, parm.capacityIndex)) {
2378 parm.type = SdParam::capacityName;
2379 if (currentMarkup())
2380 currentMarkup()->addName(currentInput());
2384 if (allow.param(SdParam::referenceReservedName)) {
2385 if (syntax().lookupReservedName(parm.token,
2386 &parm.reservedNameIndex)) {
2387 parm.type = SdParam::referenceReservedName;
2388 if (currentMarkup())
2389 currentMarkup()->addName(currentInput());
2393 if (allow.param(SdParam::generalDelimiterName)) {
2394 if (sd().lookupGeneralDelimiterName(parm.token,
2395 parm.delimGeneralIndex)) {
2396 parm.type = SdParam::generalDelimiterName;
2397 if (currentMarkup())
2398 currentMarkup()->addName(currentInput());
2402 if (allow.param(SdParam::quantityName)) {
2403 if (sd().lookupQuantityName(parm.token, parm.quantityIndex)) {
2404 parm.type = SdParam::quantityName;
2405 if (currentMarkup())
2406 currentMarkup()->addName(currentInput());
// Walk the allowed list; entries at or above SdParam::reservedName
// stand for reserved names of the SGML declaration.
2410 for (int i = 0;; i++) {
2411 SdParam::Type t = allow.get(i);
2412 if (t == SdParam::invalid)
2414 if (t >= SdParam::reservedName) {
2415 Sd::ReservedName sdReservedName
2416 = Sd::ReservedName(t - SdParam::reservedName);
2417 if (parm.token == sd().reservedName(sdReservedName)) {
2419 if (currentMarkup())
2420 currentMarkup()->addSdReservedName(sdReservedName,
2426 if (allow.param(SdParam::name)) {
2427 parm.type = SdParam::name;
2428 if (currentMarkup())
2429 currentMarkup()->addName(currentInput());
2433 message(ParserMessages::sdInvalidNameToken,
2434 StringMessageArg(parm.token),
2435 AllowedSdParamsMessageArg(allow, sdPointer()));
// Number: a value that stringToNumber() rejects or that is greater than
// Number(-1) is reported as too big and replaced by Number(-1).
2440 if (allow.param(SdParam::number)) {
2441 extendNumber(syntax().namelen(), ParserMessages::numberLength);
2442 parm.type = SdParam::number;
2444 if (!stringToNumber(currentInput()->currentTokenStart(),
2445 currentInput()->currentTokenLength(),
2447 || n > Number(-1)) {
2448 message(ParserMessages::numberTooBig,
2449 StringMessageArg(currentToken()));
2450 parm.n = Number(-1);
2453 if (currentMarkup())
2454 currentMarkup()->addNumber(currentInput());
// Look at the token after the number: a name start there is reported
// with psRequired.  The token is pushed back afterwards.
2457 Token token = getToken(mdMode);
2458 if (token == tokenNameStart)
2459 message(ParserMessages::psRequired);
2460 currentInput()->ungetToken();
2463 sdParamInvalidToken(tokenDigit, allow);
// Parse a parameter literal inside the SGML declaration and return its
// characters, as syntax characters, in str.  lita selects between the
// two literal modes (sdplitaMode when set, sdplitMode otherwise).
// Numeric character references are replaced by the character with that
// number; the literal is also added to the current markup, if any.
// NOTE(review): this listing omits many short lines of the function
// (case labels, braces, returns and breaks); the comments describe only
// the statements shown.
2471 // This is a separate function, because we might want SyntaxChar
2472 // to be bigger than Char.
2474 Boolean Parser::parseSdParamLiteral(Boolean lita, String<SyntaxChar> &str)
2476 Location loc(currentLocation());
2478 SdText text(loc, lita); // first character of content
// The length limit applied is the reference value of LITLEN.
2480 const unsigned refLitlen = Syntax::referenceQuantity(Syntax::qLITLEN);
2482 Mode mode = lita ? sdplitaMode : sdplitMode;
2485 Token token = getToken(mode);
2488 message(ParserMessages::literalLevel);
// An unrecognized character is still added to the literal; it is
// reported only when errorSignificant is set.
2490 case tokenUnrecognized:
2491 if (reportNonSgmlCharacter())
2493 if (options().errorSignificant)
2494 message(ParserMessages::sdLiteralSignificant,
2495 StringMessageArg(currentToken()));
2496 text.addChar(currentChar(), currentLocation());
// Numeric character reference: read the digits and convert them to a
// number that must not exceed syntaxCharMax.
2500 InputSource *in = currentInput();
2501 Location startLocation = currentLocation();
2502 in->discardInitial();
2503 extendNumber(syntax().namelen(), ParserMessages::numberLength);
2506 if (!stringToNumber(in->currentTokenStart(),
2507 in->currentTokenLength(),
2509 || n > syntaxCharMax) {
2510 message(ParserMessages::syntaxCharacterNumber,
2511 StringMessageArg(currentToken()));
// The markup of the reference is only built when prolog markup events
// are wanted.
2516 Owner<Markup> markupPtr;
2517 if (eventsWanted().wantPrologMarkup()) {
2518 markupPtr = new Markup;
2519 markupPtr->addDelim(Syntax::dCRO);
2520 markupPtr->addNumber(in);
2521 switch (getToken(refMode)) {
2523 markupPtr->addDelim(Syntax::dREFC);
2526 markupPtr->addRefEndRe();
2533 (void)getToken(refMode);
// The character is added with a location whose origin is a
// NumericCharRefOrigin built from the start of the reference.
2535 text.addChar(SyntaxChar(n),
2536 Location(new NumericCharRefOrigin(startLocation,
2537 currentLocation().index()
2538 + currentInput()->currentTokenLength()
2539 - startLocation.index(),
2544 case tokenCroNameStart:
2545 if (!parseNamedCharRef())
2552 case tokenPeroNameStart:
// A parameter entity reference is reported; the characters of the token
// are then added to the literal one by one.
2554 message(ParserMessages::sdParameterEntity);
2556 Location loc(currentLocation());
2557 const Char *p = currentInput()->currentTokenStart();
2558 for (size_t count = currentInput()->currentTokenLength();
2561 text.addChar(*p++, loc);
// An over-long literal that has reached an RE is taken as a sign of a
// missing closing delimiter.
2567 if (text.string().size() > refLitlen
2568 && currentChar() == syntax().standardFunction(Syntax::fRE)) {
2569 message(ParserMessages::parameterLiteralLength, NumberMessageArg(refLitlen));
2570 // guess that the closing delimiter has been omitted
2571 message(ParserMessages::literalClosingDelimiter);
2574 text.addChar(currentChar(), currentLocation());
2579 if (text.string().size() > refLitlen)
2580 message(ParserMessages::parameterLiteralLength,
2581 NumberMessageArg(refLitlen));
// Hand the collected characters to the caller.
2583 str = text.string();
2584 if (currentMarkup())
2585 currentMarkup()->addSdLiteral(text);
2589 Boolean Parser::stringToNumber(const Char *s, size_t length,
2590 unsigned long &result)
2592 unsigned long n = 0;
2593 for (; length > 0; length--, s++) {
2594 int val = sd().digitWeight(*s);
2595 if (n <= ULONG_MAX/10 && (n *= 10) <= ULONG_MAX - val)
2604 void Parser::sdParamInvalidToken(Token token,
2605 const AllowedSdParams &allow)
2607 message(ParserMessages::sdParamInvalidToken,
2608 TokenMessageArg(token, mdMode, syntaxPointer(), sdPointer()),
2609 AllowedSdParamsMessageArg(allow, sdPointer()));
2612 void Parser::sdParamConvertToLiteral(SdParam &parm)
2614 if (parm.type == SdParam::number) {
2615 parm.type = SdParam::paramLiteral;
2616 parm.paramLiteralText.resize(1);
2617 parm.paramLiteralText[0] = parm.n;
2621 AllowedSdParams::AllowedSdParams(SdParam::Type arg1, SdParam::Type arg2,
2622 SdParam::Type arg3, SdParam::Type arg4,
2623 SdParam::Type arg5, SdParam::Type arg6)
2633 Boolean AllowedSdParams::param(SdParam::Type t) const
2635 for (int i = 0; i < maxAllow && allow_[i] != SdParam::invalid; i++)
2641 SdParam::Type AllowedSdParams::get(int i) const
2643 return i < 0 || i >= maxAllow ? SdParam::Type(SdParam::invalid) : allow_[i];
2646 AllowedSdParamsMessageArg::AllowedSdParamsMessageArg(
2647 const AllowedSdParams &allow,
2648 const ConstPtr<Sd> &sd)
2649 : allow_(allow), sd_(sd)
2653 MessageArg *AllowedSdParamsMessageArg::copy() const
2655 return new AllowedSdParamsMessageArg(*this);
2658 void AllowedSdParamsMessageArg::append(MessageBuilder &builder) const
2660 for (int i = 0;; i++) {
2661 SdParam::Type type = allow_.get(i);
2662 if (type == SdParam::invalid)
2665 builder.appendFragment(ParserMessages::listSep);
2668 builder.appendFragment(ParserMessages::entityEnd);
2670 case SdParam::minimumLiteral:
2671 builder.appendFragment(ParserMessages::minimumLiteral);
2675 builder.appendFragment(ParserMessages::delimStart);
2676 Char c = sd_->execToDoc('>');
2677 builder.appendChars(&c, 1);
2678 builder.appendFragment(ParserMessages::delimEnd);
2681 case SdParam::number:
2682 builder.appendFragment(ParserMessages::number);
2685 builder.appendFragment(ParserMessages::name);
2687 case SdParam::paramLiteral:
2688 builder.appendFragment(ParserMessages::parameterLiteral);
2690 case SdParam::capacityName:
2691 builder.appendFragment(ParserMessages::capacityName);
2693 case SdParam::generalDelimiterName:
2694 builder.appendFragment(ParserMessages::generalDelimiteRoleName);
2696 case SdParam::referenceReservedName:
2697 builder.appendFragment(ParserMessages::referenceReservedName);
2699 case SdParam::quantityName:
2700 builder.appendFragment(ParserMessages::quantityName);
2702 case SdParam::ellipsis:
2704 StringC str(sd_->execToDoc("..."));
2705 builder.appendChars(str.data(), str.size());
2710 StringC str(sd_->reservedName(type - SdParam::reservedName));
2711 builder.appendChars(str.data(), str.size());
2718 SdBuilder::SdBuilder()
2719 : valid(1), externalSyntax(0)
2723 void SdBuilder::addFormalError(const Location &location,
2724 const MessageType1 &message,
2727 formalErrorList.insert(new SdFormalError(location, message, id));
2730 SdFormalError::SdFormalError(const Location &location,
2731 const MessageType1 &message,
2733 : location_(location),
2739 void SdFormalError::send(ParserState &parser)
2741 parser.Messenger::setNextLocation(location_);
2742 parser.message(*message_, StringMessageArg(id_));
2745 CharSwitcher::CharSwitcher()
2749 void CharSwitcher::addSwitch(WideChar from, WideChar to)
2751 switches_.push_back(from);
2752 switches_.push_back(to);
2753 switchUsed_.push_back(0);
2756 SyntaxChar CharSwitcher::subst(WideChar c)
2758 for (size_t i = 0; i < switches_.size(); i += 2)
2759 if (switches_[i] == c) {
2760 switchUsed_[i/2] = 1;
2761 return switches_[i + 1];
2766 size_t CharSwitcher::nSwitches() const
2768 return switchUsed_.size();
2771 Boolean CharSwitcher::switchUsed(size_t i) const
2773 return switchUsed_[i];
2776 WideChar CharSwitcher::switchFrom(size_t i) const
2778 return switches_[i*2];
2781 WideChar CharSwitcher::switchTo(size_t i) const
2783 return switches_[i*2 + 1];
2786 CharsetMessageArg::CharsetMessageArg(const ISet<WideChar> &set)
2791 MessageArg *CharsetMessageArg::copy() const
2793 return new CharsetMessageArg(*this);
2796 void CharsetMessageArg::append(MessageBuilder &builder) const
2798 ISetIter<WideChar> iter(set_);
2801 while (iter.next(min, max)) {
2805 builder.appendFragment(ParserMessages::listSep);
2806 builder.appendNumber(min);
2808 builder.appendFragment(max == min + 1
2809 ? ParserMessages::listSep
2810 : ParserMessages::rangeSep);
2811 builder.appendNumber(max);