2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these libraries and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
23 /* $XConsortium: parseSd.C /main/2 1996/08/12 15:47:30 mgreess $ */
24 // Copyright (c) 1994, 1995 James Clark
25 // See the file COPYING for copying permission.
30 #include "SdFormalError.h"
31 #include "MessageBuilder.h"
32 #include "ParserMessages.h"
33 #include "MessageArg.h"
34 #include "CharsetRegistry.h"
37 #include "TokenMessageArg.h"
40 #include "NumericCharRefOrigin.h"
43 namespace SP_NAMESPACE {
// Member declarations of the character-switch table. Objects of type
// CharSwitcher are used in this file via calls such as
// switcher.addSwitch(c, n), switcher.nSwitches() and switcher.switchTo(i).
// NOTE(review): the enclosing class header is not shown here -- confirm.
// Record a switch from one character number to another.
49 void addSwitch(WideChar from, WideChar to);
// Map a character through the table.
// NOTE(review): presumably also marks the matching switch as used -- confirm.
50 SyntaxChar subst(WideChar c);
// Indexed read access to the recorded switches.
51 size_t nSwitches() const;
52 Boolean switchUsed(size_t i) const;
53 WideChar switchFrom(size_t i) const;
54 WideChar switchTo(size_t i) const;
// Storage behind the accessors above.
// NOTE(review): layout of switches_ (pairs vs. parallel entries) is not
// visible here -- check the definitions before relying on it.
56 Vector<PackedBoolean> switchUsed_;
57 Vector<WideChar> switches_;
60 // Information about the SGML declaration being built.
// Members of the builder that the sdParse* functions receive as
// `sdBuilder` while an SGML declaration is being parsed.
// Queue a formal-public-identifier error; parseSgmlDecl drains the queue
// only when the finished declaration has FORMAL enabled.
64 void addFormalError(const Location &, const MessageType1 &, const StringC &);
// Declaration and description of the syntax-reference character set
// (filled in by sdParseSyntaxCharset).
67 CharsetDecl syntaxCharsetDecl;
68 CharsetInfo syntaxCharset;
// Character switches collected from the SWITCHES parameter.
69 CharSwitcher switcher;
// Set to 1 by sdParseSyntax when the concrete syntax comes from an
// external public entity; relaxes which parameters later parsers accept.
70 Boolean externalSyntax;
// Deferred formal errors added through addFormalError().
72 IList<SdFormalError> formalErrorList;
// Message argument wrapping a set of character numbers; used throughout
// this file to report missing, duplicate or invalid characters.
75 class CharsetMessageArg : public MessageArg {
77 CharsetMessageArg(const ISet<WideChar> &set);
// MessageArg interface: clone the argument and render it into a builder.
78 MessageArg *copy() const;
79 void append(MessageBuilder &) const;
// Members of the SGML-declaration parameter record (`SdParam`) that
// parseSdParam() fills in and the sdParse* functions inspect.
// Parameter kinds are small integers; see the enumerators below.
85 typedef unsigned char Type;
97 referenceReservedName,
// Base value for reserved-name kinds: callers form a concrete kind as
// SdParam::reservedName + Sd::rXXX.
99 reservedName // Sd::ReservedName is added to this
// Text of a parameter literal, expressed in syntax-reference characters.
104 String<SyntaxChar> paramLiteralText;
// Index payloads, one per name-like parameter kind.
// NOTE(review): whether these share storage (union) is not visible here.
107 Sd::Capacity capacityIndex;
108 Syntax::Quantity quantityIndex;
109 Syntax::ReservedName reservedNameIndex;
110 Syntax::DelimGeneral delimGeneralIndex;
// Small fixed-capacity set of parameter kinds that parseSdParam() may
// accept at a given point in the declaration.
114 class AllowedSdParams {
// Up to six kinds may be listed; unused slots default to SdParam::invalid.
116 AllowedSdParams(SdParam::Type,
117 SdParam::Type = SdParam::invalid,
118 SdParam::Type = SdParam::invalid,
119 SdParam::Type = SdParam::invalid,
120 SdParam::Type = SdParam::invalid,
121 SdParam::Type = SdParam::invalid);
// Membership test and positional access to the allowed kinds.
122 Boolean param(SdParam::Type) const;
123 SdParam::Type get(int i) const;
// Capacity matches the number of constructor parameters above.
125 enum { maxAllow = 6 };
126 SdParam::Type allow_[maxAllow];
// Message argument that carries an AllowedSdParams set together with the
// Sd it is constructed with.
// NOTE(review): presumably used to list the expected parameters in an
// error message -- the append() definition is not shown here.
129 class AllowedSdParamsMessageArg : public MessageArg {
131 AllowedSdParamsMessageArg(const AllowedSdParams &allow,
132 const ConstPtr<Sd> &sd);
// MessageArg interface.
133 MessageArg *copy() const;
134 void append(MessageBuilder &) const;
// Copy of the allowed set passed to the constructor.
136 AllowedSdParams allow_;
// Static description of a built-in concrete syntax, consumed by
// Parser::setStandardSyntax().
140 struct StandardSyntaxSpec {
// One added function character: initialised below as
// { name, function class, character number } (e.g. "TAB", cSEPCHAR, 9).
141 struct AddedFunction {
143 Syntax::FunctionClass functionClass;
144 SyntaxChar syntaxChar;
// Array of added functions and its element count.
146 const AddedFunction *addedFunction;
147 size_t nAddedFunction;
// Added function characters shared by the built-in syntaxes below.
151 static StandardSyntaxSpec::AddedFunction coreFunctions[] = {
// TAB is a separator character with character number 9.
152 { "TAB", Syntax::cSEPCHAR, 9 },
// The two built-in syntaxes differ only in their third initializer (0 vs 1).
// NOTE(review): that field's declaration is not shown; setStandardSyntax
// gates addRefDelimShortref on a spec field, so it is presumably the
// "has short references" flag -- confirm.
155 static StandardSyntaxSpec coreSyntax = {
156 coreFunctions, SIZEOF(coreFunctions), 0
159 static StandardSyntaxSpec refSyntax = {
160 coreFunctions, SIZEOF(coreFunctions), 1
// First parsing step. From the visible code it:
//  - stops quietly if the document entity could not be opened;
//  - reports minimum characters missing from the initial charset;
//  - looks for an SGML declaration at the start of the document, or failing
//    that in a catalog-supplied default declaration;
//  - parses that declaration (or implies one), queues an SgmlDeclEvent;
//  - compiles the prolog modes and switches to the prolog phase.
163 void Parser::doInit()
169 // When document entity doesn't exist, don't give any errors
170 // other than the cannot open error.
171 if (currentInput()->get(messenger()) == InputSource::eE) {
172 if (currentInput()->accessError()) {
178 currentInput()->ungetToken();
// The document charset known so far is used only to recognise "<!SGML".
179 const CharsetInfo &initCharset = sd().docCharset();
180 ISet<WideChar> missing;
181 findMissingMinimum(initCharset, missing);
182 if (!missing.isEmpty()) {
183 message(ParserMessages::sdMissingCharacters, CharsetMessageArg(missing));
// Try the document itself first ...
189 if (scanForSgmlDecl(initCharset))
192 currentInput()->ungetToken();
// ... then a default SGML declaration named by the entity catalog.
193 if (entityCatalog().sgmlDecl(initCharset, messenger(), systemId)) {
194 InputSource *in = entityManager().open(systemId,
196 new InputSourceOrigin,
201 if (scanForSgmlDecl(initCharset))
204 message(ParserMessages::badDefaultSgmlDecl);
211 if (startMarkup(eventsWanted().wantPrologMarkup(), currentLocation())) {
// The current token is the leading separators followed by the six
// characters "<!SGML" (see scanForSgmlDecl), so everything before the
// last six characters is recorded as separator markup.
212 size_t nS = currentInput()->currentTokenLength() - 6;
213 for (size_t i = 0; i < nS; i++)
214 currentMarkup()->addS(currentInput()->currentTokenStart()[i]);
215 currentMarkup()->addDelim(Syntax::dMDO);
// The reserved name starts four characters before the end of the token.
216 currentMarkup()->addSdReservedName(Sd::rSGML,
217 currentInput()->currentTokenStart()
218 + (currentInput()->currentTokenLength() - 4),
// Install the reference concrete syntax before parsing the declaration.
221 Syntax *syntaxp = new Syntax(sd());
222 CharSwitcher switcher;
223 if (!setStandardSyntax(*syntaxp, refSyntax, sd().docCharset(),
229 syntaxp->implySgmlChar(sd().docCharset());
// Keep the pre-declaration Sd and Syntax alive across parseSgmlDecl().
// Note: this local `refSyntax` shadows the file-level static of that name.
232 ConstPtr<Sd> refSd(sdPointer());
233 ConstPtr<Syntax> refSyntax(syntaxPointer());
234 if (!parseSgmlDecl()) {
238 // queue an SGML declaration event
239 eventHandler().sgmlDecl(new (eventAllocator())
240 SgmlDeclEvent(sdPointer(),
242 instanceSyntaxPointer(),
245 currentInput()->nextIndex(),
// NOTE(review): inputLevel() == 2 presumably means the declaration was read
// from the catalog-supplied entity opened above -- confirm.
249 if (inputLevel() == 2) {
250 // FIXME perhaps check for junk after SGML declaration
// No explicit declaration was found: fall back to an implied one.
255 if (!implySgmlDecl()) {
259 // queue an SGML declaration event
260 eventHandler().sgmlDecl(new (eventAllocator())
261 SgmlDeclEvent(sdPointer(),
265 // Now we have sd and syntax set up, prepare to parse the prolog.
266 compilePrologModes();
267 setPhase(prologPhase);
// Build the syntax used when the document has no SGML declaration: a
// standard syntax chosen according to options().shortref, with SGML
// characters implied from the document charset and quantities taken from
// the parser options.
// NOTE(review): which spec each branch of the shortref test selects is not
// shown here.
270 Boolean Parser::implySgmlDecl()
272 Syntax *syntaxp = new Syntax(sd());
273 const StandardSyntaxSpec *spec;
274 if (options().shortref)
// No character switches apply to an implied declaration.
278 CharSwitcher switcher;
279 if (!setStandardSyntax(*syntaxp, *spec, sd().docCharset(), switcher)) {
283 syntaxp->implySgmlChar(sd().docCharset());
// Override every quantity with the value configured in the options.
284 for (int i = 0; i < Syntax::nQuantity; i++)
285 syntaxp->setQuantity(i, options().quantity[i]);
// Populate `syn` with a built-in concrete syntax described by `spec`,
// translating each syntax-reference character into `docCharset` through
// `switcher`. Visible steps: validate the switches, add shunned
// characters, set RE/RS/SPACE, add the spec's extra function characters,
// add '-' and '.' as name characters, set name-case rules, then install
// the reference delimiters, reserved names and (conditionally) short
// reference delimiters.
290 Boolean Parser::setStandardSyntax(Syntax &syn,
291 const StandardSyntaxSpec &spec,
292 const CharsetInfo &docCharset,
293 CharSwitcher &switcher)
// Fixed charset in which the built-in syntaxes are expressed.
// NOTE(review): the range table contents are not shown; the ">= 128" test
// below suggests a 128-character set -- confirm.
295 static UnivCharsetDesc::Range syntaxCharsetRanges[] = {
298 static UnivCharsetDesc syntaxCharsetDesc(syntaxCharsetRanges,
299 SIZEOF(syntaxCharsetRanges));
300 static CharsetInfo syntaxCharset(syntaxCharsetDesc);
303 if (!checkSwitches(switcher, syntaxCharset))
// A switch target of 128 or above is reported as not in the charset.
306 for (i = 0; i < switcher.nSwitches(); i++)
307 if (switcher.switchTo(i) >= 128)
308 message(ParserMessages::switchNotInCharset,
309 NumberMessageArg(switcher.switchTo(i)));
// Shunned character numbers (table only partly shown here).
310 static const Char shunchar[] = {
311 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
312 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
316 for (i = 0; i < SIZEOF(shunchar); i++)
317 syn.addShunchar(shunchar[i]);
318 syn.setShuncharControls();
// Standard functions paired index-for-index with their character numbers:
// RE = 13, RS = 10, SPACE = 32.
319 static Syntax::StandardFunction standardFunctions[3] = {
320 Syntax::fRE, Syntax::fRS, Syntax::fSPACE
322 static SyntaxChar functionChars[3] = { 13, 10, 32 };
323 for (i = 0; i < 3; i++) {
325 if (translateSyntax(switcher,
330 && checkNotFunction(syn, docChar))
331 syn.setStandardFunction(standardFunctions[i], docChar);
// Extra function characters supplied by the spec (e.g. TAB).
335 for (i = 0; i < spec.nAddedFunction; i++) {
337 if (translateSyntax(switcher,
340 spec.addedFunction[i].syntaxChar,
342 && checkNotFunction(syn, docChar))
343 syn.addFunctionChar(docCharset.execToDesc(spec.addedFunction[i].name),
344 spec.addedFunction[i].functionClass,
350 static SyntaxChar nameChars[2] = { 45, 46 }; // '-' '.'
351 ISet<Char> nameCharSet;
352 for (i = 0; i < 2; i++) {
354 if (translateSyntax(switcher,
359 nameCharSet.add(docChar);
363 if (!checkNmchars(nameCharSet, syn))
366 syn.addNameCharacters(nameCharSet);
// General names use case substitution (1); entity names do not (0).
367 syn.setNamecaseGeneral(1);
368 syn.setNamecaseEntity(0);
369 if (!setRefDelimGeneral(syn, syntaxCharset, docCharset, switcher))
371 setRefNames(syn, docCharset);
372 syn.enterStandardFunctionNames();
374 && !addRefDelimShortref(syn, syntaxCharset, docCharset, switcher))
// Give every general delimiter that `syntax` has not yet defined its
// reference value, translated into `docCharset`. Delimiter characters
// that cannot be represented are collected in `missing` and reported once
// at the end.
379 Boolean Parser::setRefDelimGeneral(Syntax &syntax,
380 const CharsetInfo &syntaxCharset,
381 const CharsetInfo &docCharset,
382 CharSwitcher &switcher)
384 // Column 3 from Figure 3
// Each entry holds up to two characters, terminated early by '\0'.
385 static const char delims[][2] = {
419 ISet<WideChar> missing;
420 for (int i = 0; i < Syntax::nDelimGeneral; i++)
// Only fill in delimiters that are still empty.
421 if (syntax.delimGeneral(i).size() == 0) {
424 for (j = 0; j < 2 && delims[i][j] != '\0'; j++) {
425 UnivChar univChar = translateUniv(delims[i][j], switcher,
428 if (univToDescCheck(docCharset, univChar, c))
// Install the delimiter only if every one of its characters translated.
435 if (delim.size() == j) {
436 if (checkGeneralDelim(syntax, delim))
437 syntax.setDelimGeneral(i, delim);
442 if (!missing.isEmpty())
443 message(ParserMessages::missingSignificant646, CharsetMessageArg(missing));
// Install the reference reserved names into `syntax`, converting each from
// the execution charset to `docCharset`. A name already in use as a
// reserved name is reported; a slot that already has a name is left alone.
447 void Parser::setRefNames(Syntax &syntax, const CharsetInfo &docCharset)
// Indexed by Syntax::ReservedName (table contents not shown here).
449 static const char *const referenceNames[] = {
508 for (i = 0; i < Syntax::nNames; i++) {
509 StringC docName(docCharset.execToDesc(referenceNames[i]));
510 Syntax::ReservedName tem;
// Clash: the reference name is already taken by some reserved name.
511 if (syntax.lookupReservedName(docName, &tem))
512 message(ParserMessages::nameReferenceReservedName,
513 StringMessageArg(docName));
// Do not overwrite a name the declaration set explicitly.
514 if (syntax.reservedName(Syntax::ReservedName(i)).size() == 0)
515 syntax.setName(i, docName);
// Add the reference short-reference delimiters to `syntax`, translating
// each character into `docCharset`. Characters that cannot be represented
// are collected in `missing` and reported once at the end.
519 Boolean Parser::addRefDelimShortref(Syntax &syntax,
520 const CharsetInfo &syntaxCharset,
521 const CharsetInfo &docCharset,
522 CharSwitcher &switcher)
524 // Column 2 from Figure 4
// Each entry holds up to three characters, terminated early by '\0'.
525 static const char delimShortref[][3] = {
559 ISet<WideChar> missing;
561 for (size_t i = 0; i < SIZEOF(delimShortref); i++) {
565 for (j = 0; j < 3 && delimShortref[i][j] != '\0'; j++) {
567 UnivChar univChar = translateUniv(delimShortref[i][j], switcher,
569 if (univToDescCheck(docCharset, univChar, c))
// Proceed only if every character of the delimiter translated.
574 if (delim.size() == j) {
// The duplicate check runs only when character switches are in effect.
575 if (switcher.nSwitches() > 0 && syntax.isValidShortref(delim))
576 message(ParserMessages::duplicateDelimShortref,
577 StringMessageArg(delim));
579 syntax.addDelimShortref(delim, docCharset);
582 if (!missing.isEmpty())
583 message(ParserMessages::missingSignificant646, CharsetMessageArg(missing));
587 // Determine whether the document starts with an SGML declaration.
588 // There is no current syntax at this point.
// Scan the current input for optional RS/RE/SPACE/TAB characters followed
// by "<!SGML" (letters matched in either case), where the next character
// is not a name character. Characters are compared in `initCharset`
// because no syntax exists yet.
590 Boolean Parser::scanForSgmlDecl(const CharsetInfo &initCharset)
// Find the charset's codes for the four separator characters.
593 if (!univToDescCheck(initCharset, UnivCharsetDesc::rs, rs))
596 if (!univToDescCheck(initCharset, UnivCharsetDesc::re, re))
599 if (!univToDescCheck(initCharset, UnivCharsetDesc::space, space))
602 if (!univToDescCheck(initCharset, UnivCharsetDesc::tab, tab))
604 InputSource *in = currentInput();
605 Xchar c = in->get(messenger());
// Skip leading separators; they stay part of the current token.
606 while (c == rs || c == space || c == re || c == tab)
607 c = in->tokenChar(messenger());
608 if (c != initCharset.execToDesc('<'))
610 if (in->tokenChar(messenger()) != initCharset.execToDesc('!'))
612 c = in->tokenChar(messenger());
613 if (c != initCharset.execToDesc('S')
614 && c != initCharset.execToDesc('s'))
616 c = in->tokenChar(messenger());
617 if (c != initCharset.execToDesc('G')
618 && c != initCharset.execToDesc('g'))
620 c = in->tokenChar(messenger());
621 if (c != initCharset.execToDesc('M')
622 && c != initCharset.execToDesc('m'))
624 c = in->tokenChar(messenger());
625 if (c != initCharset.execToDesc('L')
626 && c != initCharset.execToDesc('l'))
628 c = in->tokenChar(messenger());
629 // Don't recognize this if SGML is followed by a name character.
630 if (c == InputSource::eE)
// Drop the lookahead character so the token ends right after "SGML".
632 in->endToken(in->currentTokenLength() - 1);
// Name characters checked here: '-', '.', letters and digits.
633 if (c == initCharset.execToDesc('-'))
635 if (c == initCharset.execToDesc('.'))
638 if (!initCharset.descToUniv(c, univ))
640 if (UnivCharsetDesc::a <= univ && univ < UnivCharsetDesc::a + 26)
642 if (UnivCharsetDesc::A <= univ && univ < UnivCharsetDesc::A + 26)
644 if (UnivCharsetDesc::zero <= univ && univ < UnivCharsetDesc::zero + 10)
// Add to `missing` the universal number of every minimum character that
// `charset` cannot represent: A-Z, a-z, 0-9 and the special characters
// listed below.
649 void Parser::findMissingMinimum(const CharsetInfo &charset,
650 ISet<WideChar> &missing)
// Upper- and lower-case letters.
654 for (i = 0; i < 26; i++) {
655 if (!univToDescCheck(charset, UnivCharsetDesc::A + i, to))
656 missing += UnivCharsetDesc::A + i;
657 if (!univToDescCheck(charset, UnivCharsetDesc::a + i, to))
658 missing += UnivCharsetDesc::a + i;
// Digits.
660 for (i = 0; i < 10; i++) {
662 if (!univToDescCheck(charset, UnivCharsetDesc::zero + i, to))
663 missing += UnivCharsetDesc::zero + i;
// Read as ASCII these numbers are: ' ( ) + , - . / : = ?
665 static const UnivChar special[] = {
666 39, 40, 41, 43, 44, 45, 46, 47, 58, 61, 63
669 for (i = 0; i < SIZEOF(special); i++)
670 if (!univToDescCheck(charset, special[i], to))
671 missing += special[i];
// Parse a complete SGML declaration: the version literal, then each section
// in order (document charset, capacity, scope, syntax, features, appinfo),
// then the closing MDC. On success the new Sd and syntax(es) are
// installed on the parser.
675 Boolean Parser::parseSgmlDecl()
680 if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))
// A version literal other than the expected one is reported; the visible
// code shows no early return for it.
682 StringC version(sd().execToDoc("ISO 8879:1986"));
683 if (parm.literalText.string() != version)
684 message(ParserMessages::standardVersion,
685 StringMessageArg(parm.literalText.string()));
686 sdBuilder.sd = new Sd;
// Section parsers, run in declaration order.
687 typedef Boolean (Parser::*SdParser)(SdBuilder &, SdParam &);
688 static SdParser parsers[] = {
689 &Parser::sdParseDocumentCharset,
690 &Parser::sdParseCapacity,
691 &Parser::sdParseScope,
692 &Parser::sdParseSyntax,
693 &Parser::sdParseFeatures,
694 &Parser::sdParseAppinfo,
696 for (size_t i = 0; i < SIZEOF(parsers); i++) {
697 if (!(this->*(parsers[i]))(sdBuilder, parm))
699 if (!sdBuilder.valid)
702 if (!parseSdParam(AllowedSdParams(SdParam::mdc), parm))
// Formal public identifier errors were deferred; drain them only when
// FORMAL is enabled in the declaration.
704 if (sdBuilder.sd->formal()) {
705 while (!sdBuilder.formalErrorList.empty()) {
706 SdFormalError *p = sdBuilder.formalErrorList.get();
707 ParserState *state = this; // work around lcc 3.0 bug
712 setSd(sdBuilder.sd.pointer());
// SCOPE INSTANCE: the prolog uses the reference syntax while the instance
// uses the declared syntax; both are checked for invalid SGML characters.
713 if (sdBuilder.sd->scopeInstance()) {
714 Syntax *proSyntax = new Syntax(sd());
715 CharSwitcher switcher;
716 setStandardSyntax(*proSyntax, refSyntax, sd().docCharset(), switcher);
717 proSyntax->setSgmlChar(*sdBuilder.syntax->charSet(Syntax::sgmlChar));
718 ISet<WideChar> invalidSgmlChar;
719 proSyntax->checkSgmlChar(sdBuilder.sd->docCharset(),
720 sdBuilder.syntax.pointer(),
722 sdBuilder.syntax->checkSgmlChar(sdBuilder.sd->docCharset(),
725 if (!invalidSgmlChar.isEmpty())
726 message(ParserMessages::invalidSgmlChar, CharsetMessageArg(invalidSgmlChar));
727 setSyntaxes(proSyntax, sdBuilder.syntax.pointer());
// Otherwise one syntax serves both prolog and instance.
730 setSyntax(sdBuilder.syntax.pointer());
// A multicode syntax needs its markup scan table on the current input.
731 if (syntax().multicode())
732 currentInput()->setMarkupScanTable(syntax().markupScanTable());
// Parse the document character set section (CHARSET BASESET ...), report
// any minimum characters it lacks, store the charset on the Sd being
// built, and create the Syntax object that later sections fill in.
736 Boolean Parser::sdParseDocumentCharset(SdBuilder &sdBuilder, SdParam &parm)
738 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rCHARSET),
741 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rBASESET),
745 UnivCharsetDesc desc;
// Third argument 1 selects document-charset handling in sdParseCharset.
746 if (!sdParseCharset(sdBuilder, parm, 1, decl, desc))
748 ISet<WideChar> missing;
749 findMissingMinimum(desc, missing);
750 if (!missing.isEmpty()) {
751 message(ParserMessages::missingMinimumChars,
752 CharsetMessageArg(missing));
// The SGML character set starts from the characters decl reports as used.
756 decl.usedSet(sgmlChar);
757 sdBuilder.sd->setDocCharsetDesc(desc);
758 sdBuilder.sd->setDocCharsetDecl(decl);
759 sdBuilder.syntax = new Syntax(*sdBuilder.sd);
760 sdBuilder.syntax->setSgmlChar(sgmlChar);
// Parse one or more BASESET/DESCSET groups into `decl` (the declaration as
// written) and `desc` (the resulting universal description). Used for
// both the document charset and the syntax-reference charset. After
// parsing it reports character numbers declared more than once and gaps
// between declared ranges, and for a syntax charset under SCOPE INSTANCE
// checks that the charset is the expected reference set.
764 Boolean Parser::sdParseCharset(SdBuilder &sdBuilder,
768 UnivCharsetDesc &desc)
771 ISet<WideChar> multiplyDeclared;
772 // This is for checking whether the syntax reference character set
773 // is ISO 646 when SCOPE is INSTANCE.
774 Boolean maybeISO646 = 1;
// Base set: a minimum literal holding a public identifier.
776 if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))
778 UnivCharsetDesc baseDesc;
781 PublicId::TextClass textClass;
782 const MessageType1 *err;
// Problems with the public identifier are queued as formal errors.
783 if (!id.init(parm.literalText, sd().docCharset(), syntax().space(), err))
784 sdBuilder.addFormalError(currentLocation(),
787 else if (id.getTextClass(textClass)
788 && textClass != PublicId::CHARSET)
789 sdBuilder.addFormalError(currentLocation(),
790 ParserMessages::basesetTextClass,
// Resolve the base set: an external entity first, else the built-in registry.
793 if (referencePublic(id, PublicId::CHARSET, givenError))
794 found = sdParseExternalCharset(*sdBuilder.sd, baseDesc);
795 else if (!givenError) {
796 found = CharsetRegistry::findCharset(id, sd().docCharset(), baseDesc);
797 if (!found && options().warnSgmlDecl)
798 message(ParserMessages::unknownBaseset, StringMessageArg(id.string()));
805 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rDESCSET),
// Each DESCSET entry: first character number, count, then a base number,
// a minimum literal, or UNUSED.
808 if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
811 WideChar min = parm.n;
812 if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
814 Number count = parm.n;
816 if (options().warnSgmlDecl && count == 0)
817 message(ParserMessages::zeroNumberOfCharacters);
818 decl.rangeDeclared(min, count, multiplyDeclared);
// Ranges running past charMax are reported and clipped via adjCount.
821 && (min > charMax || count - 1 > charMax - min)) {
822 message(ParserMessages::documentCharMax, NumberMessageArg(charMax));
823 adjCount = min > charMax ? 0 : 1 + (charMax - min);
829 if (!parseSdParam(AllowedSdParams(SdParam::number,
830 SdParam::minimumLiteral,
831 SdParam::reservedName + Sd::rUNUSED),
// Range mapped onto the base set starting at parm.n.
835 case SdParam::number:
836 decl.addRange(min, count, parm.n);
837 if (found && adjCount > 0) {
838 ISet<WideChar> baseMissing;
839 desc.addBaseRange(baseDesc, min, min + (adjCount - 1), parm.n,
841 if (!baseMissing.isEmpty() && options().warnSgmlDecl)
842 message(ParserMessages::basesetCharsMissing,
843 CharsetMessageArg(baseMissing));
// Range declared but unused: recorded in decl only.
846 case SdParam::reservedName + Sd::rUNUSED:
847 decl.addRange(min, count);
// Range described by name: each position maps to the same universal
// character; more than 256 positions is reported.
849 case SdParam::minimumLiteral:
851 UnivChar c = sdBuilder.sd->nameToUniv(parm.literalText.string());
852 if (adjCount > 256) {
853 message(ParserMessages::tooManyCharsMinimumLiteral);
856 for (Number i = 0; i < adjCount; i++)
857 desc.addRange(min + i, min + i, c);
860 decl.addRange(min, count, parm.literalText.string());
// The keyword ending the charset depends on which charset is being parsed.
865 SdParam::Type follow = (isDocument
866 ? SdParam::reservedName + Sd::rCAPACITY
867 : SdParam::reservedName + Sd::rFUNCTION);
868 if (!parseSdParam(AllowedSdParams(SdParam::number,
869 SdParam::reservedName + Sd::rBASESET,
// Inner loop: further DESCSET entries. Outer loop: further BASESETs.
874 } while (parm.type == SdParam::number);
875 } while (parm.type == SdParam::reservedName + Sd::rBASESET);
876 if (!multiplyDeclared.isEmpty())
877 message(ParserMessages::duplicateCharNumbers,
878 CharsetMessageArg(multiplyDeclared));
// Report gaps between consecutive declared ranges.
879 ISet<WideChar> declaredSet;
880 decl.declaredSet(declaredSet);
881 ISetIter<WideChar> iter(declaredSet);
882 WideChar min, max, lastMax;
883 if (iter.next(min, max)) {
884 ISet<WideChar> holes;
886 while (iter.next(min, max)) {
887 if (min - lastMax > 1)
888 holes.addRange(lastMax + 1, min - 1);
891 if (!holes.isEmpty())
892 message(ParserMessages::codeSetHoles, CharsetMessageArg(holes));
894 if (!isDocument && sdBuilder.sd->scopeInstance()) {
895 // If scope is INSTANCE, syntax reference character set
896 // must be same as reference.
897 UnivCharsetDescIter iter(desc);
898 WideChar descMin, descMax;
900 if (!iter.next(descMin, descMax, univMin)
905 message(ParserMessages::scopeInstanceSyntaxCharset);
// Read a charset description from an external entity into `desc`. Each
// entry is a first character number and a count, followed by a base
// number, a minimum literal, or UNUSED; reading stops at the entity end.
910 Boolean Parser::sdParseExternalCharset(Sd &sd, UnivCharsetDesc &desc)
914 if (!parseSdParam(AllowedSdParams(SdParam::number, SdParam::eE),
// Entity end terminates the list of entries.
917 if (parm.type == SdParam::eE)
919 WideChar min = parm.n;
920 if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
922 Number count = parm.n;
923 if (!parseSdParam(AllowedSdParams(SdParam::number,
924 SdParam::minimumLiteral,
925 SdParam::reservedName + Sd::rUNUSED),
// Numeric form: map the range onto universal numbers starting at parm.n.
// NOTE(review): count - 1 wraps if count is 0; confirm that a guard exists
// in the lines not shown here.
928 if (parm.type == SdParam::number) {
930 desc.addRange(min, min + (count - 1), parm.n);
// Literal form: every position maps to the named universal character.
932 else if (parm.type == SdParam::minimumLiteral) {
933 UnivChar c = sd.nameToUniv(parm.literalText.string());
935 message(ParserMessages::tooManyCharsMinimumLiteral);
938 for (Number i = 0; i < count; i++)
939 desc.addRange(min + i, min + i, c);
// Parse the CAPACITY section: either PUBLIC with a public identifier, or
// SGMLREF followed by name/number pairs. Each capacity may be set once;
// repeats are reported (when warnSgmlDecl is on) and ignored. Afterwards
// any capacity exceeding capacity 0 (the total) is reported. Ends by
// consuming the SCOPE keyword.
946 Boolean Parser::sdParseCapacity(SdBuilder &sdBuilder, SdParam &parm)
948 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rPUBLIC,
949 SdParam::reservedName + Sd::rSGMLREF),
953 if (parm.type == SdParam::reservedName + Sd::rPUBLIC) {
954 if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))
957 PublicId::TextClass textClass;
958 const MessageType1 *err;
// Problems with the public identifier are queued as formal errors.
959 if (!id.init(parm.literalText, sd().docCharset(), syntax().space(), err))
960 sdBuilder.addFormalError(currentLocation(),
963 else if (id.getTextClass(textClass)
964 && textClass != PublicId::CAPACITY)
965 sdBuilder.addFormalError(currentLocation(),
966 ParserMessages::capacityTextClass,
// The two spellings of the reference capacity set need no lookup; any
// other identifier is resolved as an external entity.
968 const StringC &str = id.string();
969 if (str != sd().execToDoc("ISO 8879-1986//CAPACITY Reference//EN")
970 && str != sd().execToDoc("ISO 8879:1986//CAPACITY Reference//EN")) {
972 if (referencePublic(id, PublicId::CAPACITY, givenError))
974 else if (!givenError)
975 message(ParserMessages::unknownCapacitySet, StringMessageArg(str));
978 return parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSCOPE),
// Tracks which capacities have been given, to detect duplicates.
982 PackedBoolean capacitySpecified[Sd::nCapacity];
984 for (i = 0; i < Sd::nCapacity; i++)
985 capacitySpecified[i] = 0;
986 if (!parseSdParam(AllowedSdParams(SdParam::capacityName), parm))
// Save the index: parm is overwritten when the number is parsed.
989 Sd::Capacity capacityIndex = parm.capacityIndex;
990 if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
993 if (!capacitySpecified[capacityIndex]) {
994 sdBuilder.sd->setCapacity(capacityIndex, parm.n);
995 capacitySpecified[capacityIndex] = 1;
// Name the capacity that was repeated. `i` must not be used here: it is
// left equal to Sd::nCapacity by the initialisation loop above, which is
// out of range for capacityName().
997 else if (options().warnSgmlDecl)
998 message(ParserMessages::duplicateCapacity,
999 StringMessageArg(sd().capacityName(capacityIndex)));
// A pushed external entity ends with entity end; otherwise SCOPE follows.
1000 int final = pushed ? int(SdParam::eE) : SdParam::reservedName + Sd::rSCOPE;
1001 if (!parseSdParam(AllowedSdParams(SdParam::capacityName, final),
1004 } while (parm.type == SdParam::capacityName);
// Capacity 0 is the total; no individual capacity may exceed it.
1005 Number totalcap = sdBuilder.sd->capacity(0);
1006 for (i = 1; i < Sd::nCapacity; i++)
1007 if (sdBuilder.sd->capacity(i) > totalcap)
1008 message(ParserMessages::capacityExceedsTotalcap,
1009 StringMessageArg(sd().capacityName(i)));
1011 return parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSCOPE),
// Try to resolve public identifier `id` through the entity catalog and, if
// found, open it as a new input source after queuing an
// SgmlDeclEntityEvent.
// NOTE(review): the return value and the way `givenError` is set are in
// lines not shown here; callers treat a true return as "entity opened" and
// suppress their own diagnostic when givenError is set.
1016 Boolean Parser::referencePublic(const PublicId &id,
1017 PublicId::TextClass entityType,
1018 Boolean &givenError)
1022 if (entityCatalog().lookupPublic(id.string(),
// Capture the location before the input stack changes.
1026 Location loc = currentLocation();
1027 eventHandler().sgmlDeclEntity(new (eventAllocator())
1028 SgmlDeclEntityEvent(id,
1032 Ptr<EntityOrigin> origin(new EntityOrigin(loc));
// Record the entity start in the markup when markup is being collected.
1033 if (currentMarkup())
1034 currentMarkup()->addEntityStart(origin);
1035 InputSource *in = entityManager().open(sysid,
// Parse the SCOPE value, which is INSTANCE or DOCUMENT. INSTANCE is
// recorded on the Sd being built; the visible code takes no action for
// DOCUMENT.
1050 Boolean Parser::sdParseScope(SdBuilder &sdBuilder, SdParam &parm)
1052 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rINSTANCE,
1053 SdParam::reservedName + Sd::rDOCUMENT),
1056 if (parm.type == SdParam::reservedName + Sd::rINSTANCE)
1057 sdBuilder.sd->setScopeInstance();
// Parse the SYNTAX section. Two forms are accepted:
//  - PUBLIC "id" [SWITCHES n n ...]: a built-in syntax found by
//    lookupSyntax(), or an external entity holding an explicit syntax;
//  - an explicit syntax beginning with SHUNCHAR.
// Afterwards it checks SGML characters (unless SCOPE is INSTANCE), name
// length and switch usage.
1061 Boolean Parser::sdParseSyntax(SdBuilder &sdBuilder, SdParam &parm)
1063 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSYNTAX),
1066 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSHUNCHAR,
1067 SdParam::reservedName + Sd::rPUBLIC),
1071 if (parm.type == SdParam::reservedName + Sd::rPUBLIC) {
1072 if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))
1075 const MessageType1 *err;
1076 PublicId::TextClass textClass;
// Problems with the public identifier are queued as formal errors.
1077 if (!id.init(parm.literalText, sd().docCharset(), syntax().space(), err))
1078 sdBuilder.addFormalError(currentLocation(),
1081 else if (id.getTextClass(textClass)
1082 && textClass != PublicId::SYNTAX)
1083 sdBuilder.addFormalError(currentLocation(),
1084 ParserMessages::syntaxTextClass,
1086 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rFEATURES,
1087 SdParam::reservedName + Sd::rSWITCHES),
1090 Vector<UnivChar> charSwitches;
// SWITCHES: pairs of numbers, each recorded as a from/to switch.
1091 if (parm.type == SdParam::reservedName + Sd::rSWITCHES) {
1092 if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
1095 SyntaxChar c = parm.n;
1096 if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
1098 sdBuilder.switcher.addSwitch(c, parm.n);
1099 if (!parseSdParam(AllowedSdParams(SdParam::number,
1100 SdParam::reservedName
1104 if (parm.type != SdParam::number)
// Known built-in syntax: install it directly.
1108 const StandardSyntaxSpec *spec = lookupSyntax(id);
1110 if (!setStandardSyntax(*sdBuilder.syntax,
1112 sdBuilder.sd->docCharset(),
1113 sdBuilder.switcher))
1114 sdBuilder.valid = 0;
// Otherwise try an external entity containing an explicit syntax.
1118 if (referencePublic(id, PublicId::SYNTAX, givenError)) {
1119 sdBuilder.externalSyntax = 1;
1121 if (!parseSdParam(AllowedSdParams(SdParam::reservedName
1125 if (!sdParseExplicitSyntax(sdBuilder, parm2))
1130 message(ParserMessages::unknownPublicSyntax,
1131 StringMessageArg(id.string()));
1132 sdBuilder.valid = 0;
// Explicit syntax written inline in the declaration.
1137 if (!sdParseExplicitSyntax(sdBuilder, parm))
1140 if (!sdBuilder.sd->scopeInstance()) {
1141 // we know the significant chars now
1142 ISet<WideChar> invalidSgmlChar;
1143 sdBuilder.syntax->checkSgmlChar(sdBuilder.sd->docCharset(),
1146 if (!invalidSgmlChar.isEmpty())
1147 message(ParserMessages::invalidSgmlChar, CharsetMessageArg(invalidSgmlChar));
1149 checkSyntaxNamelen(*sdBuilder.syntax);
1150 checkSwitchesMarkup(sdBuilder.switcher);
// Parse an explicit concrete syntax by running each sub-section parser in
// order: SHUNCHAR, syntax charset, FUNCTION, NAMING, DELIM, NAMES,
// QUANTITY.
1154 Boolean Parser::sdParseExplicitSyntax(SdBuilder &sdBuilder,
1157 typedef Boolean (Parser::*SdParser)(SdBuilder &, SdParam &);
// Order matters: it is the order the sub-sections appear in the declaration.
1158 static SdParser parsers[] = {
1159 &Parser::sdParseShunchar,
1160 &Parser::sdParseSyntaxCharset,
1161 &Parser::sdParseFunction,
1162 &Parser::sdParseNaming,
1163 &Parser::sdParseDelim,
1164 &Parser::sdParseNames,
1165 &Parser::sdParseQuantity
1167 for (size_t i = 0; i < SIZEOF(parsers); i++)
1168 if (!(this->*(parsers[i]))(sdBuilder, parm))
// Map a public identifier to a built-in syntax spec. The identifier must
// have an ISO owner spelled "ISO 8879:1986" or "ISO 8879-1986", text class
// SYNTAX, and description "Reference" or "Core".
// NOTE(review): the value returned on each path is in lines not shown
// here; presumably refSyntax / coreSyntax, and null when nothing matches.
1173 const StandardSyntaxSpec *Parser::lookupSyntax(const PublicId &id)
1175 PublicId::OwnerType ownerType;
1176 if (!id.getOwnerType(ownerType) || ownerType != PublicId::ISO)
1179 if (!id.getOwner(str))
1181 if (str != sd().execToDoc("ISO 8879:1986")
1182 && str != sd().execToDoc("ISO 8879-1986"))
1184 PublicId::TextClass textClass;
1185 if (!id.getTextClass(textClass) || textClass != PublicId::SYNTAX)
1187 if (!id.getDescription(str))
1189 if (str == sd().execToDoc("Reference"))
1191 if (str == sd().execToDoc("Core"))
// Parse the syntax-reference character set into the builder, then
// validate the character switches against it and report any minimum
// characters it lacks.
1196 Boolean Parser::sdParseSyntaxCharset(SdBuilder &sdBuilder, SdParam &parm)
1198 UnivCharsetDesc desc;
// Third argument 0 selects syntax-charset handling in sdParseCharset.
1199 if (!sdParseCharset(sdBuilder, parm, 0, sdBuilder.syntaxCharsetDecl, desc))
1201 sdBuilder.syntaxCharset.set(desc);
// The result of checkSwitches is not used here.
1202 checkSwitches(sdBuilder.switcher, sdBuilder.syntaxCharset);
// Every switch target must be declared in the syntax charset.
1203 for (size_t i = 0; i < sdBuilder.switcher.nSwitches(); i++)
1204 if (!sdBuilder.syntaxCharsetDecl.charDeclared(sdBuilder.switcher.switchTo(i)))
1205 message(ParserMessages::switchNotInCharset,
1206 NumberMessageArg(sdBuilder.switcher.switchTo(i)));
1207 ISet<WideChar> missing;
1208 findMissingMinimum(sdBuilder.syntaxCharset, missing);
1209 if (!missing.isEmpty())
1210 message(ParserMessages::missingMinimumChars,
1211 CharsetMessageArg(missing));
// Parse the SHUNCHAR value: NONE, or CONTROLS and/or a list of character
// numbers, terminated by the BASESET keyword that opens the next section.
// A number is added as a shunned character only when it is at most
// charMax.
1215 Boolean Parser::sdParseShunchar(SdBuilder &sdBuilder, SdParam &parm)
1217 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNONE,
1218 SdParam::reservedName + Sd::rCONTROLS,
1219 SdParam::number), parm))
// NONE: nothing to record; just consume the following BASESET.
1221 if (parm.type == SdParam::reservedName + Sd::rNONE) {
1222 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rBASESET),
1227 if (parm.type == SdParam::reservedName + Sd::rCONTROLS)
1228 sdBuilder.syntax->setShuncharControls();
1230 if (parm.n <= charMax)
1231 sdBuilder.syntax->addShunchar(Char(parm.n));
// Further numbers until BASESET is seen.
1234 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rBASESET,
1235 SdParam::number), parm))
1237 if (parm.type != SdParam::number)
1239 if (parm.n <= charMax)
1240 sdBuilder.syntax->addShunchar(Char(parm.n));
// Parse the FUNCTION section: the three standard functions RE, RS and
// SPACE with their character numbers, then any added functions, each
// given as name, class (FUNCHAR, MSICHAR, MSOCHAR, MSSCHAR or SEPCHAR) and
// character number. The list ends at LCNMSTRT, which must be preceded by
// NAMING. An MSOCHAR without any MSICHAR is reported.
1245 Boolean Parser::sdParseFunction(SdBuilder &sdBuilder, SdParam &parm)
// Keywords expected in order; index i also serves as the
// Syntax::StandardFunction value below.
1247 static Sd::ReservedName standardNames[3] = {
1248 Sd::rRE, Sd::rRS, Sd::rSPACE
1250 for (int i = 0; i < 3; i++) {
1251 if (!parseSdParam(AllowedSdParams(SdParam::reservedName
1252 + standardNames[i]),
1255 if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
1258 if (translateSyntax(sdBuilder, parm.n, c)) {
1259 if (checkNotFunction(*sdBuilder.syntax, c))
1260 sdBuilder.syntax->setStandardFunction(Syntax::StandardFunction(i), c);
1262 sdBuilder.valid = 0;
1265 Boolean haveMsichar = 0;
1266 Boolean haveMsochar = 0;
// Only an external syntax may give the function name as a parameter literal.
1268 if (!parseSdParam(sdBuilder.externalSyntax
1269 ? AllowedSdParams(SdParam::name, SdParam::paramLiteral)
1270 : AllowedSdParams(SdParam::name),
1273 Boolean nameWasLiteral;
1274 Boolean invalidName = 0;
1276 if (parm.type == SdParam::paramLiteral) {
1278 if (!translateSyntax(sdBuilder, parm.paramLiteralText, name))
1282 parm.token.swap(name);
// After a literal name, LCNMSTRT is not accepted as the next parameter.
1285 if (!parseSdParam(nameWasLiteral
1286 ? AllowedSdParams(SdParam::reservedName + Sd::rFUNCHAR,
1287 SdParam::reservedName + Sd::rMSICHAR,
1288 SdParam::reservedName + Sd::rMSOCHAR,
1289 SdParam::reservedName + Sd::rMSSCHAR,
1290 SdParam::reservedName + Sd::rSEPCHAR)
1291 : AllowedSdParams(SdParam::reservedName + Sd::rFUNCHAR,
1292 SdParam::reservedName + Sd::rMSICHAR,
1293 SdParam::reservedName + Sd::rMSOCHAR,
1294 SdParam::reservedName + Sd::rMSSCHAR,
1295 SdParam::reservedName + Sd::rSEPCHAR,
1296 SdParam::reservedName + Sd::rLCNMSTRT),
// LCNMSTRT ends the section; the name read just before it must be NAMING.
1299 if (parm.type == SdParam::reservedName + Sd::rLCNMSTRT) {
1300 if (name != sd().reservedName(Sd::rNAMING))
1301 message(ParserMessages::namingBeforeLcnmstrt,
1302 StringMessageArg(name));
1305 if (!nameWasLiteral) {
1308 if (!translateName(sdBuilder, tem, name))
// Map the class keyword to the corresponding Syntax function class.
1311 Syntax::FunctionClass functionClass;
1312 switch (parm.type) {
1313 case SdParam::reservedName + Sd::rFUNCHAR:
1314 functionClass = Syntax::cFUNCHAR;
1316 case SdParam::reservedName + Sd::rMSICHAR:
1318 functionClass = Syntax::cMSICHAR;
1320 case SdParam::reservedName + Sd::rMSOCHAR:
1322 functionClass = Syntax::cMSOCHAR;
1324 case SdParam::reservedName + Sd::rMSSCHAR:
1325 functionClass = Syntax::cMSSCHAR;
1327 case SdParam::reservedName + Sd::rSEPCHAR:
1328 functionClass = Syntax::cSEPCHAR;
1333 if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
// Add the function only if its character translates and is not already a
// function character; a repeated name is reported.
1336 if (translateSyntax(sdBuilder, parm.n, c)
1337 && checkNotFunction(*sdBuilder.syntax, c)
1340 if (sdBuilder.syntax->lookupFunctionChar(name, &tem))
1341 message(ParserMessages::duplicateFunctionName, StringMessageArg(name));
1343 sdBuilder.syntax->addFunctionChar(name, functionClass, c);
1346 if (haveMsochar && !haveMsichar)
1347 message(ParserMessages::msocharRequiresMsichar);
1351 Boolean Parser::sdParseNaming(SdBuilder &sdBuilder, SdParam &parm)
1353 static Sd::ReservedName keys[4] = {
1354 Sd::rUCNMSTRT, Sd::rLCNMCHAR, Sd::rUCNMCHAR, Sd::rNAMECASE
1357 ISet<Char> nameStartChar;
1358 ISet<Char> nameChar;
1360 String<SyntaxChar> lc;
1361 Vector<size_t> rangeIndex;
1363 Boolean allowThrough = 0;
1365 if (!parseSdParam(sdBuilder.externalSyntax
1366 ? AllowedSdParams(SdParam::reservedName
1367 + keys[isNamechar * 2],
1368 SdParam::paramLiteral,
1372 ? AllowedSdParams(SdParam::paramLiteral)
1373 : AllowedSdParams(SdParam::reservedName
1374 + keys[isNamechar * 2])),
1378 Boolean wasRange = 0;
1379 sdParamConvertToLiteral(parm);
1380 if (parm.type == SdParam::ellipsis) {
1382 message(ParserMessages::sdInvalidEllipsis);
1383 if (!parseSdParam(AllowedSdParams(SdParam::paramLiteral,
1387 sdParamConvertToLiteral(parm);
1388 if (parm.paramLiteralText.size() == 0)
1389 message(ParserMessages::sdInvalidEllipsis);
1390 else if (allowThrough) {
1391 SyntaxChar n = parm.paramLiteralText[0];
1392 if (n < lc[lc.size() - 1])
1393 message(ParserMessages::sdInvalidRange);
1394 else if (n > lc[lc.size() - 1] + 1)
1395 rangeIndex.push_back(lc.size() - 1);
1399 if (parm.type != SdParam::paramLiteral)
1401 lc += parm.paramLiteralText;
1402 allowThrough = (parm.paramLiteralText.size() - wasRange) > 0;
1405 size_t rangeIndexPos = 0;
1406 unsigned long rangeLeft = 0;
1407 SyntaxChar nextRangeChar;
1408 ISet<Char> &set = isNamechar ? nameChar : nameStartChar;
1409 String<SyntaxChar> chars;
1413 if (!parseSdParam(sdBuilder.externalSyntax
1414 ? AllowedSdParams(SdParam::reservedName
1415 + keys[isNamechar * 2 + 1],
1416 SdParam::paramLiteral,
1420 ? AllowedSdParams(SdParam::paramLiteral)
1421 : AllowedSdParams(SdParam::reservedName
1422 + keys[isNamechar * 2 + 1])),
1425 sdParamConvertToLiteral(parm);
1427 Boolean isRange = parm.type == SdParam::ellipsis;
1428 size_t nChars = chars.size();
1431 for (size_t i = 0; i < nChars; i++) {
1433 && rangeIndexPos < rangeIndex.size()
1434 && rangeIndex[rangeIndexPos] == lcPos) {
1435 rangeLeft = 1 + lc[lcPos + 1] - lc[lcPos];
1436 nextRangeChar = lc[lcPos];
1441 if (rangeLeft > 0) {
1443 c = nextRangeChar++;
1445 else if (lcPos < lc.size())
1451 // map from c to chars[i]
1452 Char transLc, transUc;
1453 if (translateSyntax(sdBuilder, c, transLc)
1454 && translateSyntax(sdBuilder, chars[i], transUc)) {
1456 if (transLc != transUc) {
1458 sdBuilder.syntax->addSubst(transLc, transUc);
1463 if (!parseSdParam(AllowedSdParams(SdParam::paramLiteral,
1467 sdParamConvertToLiteral(parm);
1468 if (chars.size() == 0 || parm.paramLiteralText.size() == 0)
1469 message(ParserMessages::sdInvalidEllipsis);
1471 SyntaxChar start = chars[chars.size() - 1];
1472 SyntaxChar end = parm.paramLiteralText[0];
1474 message(ParserMessages::sdInvalidRange);
1476 size_t count = end + 1 - start;
1479 && rangeIndexPos < rangeIndex.size()
1480 && rangeIndex[rangeIndexPos] == lcPos) {
1481 rangeLeft = 1 + lc[lcPos + 1] - lc[lcPos];
1482 nextRangeChar = lc[lcPos];
1487 if (rangeLeft > 0) {
1489 c = nextRangeChar++;
1491 else if (lcPos < lc.size())
1497 if (c == start && count > 1 && (runOut || rangeLeft > 0)) {
1501 else if (rangeLeft < count)
1505 translateRange(sdBuilder, start, start + (count - 1), set);
1510 Char transLc, transUc;
1511 if (translateSyntax(sdBuilder, c, transLc)
1512 && translateSyntax(sdBuilder, start, transUc)) {
1514 if (transLc != transUc) {
1516 sdBuilder.syntax->addSubst(transLc, transUc);
1526 if (parm.type != SdParam::paramLiteral)
1528 chars.append(parm.paramLiteralText.data() + 1,
1529 parm.paramLiteralText.size() - 1);
1531 else if (parm.type == SdParam::paramLiteral)
1532 parm.paramLiteralText.swap(chars);
1536 if ((runOut && !sdBuilder.externalSyntax)
1537 || rangeLeft > 0 || lcPos < lc.size())
1539 ? ParserMessages::nmcharLength
1540 : ParserMessages::nmstrtLength);
1541 if (!checkNmchars(set, *sdBuilder.syntax))
1542 sdBuilder.valid = 0;
1543 } while (!isNamechar++);
1545 intersectCharSets(nameStartChar, nameChar, bad);
1546 if (!bad.isEmpty()) {
1547 sdBuilder.valid = 0;
1548 message(ParserMessages::nmcharNmstrt, CharsetMessageArg(bad));
1550 sdBuilder.syntax->addNameStartCharacters(nameStartChar);
1551 sdBuilder.syntax->addNameCharacters(nameChar);
1552 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rGENERAL),
1555 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNO,
1556 SdParam::reservedName + Sd::rYES),
1559 sdBuilder.syntax->setNamecaseGeneral(parm.type
1560 == SdParam::reservedName + Sd::rYES);
1562 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rENTITY),
1565 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNO,
1566 SdParam::reservedName + Sd::rYES),
1569 sdBuilder.syntax->setNamecaseEntity(parm.type
1570 == SdParam::reservedName + Sd::rYES);
// Checks the candidate name-character set `set` against character classes
// that `syntax` already defines, and reports one message per kind of
// conflict found.
// The Boolean result is tested by sdParseNaming, which clears
// sdBuilder.valid when this check fails.
1574 Boolean Parser::checkNmchars(const ISet<Char> &set, const Syntax &syntax)
// Characters that are also in the syntax's nameStart set.
1578 intersectCharSets(set, *syntax.charSet(Syntax::nameStart), bad);
1579 if (!bad.isEmpty()) {
1580 message(ParserMessages::nmcharLetter, CharsetMessageArg(bad));
// Characters that are also in the syntax's digit set.
1584 intersectCharSets(set, *syntax.charSet(Syntax::digit), bad);
1585 if (!bad.isEmpty()) {
1586 message(ParserMessages::nmcharDigit, CharsetMessageArg(bad));
// The RE, RS and SPACE standard function characters are each reported
// when the syntax defines them and `set` contains them.
1591 if (syntax.getStandardFunction(Syntax::fRE, funChar)
1592 && set.contains(funChar)) {
1593 message(ParserMessages::nmcharRe, NumberMessageArg(funChar));
1596 if (syntax.getStandardFunction(Syntax::fRS, funChar)
1597 && set.contains(funChar)) {
1598 message(ParserMessages::nmcharRs, NumberMessageArg(funChar));
1601 if (syntax.getStandardFunction(Syntax::fSPACE, funChar)
1602 && set.contains(funChar)) {
1603 message(ParserMessages::nmcharSpace, NumberMessageArg(funChar));
// Characters that are also in the syntax's sepchar set.
1606 intersectCharSets(set, *syntax.charSet(Syntax::sepchar), bad);
1607 if (!bad.isEmpty()) {
1608 message(ParserMessages::nmcharSepchar, CharsetMessageArg(bad));
// Adds to `inter` the character ranges that s1 and s2 have in common,
// walking both sets range by range with ISetIter.
1614 // Result is an ISet<WideChar>, so it can be used with CharsetMessageArg.
1616 void Parser::intersectCharSets(const ISet<Char> &s1, const ISet<Char> &s2,
1617 ISet<WideChar> &inter)
1619 ISetIter<Char> i1(s1);
1620 ISetIter<Char> i2(s2);
1621 Char min1, max1, min2, max2;
// Load the first range of each set.
1622 if (!i1.next(min1, max1))
1624 if (!i2.next(min2, max2))
// Advance s1 to its next range.
1628 if (!i1.next(min1, max1))
// The current s2 range lies entirely below the current s1 range:
// advance s2.
1631 else if (max2 < min1) {
1632 if (!i2.next(min2, max2))
// Add the overlap of the two current ranges: from the larger of the two
// minimums to the smaller of the two maximums.
1638 Char min = min1 > min2 ? min1 : min2;
1639 Char max = max1 < max2 ? max1 : max2;
1640 inter.addRange(min, max);
// Move on to a following range of s1 or s2.
1641 if (!i1.next(min1, max1))
1643 if (!i2.next(min2, max2))
// Parses the delimiter part of a concrete syntax in the SGML declaration:
// the keywords DELIM, GENERAL and SGMLREF, then general delimiter names
// each followed by their new value until SHORTREF is seen, then SGMLREF or
// NONE, then short reference delimiter literals (optionally written as
// "..." ranges), with NAMES being the other parameter accepted in that list.
// Problems are reported through message(); several also clear
// sdBuilder.valid.
1649 Boolean Parser::sdParseDelim(SdBuilder &sdBuilder, SdParam &parm)
1651 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rDELIM),
1654 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rGENERAL),
1657 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF),
// Tracks which general delimiters were given explicitly, so that a second
// assignment to the same delimiter can be reported.
1660 PackedBoolean delimGeneralSpecified[Syntax::nDelimGeneral];
1661 for (int i = 0; i < Syntax::nDelimGeneral; i++)
1662 delimGeneralSpecified[i] = 0;
// Next parameter: a general delimiter name, or SHORTREF.
1664 if (!parseSdParam(AllowedSdParams(SdParam::generalDelimiterName,
1665 SdParam::reservedName + Sd::rSHORTREF),
1668 if (parm.type == SdParam::reservedName + Sd::rSHORTREF)
1670 Syntax::DelimGeneral delimGeneral = parm.delimGeneralIndex;
1671 if (delimGeneralSpecified[delimGeneral])
1672 message(ParserMessages::duplicateDelimGeneral,
1673 StringMessageArg(sd().generalDelimiterName(delimGeneral)));
// The new delimiter value; an external syntax accepts an additional
// parameter kind besides a parameter literal.
1674 if (!parseSdParam(sdBuilder.externalSyntax
1675 ? AllowedSdParams(SdParam::paramLiteral,
1677 : AllowedSdParams(SdParam::paramLiteral),
// A number parameter is turned into a one-character literal.
1680 sdParamConvertToLiteral(parm);
1682 if (parm.paramLiteralText.size() == 0)
1683 message(ParserMessages::sdEmptyDelimiter);
1684 else if (translateSyntax(sdBuilder, parm.paramLiteralText, str)) {
// Run each translated character through the syntax's general
// substitution table.
1685 const SubstTable<Char> *table = sdBuilder.syntax->generalSubstTable();
1686 for (size_t i = 0; i < str.size(); i++)
1687 table->subst(str[i]);
// Install the delimiter only if it passes checkGeneralDelim and was not
// already specified.
1688 if (checkGeneralDelim(*sdBuilder.syntax, str)
1689 && !delimGeneralSpecified[delimGeneral])
1690 sdBuilder.syntax->setDelimGeneral(delimGeneral, str);
1692 sdBuilder.valid = 0;
1694 delimGeneralSpecified[delimGeneral] = 1;
// A false result from setRefDelimGeneral invalidates the declaration.
// NOTE(review): presumably this supplies reference values for general
// delimiters that were not specified above - confirm in setRefDelimGeneral.
1696 if (!setRefDelimGeneral(*sdBuilder.syntax,
1697 sdBuilder.syntaxCharset,
1698 sdBuilder.sd->docCharset(),
1699 sdBuilder.switcher))
1700 sdBuilder.valid = 0;
// After SHORTREF: SGMLREF or NONE.
1701 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF,
1702 SdParam::reservedName + Sd::rNONE),
// For SGMLREF, addRefDelimShortref is called; its failure invalidates
// the declaration.
1705 if (parm.type == SdParam::reservedName + Sd::rSGMLREF) {
1706 if (!addRefDelimShortref(*sdBuilder.syntax,
1707 sdBuilder.syntaxCharset,
1708 sdBuilder.sd->docCharset(),
1709 sdBuilder.switcher))
1710 sdBuilder.valid = 0;
// The most recent short reference literal; used as the lower end of a
// range when an ellipsis follows it.
1712 String<SyntaxChar> lastLiteral;
1714 if (!parseSdParam(sdBuilder.externalSyntax
1715 ? AllowedSdParams(SdParam::paramLiteral,
1718 SdParam::reservedName + Sd::rNAMES)
1719 : AllowedSdParams(SdParam::paramLiteral,
1720 SdParam::reservedName + Sd::rNAMES),
1723 sdParamConvertToLiteral(parm);
// An ellipsis is followed by the upper end of a character range.
1724 if (parm.type == SdParam::ellipsis) {
1725 if (!parseSdParam(AllowedSdParams(SdParam::paramLiteral,
1729 sdParamConvertToLiteral(parm);
// Both ends of the range must be single characters and the upper end must
// not be below the lower end; a range whose ends are equal is not processed.
1730 if (parm.paramLiteralText.size() == 0)
1731 message(ParserMessages::sdEmptyDelimiter);
1732 else if (lastLiteral.size() != 1
1733 || parm.paramLiteralText.size() != 1)
1734 message(ParserMessages::sdInvalidEllipsis);
1735 else if (parm.paramLiteralText[0] < lastLiteral[0])
1736 message(ParserMessages::sdInvalidRange);
1737 else if (parm.paramLiteralText[0] != lastLiteral[0]) {
// Translate the range into document characters, then collect those that
// are already simple short reference delimiters or one-character complex
// short reference delimiters.
1738 ISet<Char> shortrefChars;
1739 translateRange(sdBuilder,
1741 parm.paramLiteralText[0],
1743 ISet<WideChar> duplicates;
1744 intersectCharSets(shortrefChars,
1745 sdBuilder.syntax->delimShortrefSimple(),
1747 int nComplexShortrefs = sdBuilder.syntax->nDelimShortrefComplex();
1748 for (int i = 0; i < nComplexShortrefs; i++) {
1749 const StringC &delim = sdBuilder.syntax->delimShortrefComplex(i);
1750 if (delim.size() == 1 && shortrefChars.contains(delim[0]))
1751 duplicates.add(delim[0]);
1753 if (!duplicates.isEmpty())
1754 message(ParserMessages::duplicateDelimShortrefSet,
1755 CharsetMessageArg(duplicates));
1756 sdBuilder.syntax->addDelimShortrefs(shortrefChars,
1757 sdBuilder.sd->docCharset());
// Clear lastLiteral; an ellipsis arriving while it is empty fails the
// size check above.
1759 lastLiteral.resize(0);
// An ordinary literal: keep it in lastLiteral (by swap) so that a following
// ellipsis can use it as the start of a range.
1761 else if (parm.type == SdParam::paramLiteral) {
1762 parm.paramLiteralText.swap(lastLiteral);
1764 if (lastLiteral.size() == 0)
1765 message(ParserMessages::sdEmptyDelimiter);
1766 else if (translateSyntax(sdBuilder, lastLiteral, str)) {
1767 const SubstTable<Char> *table = sdBuilder.syntax->generalSubstTable();
1768 for (size_t i = 0; i < str.size(); i++)
1769 table->subst(str[i]);
1771 || checkShortrefDelim(*sdBuilder.syntax,
1772 sdBuilder.sd->docCharset(),
// A delimiter the syntax already treats as a valid short reference is
// reported as a duplicate.
1774 if (sdBuilder.syntax->isValidShortref(str))
1775 message(ParserMessages::duplicateDelimShortref,
1776 StringMessageArg(str));
1778 sdBuilder.syntax->addDelimShortref(str,
1779 sdBuilder.sd->docCharset());
// Parses the reserved-name part of a concrete syntax: SGMLREF, then pairs
// of a reference reserved name and its replacement until QUANTITY is seen.
// Each replacement is translated, validated as a name in the syntax being
// built, and stored with setName(). Afterwards setRefNames() is called and
// the RE, RS and SPACE names are checked with lookupFunctionChar().
1789 Boolean Parser::sdParseNames(SdBuilder &sdBuilder, SdParam &parm)
1791 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF),
1795 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rQUANTITY,
1796 SdParam::referenceReservedName),
1799 if (parm.type == SdParam::reservedName + Sd::rQUANTITY)
// Remember which reserved name is being redefined before parm is reused.
1801 Syntax::ReservedName reservedName = parm.reservedNameIndex;
// With an external syntax the replacement may be a parameter literal as
// well as a name.
1802 if (!parseSdParam(sdBuilder.externalSyntax
1803 ? AllowedSdParams(SdParam::name, SdParam::paramLiteral)
1804 : AllowedSdParams(SdParam::name),
// A name is translated with translateName, a literal with translateSyntax.
1808 if (parm.type == SdParam::name
1809 ? translateName(sdBuilder, parm.token, transName)
1810 : translateSyntax(sdBuilder, parm.paramLiteralText, transName)) {
// The replacement must not already be a reserved name in the new syntax.
1811 Syntax::ReservedName tem;
1812 if (sdBuilder.syntax->lookupReservedName(transName, &tem))
1813 message(ParserMessages::ambiguousReservedName,
1814 StringMessageArg(transName));
// It must be non-empty and begin with a name start character; on failure
// it is reported and emptied.
1816 if (transName.size() == 0
1817 || !sdBuilder.syntax->isNameStartCharacter(transName[0])) {
1818 message(ParserMessages::reservedNameSyntax,
1819 StringMessageArg(transName));
1820 transName.resize(0);
1823 // Check that it is a valid name in the declared syntax
1824 // (- and . might not be name characters).
1825 for (i = 1; i < transName.size(); i++)
1826 if (!sdBuilder.syntax->isNameCharacter(transName[i])) {
1827 message(ParserMessages::reservedNameSyntax,
1828 StringMessageArg(transName));
1829 transName.resize(0);
// Apply the general substitution table to every character of the name.
1832 for (i = 0; i < transName.size(); i++)
1833 sdBuilder.syntax->generalSubstTable()->subst(transName[i]);
// A reserved name that already has a value in the syntax being built is
// reported as a duplicate; otherwise a non-empty replacement is stored.
// Note that the message text is taken from the current syntax(), not from
// sdBuilder.syntax.
1834 if (sdBuilder.syntax->reservedName(reservedName).size() > 0)
1835 message(ParserMessages::duplicateReservedName,
1836 StringMessageArg(syntax().reservedName(reservedName)));
1837 else if (transName.size() > 0)
1838 sdBuilder.syntax->setName(reservedName, transName);
1840 sdBuilder.valid = 0;
1844 setRefNames(*sdBuilder.syntax, sdBuilder.sd->docCharset());
// The reserved names for RE, RS and SPACE are looked up as function
// character names; a hit is reported as duplicateFunctionName.
1845 static Syntax::ReservedName functionNameIndex[3] = {
1846 Syntax::rRE, Syntax::rRS, Syntax::rSPACE
1848 for (int i = 0; i < 3; i++) {
1849 const StringC &functionName
1850 = sdBuilder.syntax->reservedName(functionNameIndex[i]);
1852 if (sdBuilder.syntax->lookupFunctionChar(functionName, &tem))
1853 message(ParserMessages::duplicateFunctionName, StringMessageArg(functionName));
1855 sdBuilder.syntax->enterStandardFunctionNames();
// Parses the quantity set of a concrete syntax: SGMLREF followed by
// quantity-name / number pairs, each stored with setQuantity().
// When the declaration has scopeInstance() set, every quantity that is
// smaller in the new syntax than in the current syntax() is reported.
1859 Boolean Parser::sdParseQuantity(SdBuilder &sdBuilder, SdParam &parm)
1861 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF),
// The parameter that ends the list depends on whether an external syntax
// is being parsed; for an inline syntax it is FEATURES.
1865 int final = (sdBuilder.externalSyntax
1867 : SdParam::reservedName + Sd::rFEATURES);
1868 if (!parseSdParam(AllowedSdParams(SdParam::quantityName, final), parm))
1870 if (parm.type != SdParam::quantityName)
// Save the quantity index before parm is overwritten by the number.
1872 Syntax::Quantity quantity = parm.quantityIndex;
1873 if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
1875 sdBuilder.syntax->setQuantity(quantity, parm.n);
1877 if (sdBuilder.sd->scopeInstance()) {
1878 for (int i = 0; i < Syntax::nQuantity; i++)
1879 if (sdBuilder.syntax->quantity(Syntax::Quantity(i))
1880 < syntax().quantity(Syntax::Quantity(i)))
1881 message(ParserMessages::scopeInstanceQuantity,
1882 StringMessageArg(sd().quantityName(Syntax::Quantity(i))));
// Parses the feature-use part of the SGML declaration by walking a fixed
// table of feature keywords in order. Each table entry says whether the
// keyword takes no value (__none), a NO/YES value (__boolean), or a NO/YES
// value where YES is followed by a number (__number). Results are stored
// with setBooleanFeature() / setNumberFeature() on sdBuilder.sd.
1887 Boolean Parser::sdParseFeatures(SdBuilder &sdBuilder, SdParam &parm)
1889 struct FeatureInfo {
1890 Sd::ReservedName name;
1897 static FeatureInfo features[] = {
1898 { Sd::rMINIMIZE, FeatureInfo::__none },
1899 { Sd::rDATATAG, FeatureInfo::__boolean },
1900 { Sd::rOMITTAG, FeatureInfo::__boolean },
1901 { Sd::rRANK, FeatureInfo::__boolean },
1902 { Sd::rSHORTTAG, FeatureInfo::__boolean },
1903 { Sd::rLINK, FeatureInfo::__none },
1904 { Sd::rSIMPLE, FeatureInfo::__number },
1905 { Sd::rIMPLICIT, FeatureInfo::__boolean },
1906 { Sd::rEXPLICIT, FeatureInfo::__number },
1907 { Sd::rOTHER, FeatureInfo::__none },
1908 { Sd::rCONCUR, FeatureInfo::__number },
1909 { Sd::rSUBDOC, FeatureInfo::__number },
1910 { Sd::rFORMAL, FeatureInfo::__boolean }
// Running indices that are cast to Sd::BooleanFeature / Sd::NumberFeature
// as features are stored.
// NOTE(review): assumes the table order above matches the numbering of
// those two enums - confirm against Sd.
1912 int booleanFeature = 0;
1913 int numberFeature = 0;
1914 for (size_t i = 0; i < SIZEOF(features); i++) {
// The feature keyword itself is required at this position.
1915 if (!parseSdParam(AllowedSdParams(SdParam::reservedName
1916 + features[i].name), parm))
// Entries other than __none are followed by NO or YES.
1918 if (features[i].arg != FeatureInfo::__none) {
1919 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNO,
1920 SdParam::reservedName + Sd::rYES),
// DATATAG YES is accepted syntactically but reported as not implemented.
1924 if (features[i].name == Sd::rDATATAG
1925 && parm.type == (SdParam::reservedName + Sd::rYES))
1926 message(ParserMessages::datatagNotImplemented);
// For a number feature, YES must be followed by a number.
1928 if (features[i].arg == FeatureInfo::__number) {
1929 if (parm.type == SdParam::reservedName + Sd::rYES) {
1930 if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
1932 sdBuilder.sd->setNumberFeature(Sd::NumberFeature(numberFeature++),
1936 sdBuilder.sd->setNumberFeature(Sd::NumberFeature(numberFeature++),
1940 sdBuilder.sd->setBooleanFeature(Sd::BooleanFeature(booleanFeature++),
1941 parm.type == (SdParam::reservedName
// Parses the APPINFO part of the SGML declaration: the keyword APPINFO
// followed by NONE or a minimum literal. An AppinfoEvent is allocated from
// eventAllocator() - carrying the literal text when one was given - and
// passed to eventHandler().appinfo().
1948 Boolean Parser::sdParseAppinfo(SdBuilder &, SdParam &parm)
1950 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rAPPINFO),
// Location captured after the APPINFO keyword has been read; it is attached
// to the event.
1953 Location location(currentLocation());
1954 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNONE,
1955 SdParam::minimumLiteral),
1958 AppinfoEvent *event;
1959 if (parm.type == SdParam::minimumLiteral)
1960 event = new (eventAllocator()) AppinfoEvent(parm.literalText, location);
1962 event = new (eventAllocator()) AppinfoEvent(location);
1963 eventHandler().appinfo(event);
// Translates one character of the syntax character set into the document
// character set without needing an SdBuilder: the character is first passed
// through `switcher`, then mapped to a universal character with
// syntaxCharset.descToUniv and from there to a document character with
// univToDescCheck. The translateSyntaxChar message names the (switched)
// syntax character.
1967 Boolean Parser::translateSyntax(CharSwitcher &switcher,
1968 const CharsetInfo &syntaxCharset,
1969 const CharsetInfo &docCharset,
1970 WideChar syntaxChar,
// Apply any SWITCHES substitution first.
1973 syntaxChar = switcher.subst(syntaxChar);
1975 if (syntaxCharset.descToUniv(syntaxChar, univChar)
1976 && univToDescCheck(docCharset, univChar, docChar))
1978 message(ParserMessages::translateSyntaxChar, NumberMessageArg(syntaxChar));
// Translates the syntax characters start..end (inclusive) into document
// characters and adds them to `chars`.
// The visible lines show a per-character do/while loop followed by a
// range-based loop that splits the work at switched characters.
// NOTE(review): confirm how the two loops relate.
1982 void Parser::translateRange(SdBuilder &sdBuilder, SyntaxChar start,
1983 SyntaxChar end, ISet<Char> &chars)
1988 if (!translateSyntax(sdBuilder, start, docChar))
1991 } while (start++ != end);
// doneUpTo is the last syntax character handled by the current step.
1994 SyntaxChar doneUpTo = end;
// Find the lowest switched character (switchFrom) inside [start, end].
1995 Boolean gotSwitch = 0;
1996 WideChar firstSwitch;
1997 for (size_t i = 0; i < sdBuilder.switcher.nSwitches(); i++) {
1998 WideChar c = sdBuilder.switcher.switchFrom(i);
1999 if (start <= c && c <= end) {
2004 else if (c < firstSwitch)
// If the range starts on a switched character, translate just that
// character through the switching translateSyntax.
2008 if (gotSwitch && firstSwitch == start) {
2011 if (translateSyntax(sdBuilder, start, docChar))
// Otherwise handle the run that ends just before the first switched
// character.
2016 doneUpTo = firstSwitch - 1;
// translateSyntaxNoSwitch also yields `count`; when that is shorter than
// the remaining run, the step is shortened to `count` characters. The
// corresponding run of document characters starting at docChar is added.
2019 if (translateSyntaxNoSwitch(sdBuilder, start, docChar, count)) {
2020 if (count - 1 < doneUpTo - start)
2021 doneUpTo = start + (count - 1);
2022 chars.addRange(docChar, docChar + (doneUpTo - start));
// Continue after the part just handled until `end` has been reached.
2025 if (doneUpTo == end)
2027 start = doneUpTo + 1;
// Translates a single syntax character to a document character: the
// character is first substituted through sdBuilder.switcher and the result
// is handed to translateSyntaxNoSwitch.
2031 Boolean Parser::translateSyntax(SdBuilder &sdBuilder,
2032 WideChar syntaxChar, Char &docChar)
2035 return translateSyntaxNoSwitch(sdBuilder,
2036 sdBuilder.switcher.subst(syntaxChar),
// Translates a syntax character to a document character without applying
// SWITCHES. The character is first looked up in the syntax charset
// declaration and matched against the document charset declaration; the
// visible fallback maps it through universal characters instead
// (syntaxCharset.descToUniv + univToDescCheck). An output count is also
// computed on the visible paths.
// If translation fails, sdBuilder.valid is cleared and translateSyntaxChar
// is reported.
2041 Boolean Parser::translateSyntaxNoSwitch(SdBuilder &sdBuilder,
2042 WideChar syntaxChar, Char &docChar,
2047 CharsetDeclRange::Type type;
// Look the character up in the syntax charset declaration.
2049 if (sdBuilder.syntaxCharsetDecl.getCharInfo(syntaxChar,
// Document characters found to correspond to the syntax character.
2055 ISet<WideChar> docChars;
2057 case CharsetDeclRange::unused:
2059 case CharsetDeclRange::string:
2060 sdBuilder.sd->docCharsetDecl().stringToChar(str, docChars);
2062 case CharsetDeclRange::number:
// count2 is the count reported by the document declaration; it is compared
// against count when a match was found.
2065 sdBuilder.sd->docCharsetDecl().numberToChar(id, n, docChars, count2);
2066 if (!docChars.isEmpty() && count2 < count)
2073 if (!docChars.isEmpty()) {
// More than one candidate is only a warning, controlled by warnSgmlDecl.
2074 if (!docChars.isSingleton() && options().warnSgmlDecl)
2075 message(ParserMessages::ambiguousDocCharacter,
2076 CharsetMessageArg(docChars));
// Use the lowest candidate, provided it fits in a Char.
2077 ISetIter<WideChar> iter(docChars);
2079 if (iter.next(min, max) && min <= charMax) {
2080 docChar = Char(min);
// Fallback: map via universal characters using the charset infos.
2086 WideChar alsoMax, count2;
2087 if (sdBuilder.syntaxCharset.descToUniv(syntaxChar, univChar, alsoMax)
2088 && univToDescCheck(sdBuilder.sd->docCharset(), univChar, docChar,
2090 count = (alsoMax - syntaxChar) + 1;
2095 sdBuilder.valid = 0;
2096 message(ParserMessages::translateSyntaxChar, NumberMessageArg(syntaxChar));
// Translates a whole string of syntax characters into document characters.
// docString is emptied first; each character of syntaxString is then
// translated with the single-character translateSyntax overload.
2101 Boolean Parser::translateSyntax(SdBuilder &sdBuilder,
2102 const String<SyntaxChar> &syntaxString,
2105 docString.resize(0);
2107 for (size_t i = 0; i < syntaxString.size(); i++) {
2109 if (translateSyntax(sdBuilder, syntaxString[i], c))
// Translates a name read in the current document character set
// (sd().docCharset()) into the document character set of the declaration
// being built (sdBuilder.sd->docCharset()), applying SWITCHES on the way.
// The result `str` has the same length as `name`. A character that cannot
// be mapped is reported with translateDocChar and clears sdBuilder.valid.
2117 Boolean Parser::translateName(SdBuilder &sdBuilder,
2118 const StringC &name,
2121 str.resize(name.size());
2122 for (size_t i = 0; i < name.size(); i++) {
// Current document character -> universal character.
2124 Boolean ret = sd().docCharset().descToUniv(name[i], univChar);
2125 // Might switch hyphen or period.
2126 univChar = translateUniv(univChar, sdBuilder.switcher,
2127 sdBuilder.syntaxCharset);
// Universal character -> character in the new document character set.
2129 if (!univToDescCheck(sdBuilder.sd->docCharset(), univChar, str[i])) {
2130 message(ParserMessages::translateDocChar, NumberMessageArg(univChar));
2131 sdBuilder.valid = 0;
// Applies SWITCHES to a universal character: the character is mapped into
// the syntax character set, substituted through `switcher`, and - if the
// substitution changed it - mapped back to a universal character.
// missingSyntaxChar is reported unless univToDesc yields exactly one syntax
// character; translateSyntaxChar is reported when the switched character
// has no universal equivalent.
2138 UnivChar Parser::translateUniv(UnivChar univChar,
2139 CharSwitcher &switcher,
2140 const CharsetInfo &syntaxCharset)
2142 WideChar syntaxChar;
2143 ISet<WideChar> syntaxChars;
2144 if (syntaxCharset.univToDesc(univChar, syntaxChar, syntaxChars) != 1) {
2145 message(ParserMessages::missingSyntaxChar,
2146 NumberMessageArg(univChar));
// descToUniv overwrites univChar with the switched character's value.
2149 SyntaxChar tem = switcher.subst(syntaxChar);
2150 if (tem != syntaxChar && !syntaxCharset.descToUniv(tem, univChar))
2151 message(ParserMessages::translateSyntaxChar, NumberMessageArg(tem));
// Reports oneFunction when character `c` is already a member of the
// syntax's functionChar set.
2155 Boolean Parser::checkNotFunction(const Syntax &syn, Char c)
2157 if (syn.charSet(Syntax::functionChar)->contains(c)) {
2158 message(ParserMessages::oneFunction, NumberMessageArg(c));
// Validates a short reference delimiter string.
2166 // Check that it has at most one B sequence and that it
2167 // is not adjacent to a blank sequence.
// A B sequence is a run of the letter 'B' as encoded in `charset`;
// blank characters are those in the syntax's blank set.
2169 Boolean Parser::checkShortrefDelim(const Syntax &syn,
2170 const CharsetInfo &charset,
2171 const StringC &delim)
2174 Char letterB = charset.execToDesc('B');
2175 const ISet<Char> *bSet = syn.charSet(Syntax::blank);
2176 for (size_t i = 0; i < delim.size(); i++)
2177 if (delim[i] == letterB) {
2179 message(ParserMessages::multipleBSequence, StringMessageArg(delim));
// The character just before the B sequence must not be a blank.
2183 if (i > 0 && bSet->contains(delim[i - 1])) {
2184 message(ParserMessages::blankAdjacentBSequence,
2185 StringMessageArg(delim));
// Step over the remaining B characters of the same sequence.
2188 while (i + 1 < delim.size() && delim[i + 1] == letterB)
// The character just after the B sequence must not be a blank either.
2190 if (i < delim.size() - 1 && bSet->contains(delim[i + 1])) {
2191 message(ParserMessages::blankAdjacentBSequence,
2192 StringMessageArg(delim));
// Validates a general delimiter string against the syntax's functionChar
// set. For a non-empty delimiter each character is tested for membership,
// tracked through the allFunction flag; the failure is reported as
// generalDelimAllFunction.
2199 Boolean Parser::checkGeneralDelim(const Syntax &syn, const StringC &delim)
2201 const ISet<Char> *functionSet = syn.charSet(Syntax::functionChar);
2202 if (delim.size() > 0) {
2203 Boolean allFunction = 1;
2204 for (size_t i = 0; i < delim.size(); i++)
2205 if (!functionSet->contains(delim[i]))
2208 message(ParserMessages::generalDelimAllFunction,
2209 StringMessageArg(delim));
// Checks every SWITCHES pair: both the source and the target character are
// converted to universal characters via syntaxCharset, and
// switchLetterDigit is reported for any that is a lower-case letter,
// upper-case letter or digit.
2216 Boolean Parser::checkSwitches(CharSwitcher &switcher,
2217 const CharsetInfo &syntaxCharset)
2220 for (size_t i = 0; i < switcher.nSwitches(); i++) {
// c[0] is the character switched from, c[1] the character switched to.
2222 c[0] = switcher.switchFrom(i);
2223 c[1] = switcher.switchTo(i);
2224 for (int j = 0; j < 2; j++) {
2226 if (syntaxCharset.descToUniv(c[j], univChar)) {
2227 // Check that it is not Digit Lcletter or Ucletter
2228 if ((UnivCharsetDesc::a <= univChar
2229 && univChar < UnivCharsetDesc::a + 26)
2230 || (UnivCharsetDesc::A <= univChar
2231 && univChar < UnivCharsetDesc::A + 26)
2232 || (UnivCharsetDesc::zero <= univChar
2233 && univChar < UnivCharsetDesc::zero + 10)) {
2234 message(ParserMessages::switchLetterDigit,
2235 NumberMessageArg(univChar));
// Reports switchNotMarkup for every switch that was never used by
// CharSwitcher::subst(), naming the character that was switched from.
2244 Boolean Parser::checkSwitchesMarkup(CharSwitcher &switcher)
2247 size_t nSwitches = switcher.nSwitches();
2248 for (size_t i = 0; i < nSwitches; i++)
2249 if (!switcher.switchUsed(i)) {
2250 // If the switch wasn't used,
2251 // then the character wasn't a markup character.
2252 message(ParserMessages::switchNotMarkup,
2253 NumberMessageArg(switcher.switchFrom(i)));
// Reports every general delimiter, complex short reference delimiter and
// reserved name of `syn` that is longer than the syntax's namelen.
// Over-long reserved names are reported only when options().warnSgmlDecl
// is set.
2259 void Parser::checkSyntaxNamelen(const Syntax &syn)
2261 size_t namelen = syn.namelen();
// General delimiters.
2263 for (i = 0; i < Syntax::nDelimGeneral; i++)
2264 if (syn.delimGeneral(i).size() > namelen)
2265 message(ParserMessages::delimiterLength,
2266 StringMessageArg(syn.delimGeneral(i)),
2267 NumberMessageArg(namelen));
// Complex short reference delimiters.
2268 for (i = 0; i < syn.nDelimShortrefComplex(); i++)
2269 if (syn.delimShortrefComplex(i).size() > namelen)
2270 message(ParserMessages::delimiterLength,
2271 StringMessageArg(syn.delimShortrefComplex(i)),
2272 NumberMessageArg(namelen));
// Reserved names (warning only).
2273 for (i = 0; i < Syntax::nNames; i++)
2274 if (syn.reservedName(Syntax::ReservedName(i)).size() > namelen
2275 && options().warnSgmlDecl)
2276 message(ParserMessages::reservedNameLength,
2277 StringMessageArg(syn.reservedName(Syntax::ReservedName(i))),
2278 NumberMessageArg(namelen));
// Convenience overload for callers that do not need the count: forwards to
// the overload that also takes a WideChar &count.
2281 Boolean Parser::univToDescCheck(const CharsetInfo &charset, UnivChar from,
2285 return univToDescCheck(charset, from, to, count);
// Maps universal character `from` into `charset` using
// charset.univToDesc(), which also fills `count`. An
// ambiguousDocCharacter warning listing the candidates is issued when
// options().warnSgmlDecl is set. The final test requires a non-zero
// univToDesc result and a character no larger than charMax.
2288 Boolean Parser::univToDescCheck(const CharsetInfo &charset, UnivChar from,
2289 Char &to, WideChar &count)
2292 ISet<WideChar> descSet;
2293 unsigned ret = charset.univToDesc(from, c, descSet, count);
2295 if (options().warnSgmlDecl)
2296 message(ParserMessages::ambiguousDocCharacter,
2297 CharsetMessageArg(descSet));
2300 if (ret && c <= charMax) {
// Reads the next parameter of the SGML declaration. Tokens are fetched with
// getToken(mdMode); when the token corresponds to one of the parameter
// kinds listed in `allow`, its type and value are stored in `parm` and, if
// markup is being collected (currentMarkup()), recorded there as well.
// Tokens that are not allowed are reported with a message that lists the
// allowed parameters.
2307 Boolean Parser::parseSdParam(const AllowedSdParams &allow,
2311 Token token = getToken(mdMode);
// An unrecognized character is reported either as a non-SGML character or
// as a character not allowed in a markup declaration.
2313 case tokenUnrecognized:
2314 if (reportNonSgmlCharacter())
2317 message(ParserMessages::markupDeclarationCharacter,
2318 StringMessageArg(currentToken()),
2319 AllowedSdParamsMessageArg(allow, sdPointer()));
// Entity end: accepted only when the caller allows SdParam::eE.
2323 if (allow.param(SdParam::eE)) {
2324 parm.type = SdParam::eE;
2325 if (currentMarkup())
2326 currentMarkup()->addEntityEnd();
2330 message(ParserMessages::sdEntityEnd,
2331 AllowedSdParamsMessageArg(allow, sdPointer()));
// The current character is recorded in the markup with addS().
2334 if (currentMarkup())
2335 currentMarkup()->addS(currentChar());
// A comment is parsed in sdcomMode.
2338 if (!parseComment(sdcomMode))
// These tokens are reported through sdParamInvalidToken.
2343 case tokenMinusGrpo:
2346 case tokenPeroNameStart:
2348 sdParamInvalidToken(token, allow);
// A name token is of interest only where an ellipsis is allowed: the token
// is extended and must then equal "...".
2350 case tokenLcUcNmchar:
2351 if (allow.param(SdParam::ellipsis)) {
2352 extendNameToken(syntax().namelen(), ParserMessages::nameLength);
2353 getCurrentToken(syntax().generalSubstTable(), parm.token);
2354 if (parm.token == sd().execToDoc("...")) {
2355 parm.type = SdParam::ellipsis;
2358 message(ParserMessages::sdInvalidNameToken,
2359 StringMessageArg(parm.token),
2360 AllowedSdParamsMessageArg(allow, sdPointer()));
2363 sdParamInvalidToken(token, allow);
// A literal: `lita` records whether it was opened with the LITA delimiter.
// A minimum literal is tried before a parameter literal.
2369 Boolean lita = (token == tokenLita);
2370 if (allow.param(SdParam::minimumLiteral)) {
2371 if (!parseMinimumLiteral(lita, parm.literalText))
2373 parm.type = SdParam::minimumLiteral;
2374 if (currentMarkup())
2375 currentMarkup()->addLiteral(parm.literalText);
2377 else if (allow.param(SdParam::paramLiteral)) {
2378 if (!parseSdParamLiteral(lita, parm.paramLiteralText))
2380 parm.type = SdParam::paramLiteral;
2383 sdParamInvalidToken(token, allow);
// MDC (end of the declaration): accepted only when allowed.
2389 if (allow.param(SdParam::mdc)) {
2390 parm.type = SdParam::mdc;
2391 if (currentMarkup())
2392 currentMarkup()->addDelim(Syntax::dMDC);
2395 sdParamInvalidToken(tokenMdc, allow);
// A name: the token is extended to the full name and case-substituted, then
// interpreted as the first matching allowed kind, tried in this order:
// capacity name, reference reserved name, general delimiter name, quantity
// name, one of the allowed SGML-declaration reserved names, generic name.
2397 case tokenNameStart:
2399 extendNameToken(syntax().namelen(), ParserMessages::nameLength);
2400 getCurrentToken(syntax().generalSubstTable(), parm.token);
2401 if (allow.param(SdParam::capacityName)) {
2402 if (sd().lookupCapacityName(parm.token, parm.capacityIndex)) {
2403 parm.type = SdParam::capacityName;
2404 if (currentMarkup())
2405 currentMarkup()->addName(currentInput());
2409 if (allow.param(SdParam::referenceReservedName)) {
2410 if (syntax().lookupReservedName(parm.token,
2411 &parm.reservedNameIndex)) {
2412 parm.type = SdParam::referenceReservedName;
2413 if (currentMarkup())
2414 currentMarkup()->addName(currentInput());
2418 if (allow.param(SdParam::generalDelimiterName)) {
2419 if (sd().lookupGeneralDelimiterName(parm.token,
2420 parm.delimGeneralIndex)) {
2421 parm.type = SdParam::generalDelimiterName;
2422 if (currentMarkup())
2423 currentMarkup()->addName(currentInput());
2427 if (allow.param(SdParam::quantityName)) {
2428 if (sd().lookupQuantityName(parm.token, parm.quantityIndex)) {
2429 parm.type = SdParam::quantityName;
2430 if (currentMarkup())
2431 currentMarkup()->addName(currentInput());
// Walk the allowed list (terminated by SdParam::invalid); entries at or
// above SdParam::reservedName encode an Sd::ReservedName offset.
2435 for (int i = 0;; i++) {
2436 SdParam::Type t = allow.get(i);
2437 if (t == SdParam::invalid)
2439 if (t >= SdParam::reservedName) {
2440 Sd::ReservedName sdReservedName
2441 = Sd::ReservedName(t - SdParam::reservedName);
2442 if (parm.token == sd().reservedName(sdReservedName)) {
2444 if (currentMarkup())
2445 currentMarkup()->addSdReservedName(sdReservedName,
2451 if (allow.param(SdParam::name)) {
2452 parm.type = SdParam::name;
2453 if (currentMarkup())
2454 currentMarkup()->addName(currentInput());
2458 message(ParserMessages::sdInvalidNameToken,
2459 StringMessageArg(parm.token),
2460 AllowedSdParamsMessageArg(allow, sdPointer()));
// A number: the digits are converted with stringToNumber; a value that
// overflows or exceeds Number(-1) is reported and replaced by Number(-1).
2465 if (allow.param(SdParam::number)) {
2466 extendNumber(syntax().namelen(), ParserMessages::numberLength);
2467 parm.type = SdParam::number;
2469 if (!stringToNumber(currentInput()->currentTokenStart(),
2470 currentInput()->currentTokenLength(),
2472 || n > Number(-1)) {
2473 message(ParserMessages::numberTooBig,
2474 StringMessageArg(currentToken()));
2475 parm.n = Number(-1);
2478 if (currentMarkup())
2479 currentMarkup()->addNumber(currentInput());
// Peek at the following token: a name start directly after the number is
// reported as psRequired. The token is then pushed back.
2482 Token token = getToken(mdMode);
2483 if (token == tokenNameStart)
2484 message(ParserMessages::psRequired);
2485 currentInput()->ungetToken();
2488 sdParamInvalidToken(tokenDigit, allow);
// Parses a parameter literal inside the SGML declaration and stores its
// characters in `str`. `lita` selects sdplitaMode instead of sdplitMode for
// tokenizing. Numeric character references contribute the referenced number
// as a SyntaxChar. The collected text is also added to the current markup.
2496 // This is a separate function, because we might want SyntaxChar
2497 // to be bigger than Char.
2499 Boolean Parser::parseSdParamLiteral(Boolean lita, String<SyntaxChar> &str)
2501 Location loc(currentLocation());
2503 SdText text(loc, lita); // first character of content
// Length limit: the reference concrete syntax value of LITLEN.
2505 const unsigned refLitlen = Syntax::referenceQuantity(Syntax::qLITLEN);
2507 Mode mode = lita ? sdplitaMode : sdplitMode;
2510 Token token = getToken(mode);
2513 message(ParserMessages::literalLevel);
// An unrecognized character is kept in the literal; it is reported as a
// non-SGML character or, with options().errorSignificant, as significant.
2515 case tokenUnrecognized:
2516 if (reportNonSgmlCharacter())
2518 if (options().errorSignificant)
2519 message(ParserMessages::sdLiteralSignificant,
2520 StringMessageArg(currentToken()));
2521 text.addChar(currentChar(), currentLocation());
// Numeric character reference: drop the part of the token already read,
// extend over the digits and convert them. A value that overflows or
// exceeds syntaxCharMax is reported.
2525 InputSource *in = currentInput();
2526 Location startLocation = currentLocation();
2527 in->discardInitial();
2528 extendNumber(syntax().namelen(), ParserMessages::numberLength);
2531 if (!stringToNumber(in->currentTokenStart(),
2532 in->currentTokenLength(),
2534 || n > syntaxCharMax) {
2535 message(ParserMessages::syntaxCharacterNumber,
2536 StringMessageArg(currentToken()));
// When prolog markup events are wanted, build a Markup object describing
// the reference (CRO, number, and how the reference was terminated).
2541 Owner<Markup> markupPtr;
2542 if (eventsWanted().wantPrologMarkup()) {
2543 markupPtr = new Markup;
2544 markupPtr->addDelim(Syntax::dCRO);
2545 markupPtr->addNumber(in);
2546 switch (getToken(refMode)) {
2548 markupPtr->addDelim(Syntax::dREFC);
2551 markupPtr->addRefEndRe();
// Otherwise the token that ends the reference is read and ignored.
2558 (void)getToken(refMode);
// Add the referenced character with a location built from a
// NumericCharRefOrigin that starts at startLocation.
2560 text.addChar(SyntaxChar(n),
2561 Location(new NumericCharRefOrigin(startLocation,
2562 currentLocation().index()
2563 + currentInput()->currentTokenLength()
2564 - startLocation.index(),
2569 case tokenCroNameStart:
2570 if (!parseNamedCharRef())
// A parameter entity reference is reported (sdParameterEntity); the
// characters of the token are then added to the literal as they are.
2577 case tokenPeroNameStart:
2579 message(ParserMessages::sdParameterEntity);
2581 Location loc(currentLocation());
2582 const Char *p = currentInput()->currentTokenStart();
2583 for (size_t count = currentInput()->currentTokenLength();
2586 text.addChar(*p++, loc);
// An over-long literal that has reached a record end is reported as too
// long and as probably missing its closing delimiter.
2592 if (text.string().size() > refLitlen
2593 && currentChar() == syntax().standardFunction(Syntax::fRE)) {
2594 message(ParserMessages::parameterLiteralLength, NumberMessageArg(refLitlen));
2595 // guess that the closing delimiter has been omitted
2596 message(ParserMessages::literalClosingDelimiter);
2599 text.addChar(currentChar(), currentLocation());
// Length check on the collected literal.
2604 if (text.string().size() > refLitlen)
2605 message(ParserMessages::parameterLiteralLength,
2606 NumberMessageArg(refLitlen));
// Hand the text back to the caller and record it in the markup.
2608 str = text.string();
2609 if (currentMarkup())
2610 currentMarkup()->addSdLiteral(text);
// Converts `length` digit characters starting at `s` into an unsigned long,
// using sd().digitWeight() for the value of each digit. The accumulation is
// guarded so that neither the multiplication by ten nor the addition of the
// digit can exceed ULONG_MAX.
2614 Boolean Parser::stringToNumber(const Char *s, size_t length,
2615 unsigned long &result)
2617 unsigned long n = 0;
2618 for (; length > 0; length--, s++) {
2619 int val = sd().digitWeight(*s);
// n * 10 is computed only once n <= ULONG_MAX/10 has been established; the
// second test makes sure adding val cannot overflow.
2620 if (n <= ULONG_MAX/10 && (n *= 10) <= ULONG_MAX - val)
// Reports a token that is not valid at this point of the SGML declaration.
// The message carries the offending token (described relative to mdMode)
// and the list of parameters that would have been allowed.
2629 void Parser::sdParamInvalidToken(Token token,
2630 const AllowedSdParams &allow)
2632 message(ParserMessages::sdParamInvalidToken,
2633 TokenMessageArg(token, mdMode, syntaxPointer(), sdPointer()),
2634 AllowedSdParamsMessageArg(allow, sdPointer()));
// Converts a number parameter in place into a parameter literal of length
// one whose single character is the number (parm.n). Parameters of any
// other type are left untouched.
2637 void Parser::sdParamConvertToLiteral(SdParam &parm)
2639 if (parm.type == SdParam::number) {
2640 parm.type = SdParam::paramLiteral;
2641 parm.paramLiteralText.resize(1);
2642 parm.paramLiteralText[0] = parm.n;
// Builds the list of parameter kinds that a parseSdParam() call accepts,
// from up to six SdParam::Type values.
// NOTE(review): param() and get() read allow_[] up to an SdParam::invalid
// terminator, so the arguments are presumably stored there in order -
// confirm against the constructor body.
2646 AllowedSdParams::AllowedSdParams(SdParam::Type arg1, SdParam::Type arg2,
2647 SdParam::Type arg3, SdParam::Type arg4,
2648 SdParam::Type arg5, SdParam::Type arg6)
// Tests whether parameter kind `t` is in the allowed list. The scan covers
// at most maxAllow entries and stops at the first SdParam::invalid entry.
2658 Boolean AllowedSdParams::param(SdParam::Type t) const
2660 for (int i = 0; i < maxAllow && allow_[i] != SdParam::invalid; i++)
// Returns the i-th allowed parameter kind, or SdParam::invalid when `i` is
// negative or not below maxAllow. Callers can therefore iterate from 0
// until SdParam::invalid is returned.
2666 SdParam::Type AllowedSdParams::get(int i) const
2668 return i < 0 || i >= maxAllow ? SdParam::Type(SdParam::invalid) : allow_[i];
// Message argument that renders a list of allowed SGML-declaration
// parameters. Stores the allowed list in allow_ and the Sd pointer in sd_;
// append() uses sd_ to obtain document-charset text.
2671 AllowedSdParamsMessageArg::AllowedSdParamsMessageArg(
2672 const AllowedSdParams &allow,
2673 const ConstPtr<Sd> &sd)
2674 : allow_(allow), sd_(sd)
// Returns a heap-allocated copy of this message argument, made with the
// copy constructor.
2678 MessageArg *AllowedSdParamsMessageArg::copy() const
2680 return new AllowedSdParamsMessageArg(*this);
// Formats the allowed-parameter list into `builder`. The list is walked
// with allow_.get(i) until SdParam::invalid is returned. Each kind is
// rendered as a message fragment, or - for the ellipsis and the
// SGML-declaration reserved names - as literal text in the document
// character set; ParserMessages::listSep is used as the separator fragment.
2683 void AllowedSdParamsMessageArg::append(MessageBuilder &builder) const
2685 for (int i = 0;; i++) {
2686 SdParam::Type type = allow_.get(i);
2687 if (type == SdParam::invalid)
2690 builder.appendFragment(ParserMessages::listSep);
2693 builder.appendFragment(ParserMessages::entityEnd);
2695 case SdParam::minimumLiteral:
2696 builder.appendFragment(ParserMessages::minimumLiteral);
// A '>' character (in the document character set) wrapped in the
// delimStart / delimEnd fragments.
2700 builder.appendFragment(ParserMessages::delimStart);
2701 Char c = sd_->execToDoc('>');
2702 builder.appendChars(&c, 1);
2703 builder.appendFragment(ParserMessages::delimEnd);
2706 case SdParam::number:
2707 builder.appendFragment(ParserMessages::number);
2710 builder.appendFragment(ParserMessages::name);
2712 case SdParam::paramLiteral:
2713 builder.appendFragment(ParserMessages::parameterLiteral);
2715 case SdParam::capacityName:
2716 builder.appendFragment(ParserMessages::capacityName);
2718 case SdParam::generalDelimiterName:
2719 builder.appendFragment(ParserMessages::generalDelimiteRoleName);
2721 case SdParam::referenceReservedName:
2722 builder.appendFragment(ParserMessages::referenceReservedName);
2724 case SdParam::quantityName:
2725 builder.appendFragment(ParserMessages::quantityName);
// The ellipsis is shown as the literal text "...".
2727 case SdParam::ellipsis:
2729 StringC str(sd_->execToDoc("..."));
2730 builder.appendChars(str.data(), str.size());
// The text of the SGML-declaration reserved name, whose index is the
// offset of `type` from SdParam::reservedName.
2735 StringC str(sd_->reservedName(type - SdParam::reservedName));
2736 builder.appendChars(str.data(), str.size());
// A new SdBuilder starts out valid (valid = 1) and not parsing an external
// syntax (externalSyntax = 0).
2743 SdBuilder::SdBuilder()
2744 : valid(1), externalSyntax(0)
// Records a formal-public-identifier error for later reporting: a new
// SdFormalError holding the location, message type and identifier is
// inserted into formalErrorList.
2748 void SdBuilder::addFormalError(const Location &location,
2749 const MessageType1 &message,
2752 formalErrorList.insert(new SdFormalError(location, message, id));
// Captures the data of a deferred error: the location is stored in
// location_; send() later also uses message_ and id_.
2755 SdFormalError::SdFormalError(const Location &location,
2756 const MessageType1 &message,
2758 : location_(location),
// Emits the stored error through `parser`: the saved location is installed
// as the location of the next message, then the saved message is issued
// with the identifier as its argument.
2764 void SdFormalError::send(ParserState &parser)
2766 parser.Messenger::setNextLocation(location_);
2767 parser.message(*message_, StringMessageArg(id_));
// Constructs a CharSwitcher. Switch pairs are added afterwards with
// addSwitch().
2770 CharSwitcher::CharSwitcher()
// Registers a switch from character `from` to character `to`.
// switches_ stores the pairs flattened (from at even indices, to at the
// following odd index); switchUsed_ gets one "not yet used" flag per pair.
2774 void CharSwitcher::addSwitch(WideChar from, WideChar to)
2776 switches_.push_back(from);
2777 switches_.push_back(to);
2778 switchUsed_.push_back(0);
// Looks `c` up among the "from" characters of the registered switches
// (linear scan over the even indices of switches_). On the first match the
// switch is marked as used and its "to" character is returned.
2781 SyntaxChar CharSwitcher::subst(WideChar c)
2783 for (size_t i = 0; i < switches_.size(); i += 2)
2784 if (switches_[i] == c) {
// i indexes the flattened pair array, so i/2 is the switch number.
2785 switchUsed_[i/2] = 1;
2786 return switches_[i + 1];
// Number of registered switches (one switchUsed_ entry exists per switch).
2791 size_t CharSwitcher::nSwitches() const
2793 return switchUsed_.size();
// Whether switch number `i` has been applied by subst(). No bounds check is
// made here.
2796 Boolean CharSwitcher::switchUsed(size_t i) const
2798 return switchUsed_[i];
// The character that switch number `i` replaces (even slot of the pair in
// switches_). No bounds check is made here.
2801 WideChar CharSwitcher::switchFrom(size_t i) const
2803 return switches_[i*2];
// The character that switch number `i` substitutes (odd slot of the pair in
// switches_). No bounds check is made here.
2806 WideChar CharSwitcher::switchTo(size_t i) const
2808 return switches_[i*2 + 1];
// Message argument that renders a set of character numbers.
// NOTE(review): append() iterates the member set_, so `set` is presumably
// stored there - confirm against the initializer list.
2811 CharsetMessageArg::CharsetMessageArg(const ISet<WideChar> &set)
// Returns a heap-allocated copy of this message argument, made with the
// copy constructor.
2816 MessageArg *CharsetMessageArg::copy() const
2818 return new CharsetMessageArg(*this);
// Formats the character set into `builder` as a list of numbers. set_ is
// walked range by range; each range contributes its minimum and - as the
// visible lines show - its maximum, joined by listSep when the two are
// adjacent (max == min + 1) and by rangeSep otherwise. listSep is also the
// separator fragment emitted ahead of a range's minimum.
2821 void CharsetMessageArg::append(MessageBuilder &builder) const
2823 ISetIter<WideChar> iter(set_);
2826 while (iter.next(min, max)) {
2830 builder.appendFragment(ParserMessages::listSep);
2831 builder.appendNumber(min);
// Two adjacent numbers are shown as a plain list, longer runs as a range.
2833 builder.appendFragment(max == min + 1
2834 ? ParserMessages::listSep
2835 : ParserMessages::rangeSep);
2836 builder.appendNumber(max);