2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these libraries and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
23 /* $XConsortium: parseSd.C /main/2 1996/08/12 15:47:30 mgreess $ */
24 // Copyright (c) 1994, 1995 James Clark
25 // See the file COPYING for copying permission.
30 #include "SdFormalError.h"
31 #include "MessageBuilder.h"
32 #include "ParserMessages.h"
33 #include "MessageArg.h"
34 #include "CharsetRegistry.h"
37 #include "TokenMessageArg.h"
40 #include "NumericCharRefOrigin.h"
43 namespace SP_NAMESPACE {
// NOTE(review): the enclosing class header is not visible in this excerpt.
// These appear to be the members of CharSwitcher, the type of the `switcher`
// objects used throughout this file -- confirm against the full source.
// Visible interface: register a (from, to) switch, presumably map a character
// through subst(), and enumerate the registered switches by index.
49 void addSwitch(WideChar from, WideChar to);
50 SyntaxChar subst(WideChar c);
51 size_t nSwitches() const;
52 Boolean switchUsed(size_t i) const;
53 WideChar switchFrom(size_t i) const;
54 WideChar switchTo(size_t i) const;
// Backing storage; presumably switches_ holds the from/to values and
// switchUsed_ one flag per switch -- the layout is not shown here, confirm.
56 Vector<PackedBoolean> switchUsed_;
57 Vector<WideChar> switches_;
60 // Information about the SGML declaration being built.
// NOTE(review): the struct header and some members are not visible in this
// excerpt; the members below are accessed as sdBuilder.<member> by the
// sdParse* functions later in the file.
// Called with a location, a message type and a string when a public
// identifier fails a formal check; presumably queues onto formalErrorList.
64 void addFormalError(const Location &, const MessageType1 &, const StringC &);
// Declaration and description of the syntax-reference character set
// (filled in by sdParseSyntaxCharset).
67 CharsetDecl syntaxCharsetDecl;
68 CharsetInfo syntaxCharset;
// Character switches collected from the SWITCHES parameter (sdParseSyntax).
69 CharSwitcher switcher;
// Set by sdParseSyntax when the concrete syntax is read from a referenced
// public entity.
70 Boolean externalSyntax;
// Drained by parseSgmlDecl, and only when sd->formal() is true.
72 IList<SdFormalError> formalErrorList;
// Message argument built from a set of character numbers. In this file it is
// passed to message() to list missing, duplicated or invalid characters.
// NOTE(review): access specifiers and data members are not visible in this
// excerpt.
75 class CharsetMessageArg : public MessageArg {
77 CharsetMessageArg(const ISet<WideChar> &set);
// copy()/append() are presumably the MessageArg overrides -- confirm.
78 MessageArg *copy() const;
79 void append(MessageBuilder &) const;
// NOTE(review): fragment of SdParam (the parameter object filled in by
// parseSdParam); the struct header, most enumerators and several members are
// not visible in this excerpt.
// Parameter kind; callers compare parm.type against SdParam::<enumerator>
// or against SdParam::reservedName + Sd::r<NAME>.
85 typedef unsigned char Type;
97 referenceReservedName,
99 reservedName // Sd::ReservedName is added to this
// Text of a parameter literal (read as parm.paramLiteralText).
104 String<SyntaxChar> paramLiteralText;
// Index payloads; which one is meaningful presumably depends on `type`
// (e.g. capacityIndex is read after a capacityName parameter).
107 Sd::Capacity capacityIndex;
108 Syntax::Quantity quantityIndex;
109 Syntax::ReservedName reservedNameIndex;
110 Syntax::DelimGeneral delimGeneralIndex;
// Set of up to maxAllow (6) parameter types that parseSdParam may accept at
// a given point in the SGML declaration.
// NOTE(review): access specifiers are not visible in this excerpt.
114 class AllowedSdParams {
// One required type plus up to five optional ones; unused slots default to
// SdParam::invalid.
116 AllowedSdParams(SdParam::Type,
117 SdParam::Type = SdParam::invalid,
118 SdParam::Type = SdParam::invalid,
119 SdParam::Type = SdParam::invalid,
120 SdParam::Type = SdParam::invalid,
121 SdParam::Type = SdParam::invalid);
// param(): presumably tests membership of a type; get(i): presumably returns
// the i-th stored type -- definitions are not visible here, confirm.
122 Boolean param(SdParam::Type) const;
123 SdParam::Type get(int i) const;
// Capacity matches the number of constructor parameters above.
125 enum { maxAllow = 6 };
126 SdParam::Type allow_[maxAllow];
// Message argument built from an AllowedSdParams set together with the Sd it
// refers to; stores its own copy of the allowed set (allow_).
// NOTE(review): access specifiers and the remaining data members are not
// visible in this excerpt.
129 class AllowedSdParamsMessageArg : public MessageArg {
131 AllowedSdParamsMessageArg(const AllowedSdParams &allow,
132 const ConstPtr<Sd> &sd);
// copy()/append() are presumably the MessageArg overrides -- confirm.
133 MessageArg *copy() const;
134 void append(MessageBuilder &) const;
136 AllowedSdParams allow_;
// Static description of a built-in concrete syntax: a table of added
// function characters plus further fields not visible in this excerpt.
140 struct StandardSyntaxSpec {
// One added function character. The initializer used below is
// { name-string, function class, character number }; the name member's
// declaration is not visible here.
141 struct AddedFunction {
143 Syntax::FunctionClass functionClass;
144 SyntaxChar syntaxChar;
// Pointer to the table and its element count (iterated in
// Parser::setStandardSyntax).
146 const AddedFunction *addedFunction;
147 size_t nAddedFunction;
// Added function characters shared by both built-in syntaxes: TAB is
// character 9 with function class SEPCHAR.
151 static StandardSyntaxSpec::AddedFunction coreFunctions[] = {
152 { "TAB", Syntax::cSEPCHAR, 9 },
// The two built-in syntax specs differ only in the third initializer
// (0 vs 1). NOTE(review): the field it initializes is not visible in this
// excerpt; presumably it selects whether short reference delimiters are
// added -- confirm.
155 static StandardSyntaxSpec coreSyntax = {
156 coreFunctions, SIZEOF(coreFunctions), 0
159 static StandardSyntaxSpec refSyntax = {
160 coreFunctions, SIZEOF(coreFunctions), 1
// Parser::doInit -- establishes the SGML declaration (explicit at the start
// of the input, from the entity catalog, or implied), reports it through the
// event handler, then prepares for and enters the prolog phase.
// NOTE(review): this excerpt omits some lines of the function (braces,
// returns, trailing arguments); comments describe only the visible code.
163 void Parser::doInit()
169 // When document entity doesn't exist, don't give any errors
170 // other than the cannot open error.
171 if (currentInput()->get(messenger()) == InputSource::eE) {
172 if (currentInput()->accessError()) {
178 currentInput()->ungetToken();
// Report any of the minimum required characters that the initial document
// character set cannot represent.
179 const CharsetInfo &initCharset = sd().docCharset();
180 ISet<WideChar> missing;
181 findMissingMinimum(initCharset, missing);
182 if (!missing.isEmpty()) {
183 message(ParserMessages::sdMissingCharacters, CharsetMessageArg(missing));
// First look for "<!SGML" at the start of the current input.
189 if (scanForSgmlDecl(initCharset))
192 currentInput()->ungetToken();
// Otherwise ask the entity catalog for an SGML declaration, open it, and
// scan that input instead.
193 if (entityCatalog().sgmlDecl(initCharset, messenger(), systemId)) {
194 InputSource *in = entityManager().open(systemId,
196 new InputSourceOrigin,
201 if (scanForSgmlDecl(initCharset))
204 message(ParserMessages::badDefaultSgmlDecl);
211 if (startMarkup(eventsWanted().wantPrologMarkup(), currentLocation())) {
// The scanned token is leading separators followed by the 6 characters
// "<!SGML" (see scanForSgmlDecl): everything before those 6 characters is
// recorded as S, and the last 4 characters are the SGML keyword.
212 size_t nS = currentInput()->currentTokenLength() - 6;
213 for (size_t i = 0; i < nS; i++)
214 currentMarkup()->addS(currentInput()->currentTokenStart()[i]);
215 currentMarkup()->addDelim(Syntax::dMDO);
216 currentMarkup()->addSdReservedName(Sd::rSGML,
217 currentInput()->currentTokenStart()
218 + (currentInput()->currentTokenLength() - 4),
// Build a syntax from the file-level refSyntax spec, with no character
// switches, before parsing the declaration itself.
221 Syntax *syntaxp = new Syntax(sd());
222 CharSwitcher switcher;
223 if (!setStandardSyntax(*syntaxp, refSyntax, sd().docCharset(),
228 syntaxp->implySgmlChar(sd().docCharset());
// Keep the Sd and Syntax in effect before parsing the declaration.
// Note: the local `refSyntax` below shadows the file-level static refSyntax
// for the rest of its scope.
231 ConstPtr<Sd> refSd(sdPointer());
232 ConstPtr<Syntax> refSyntax(syntaxPointer());
233 if (!parseSgmlDecl()) {
237 // queue an SGML declaration event
238 eventHandler().sgmlDecl(new (eventAllocator())
239 SgmlDeclEvent(sdPointer(),
241 instanceSyntaxPointer(),
244 currentInput()->nextIndex(),
248 if (inputLevel() == 2) {
249 // FIXME perhaps check for junk after SGML declaration
// No explicit declaration was found: imply one from the parser options.
254 if (!implySgmlDecl()) {
258 // queue an SGML declaration event
259 eventHandler().sgmlDecl(new (eventAllocator())
260 SgmlDeclEvent(sdPointer(),
264 // Now we have sd and syntax set up, prepare to parse the prolog.
265 compilePrologModes();
266 setPhase(prologPhase);
// Parser::implySgmlDecl -- builds a concrete syntax when the document has no
// explicit SGML declaration, using a built-in syntax spec and the quantities
// from the parser options.
// NOTE(review): some lines (the spec assignments, returns, the final
// installation of the syntax) are not visible in this excerpt.
269 Boolean Parser::implySgmlDecl()
271 Syntax *syntaxp = new Syntax(sd());
// The spec is chosen according to options().shortref; the two assignments
// are not visible here.
272 const StandardSyntaxSpec *spec;
273 if (options().shortref)
// No character switches apply to an implied declaration.
277 CharSwitcher switcher;
278 if (!setStandardSyntax(*syntaxp, *spec, sd().docCharset(), switcher))
280 syntaxp->implySgmlChar(sd().docCharset());
// Override every quantity with the value supplied in the options.
281 for (int i = 0; i < Syntax::nQuantity; i++)
282 syntaxp->setQuantity(i, options().quantity[i]);
// Parser::setStandardSyntax -- populates `syn` from a built-in syntax spec:
// shunned characters, the RE/RS/SPACE functions, added function characters,
// name characters, name case rules, general delimiters, reserved names and
// (conditionally) short reference delimiters. Characters are translated from
// a built-in syntax character set to `docCharset` through `switcher`.
// NOTE(review): this excerpt omits some lines (declarations, returns,
// trailing arguments); comments describe only the visible code.
287 Boolean Parser::setStandardSyntax(Syntax &syn,
288 const StandardSyntaxSpec &spec,
289 const CharsetInfo &docCharset,
290 CharSwitcher &switcher)
// Function-local static description of the syntax character set; the range
// contents are not visible here.
292 static UnivCharsetDesc::Range syntaxCharsetRanges[] = {
295 static UnivCharsetDesc syntaxCharsetDesc(syntaxCharsetRanges,
296 SIZEOF(syntaxCharsetRanges));
297 static CharsetInfo syntaxCharset(syntaxCharsetDesc);
300 if (!checkSwitches(switcher, syntaxCharset))
// A switch target of 128 or above is reported as not being in the character
// set (presumably the built-in set covers 0..127 -- confirm).
303 for (i = 0; i < switcher.nSwitches(); i++)
304 if (switcher.switchTo(i) >= 128)
305 message(ParserMessages::switchNotInCharset,
306 NumberMessageArg(switcher.switchTo(i)));
// Explicitly shunned character numbers, followed by the CONTROLS setting.
307 static const Char shunchar[] = {
308 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
309 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31,
313 for (i = 0; i < SIZEOF(shunchar); i++)
314 syn.addShunchar(shunchar[i]);
315 syn.setShuncharControls();
// standardFunctions[i] is paired with functionChars[i]: RE=13, RS=10,
// SPACE=32.
316 static Syntax::StandardFunction standardFunctions[3] = {
317 Syntax::fRE, Syntax::fRS, Syntax::fSPACE
319 static SyntaxChar functionChars[3] = { 13, 10, 32 };
320 for (i = 0; i < 3; i++) {
// Only set the function when translation succeeds and the resulting
// character passes checkNotFunction.
322 if (translateSyntax(switcher,
327 && checkNotFunction(syn, docChar))
328 syn.setStandardFunction(standardFunctions[i], docChar);
// Added function characters from the spec (e.g. TAB), handled the same way.
332 for (i = 0; i < spec.nAddedFunction; i++) {
334 if (translateSyntax(switcher,
337 spec.addedFunction[i].syntaxChar,
339 && checkNotFunction(syn, docChar))
340 syn.addFunctionChar(docCharset.execToDesc(spec.addedFunction[i].name),
341 spec.addedFunction[i].functionClass,
347 static SyntaxChar nameChars[2] = { 45, 46 }; // '-' '.'
348 ISet<Char> nameCharSet;
349 for (i = 0; i < 2; i++) {
351 if (translateSyntax(switcher,
356 nameCharSet.add(docChar);
360 if (!checkNmchars(nameCharSet, syn))
363 syn.addNameCharacters(nameCharSet);
// NAMECASE: GENERAL is set to 1, ENTITY to 0.
364 syn.setNamecaseGeneral(1);
365 syn.setNamecaseEntity(0);
366 if (!setRefDelimGeneral(syn, syntaxCharset, docCharset, switcher))
368 setRefNames(syn, docCharset);
369 syn.enterStandardFunctionNames();
// Short reference delimiters are added only under a condition whose first
// operand is not visible in this excerpt.
371 && !addRefDelimShortref(syn, syntaxCharset, docCharset, switcher))
// Parser::setRefDelimGeneral -- gives every general delimiter that is still
// unset its value from a built-in table (at most two characters each),
// translating each character through `switcher` into `docCharset`.
// NOTE(review): the table contents and some statements are not visible in
// this excerpt.
376 Boolean Parser::setRefDelimGeneral(Syntax &syntax,
377 const CharsetInfo &syntaxCharset,
378 const CharsetInfo &docCharset,
379 CharSwitcher &switcher)
381 // Column 3 from Figure 3
382 static const char delims[][2] = {
416 ISet<WideChar> missing;
417 for (int i = 0; i < Syntax::nDelimGeneral; i++)
// A zero-length delimiter means it has not been specified yet.
418 if (syntax.delimGeneral(i).size() == 0) {
// Entries shorter than 2 characters are NUL-terminated.
421 for (j = 0; j < 2 && delims[i][j] != '\0'; j++) {
422 UnivChar univChar = translateUniv(delims[i][j], switcher,
425 if (univToDescCheck(docCharset, univChar, c))
// Install the delimiter only if every character translated (delim has
// exactly j characters) and it passes checkGeneralDelim.
432 if (delim.size() == j) {
433 if (checkGeneralDelim(syntax, delim))
434 syntax.setDelimGeneral(i, delim);
// Report the contents of `missing` once, after the loop.
439 if (!missing.isEmpty())
440 message(ParserMessages::missingSignificant646, CharsetMessageArg(missing));
// Parser::setRefNames -- for each of the Syntax::nNames reserved names,
// converts the built-in reference name to the document character set and
// installs it if the syntax has no name for that slot yet.
// NOTE(review): the referenceNames table contents are not visible in this
// excerpt.
444 void Parser::setRefNames(Syntax &syntax, const CharsetInfo &docCharset)
446 static const char *const referenceNames[] = {
505 for (i = 0; i < Syntax::nNames; i++) {
506 StringC docName(docCharset.execToDesc(referenceNames[i]));
// A message is issued when the reference name is already known to the
// syntax as a reserved name (`tem` only receives the lookup result).
507 Syntax::ReservedName tem;
508 if (syntax.lookupReservedName(docName, &tem))
509 message(ParserMessages::nameReferenceReservedName,
510 StringMessageArg(docName));
// An empty name means this slot was not set explicitly; use the default.
511 if (syntax.reservedName(Syntax::ReservedName(i)).size() == 0)
512 syntax.setName(i, docName);
// Parser::addRefDelimShortref -- adds the built-in short reference
// delimiters (at most three characters each) to `syntax`, translating each
// character through `switcher` into `docCharset`.
// NOTE(review): the table contents and some statements are not visible in
// this excerpt.
516 Boolean Parser::addRefDelimShortref(Syntax &syntax,
517 const CharsetInfo &syntaxCharset,
518 const CharsetInfo &docCharset,
519 CharSwitcher &switcher)
521 // Column 2 from Figure 4
522 static const char delimShortref[][3] = {
556 ISet<WideChar> missing;
558 for (size_t i = 0; i < SIZEOF(delimShortref); i++) {
// Entries shorter than 3 characters are NUL-terminated.
562 for (j = 0; j < 3 && delimShortref[i][j] != '\0'; j++) {
564 UnivChar univChar = translateUniv(delimShortref[i][j], switcher,
566 if (univToDescCheck(docCharset, univChar, c))
// Proceed only if every character of the delimiter translated.
571 if (delim.size() == j) {
// The duplicate check is made only when character switches are present.
572 if (switcher.nSwitches() > 0 && syntax.isValidShortref(delim))
573 message(ParserMessages::duplicateDelimShortref,
574 StringMessageArg(delim));
576 syntax.addDelimShortref(delim, docCharset);
// Report the contents of `missing` once, after the loop.
579 if (!missing.isEmpty())
580 message(ParserMessages::missingSignificant646, CharsetMessageArg(missing));
584 // Determine whether the document starts with an SGML declaration.
585 // There is no current syntax at this point.
// The scan skips leading RS/RE/SPACE/TAB characters, then matches '<', '!'
// and the letters S, G, M, L (either case), and finally inspects one
// look-ahead character.
// NOTE(review): the return statements and some declarations are not visible
// in this excerpt.
587 Boolean Parser::scanForSgmlDecl(const CharsetInfo &initCharset)
// Obtain the document-charset codes of the four separator characters.
590 if (!univToDescCheck(initCharset, UnivCharsetDesc::rs, rs))
593 if (!univToDescCheck(initCharset, UnivCharsetDesc::re, re))
596 if (!univToDescCheck(initCharset, UnivCharsetDesc::space, space))
599 if (!univToDescCheck(initCharset, UnivCharsetDesc::tab, tab))
601 InputSource *in = currentInput();
602 Xchar c = in->get(messenger());
603 while (c == rs || c == space || c == re || c == tab)
604 c = in->tokenChar(messenger());
605 if (c != initCharset.execToDesc('<'))
607 if (in->tokenChar(messenger()) != initCharset.execToDesc('!'))
609 c = in->tokenChar(messenger());
610 if (c != initCharset.execToDesc('S')
611 && c != initCharset.execToDesc('s'))
613 c = in->tokenChar(messenger());
614 if (c != initCharset.execToDesc('G')
615 && c != initCharset.execToDesc('g'))
617 c = in->tokenChar(messenger());
618 if (c != initCharset.execToDesc('M')
619 && c != initCharset.execToDesc('m'))
621 c = in->tokenChar(messenger());
622 if (c != initCharset.execToDesc('L')
623 && c != initCharset.execToDesc('l'))
625 c = in->tokenChar(messenger());
626 // Don't recognize this if SGML is followed by a name character.
627 if (c == InputSource::eE)
// Drop the look-ahead character so the token ends right after "SGML".
629 in->endToken(in->currentTokenLength() - 1);
// The look-ahead is tested against '-', '.', letters and digits.
630 if (c == initCharset.execToDesc('-'))
632 if (c == initCharset.execToDesc('.'))
635 if (!initCharset.descToUniv(c, univ))
637 if (UnivCharsetDesc::a <= univ && univ < UnivCharsetDesc::a + 26)
639 if (UnivCharsetDesc::A <= univ && univ < UnivCharsetDesc::A + 26)
641 if (UnivCharsetDesc::zero <= univ && univ < UnivCharsetDesc::zero + 10)
// Parser::findMissingMinimum -- adds to `missing` every universal character
// number among A-Z, a-z, 0-9 and a fixed list of special characters for
// which univToDescCheck fails on `charset`.
// NOTE(review): local declarations (`i`, `to`) are not visible in this
// excerpt.
646 void Parser::findMissingMinimum(const CharsetInfo &charset,
647 ISet<WideChar> &missing)
// Upper- and lower-case letters.
651 for (i = 0; i < 26; i++) {
652 if (!univToDescCheck(charset, UnivCharsetDesc::A + i, to))
653 missing += UnivCharsetDesc::A + i;
654 if (!univToDescCheck(charset, UnivCharsetDesc::a + i, to))
655 missing += UnivCharsetDesc::a + i;
// Digits.
657 for (i = 0; i < 10; i++) {
659 if (!univToDescCheck(charset, UnivCharsetDesc::zero + i, to))
660 missing += UnivCharsetDesc::zero + i;
// Special characters by number; in ISO 646 numbering these are
// ' ( ) + , - . / : = ?
662 static const UnivChar special[] = {
663 39, 40, 41, 43, 44, 45, 46, 47, 58, 61, 63
666 for (i = 0; i < SIZEOF(special); i++)
667 if (!univToDescCheck(charset, special[i], to))
668 missing += special[i];
// Parser::parseSgmlDecl -- parses the body of an SGML declaration: the
// version literal, then each section in order via a table of member-function
// parsers, then the closing delimiter; finally installs the resulting Sd and
// syntax(es) on the parser.
// NOTE(review): this excerpt omits some lines (declarations, returns,
// trailing arguments); comments describe only the visible code.
672 Boolean Parser::parseSgmlDecl()
677 if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))
// A version literal other than "ISO 8879:1986" produces a message.
679 StringC version(sd().execToDoc("ISO 8879:1986"));
680 if (parm.literalText.string() != version)
681 message(ParserMessages::standardVersion,
682 StringMessageArg(parm.literalText.string()));
683 sdBuilder.sd = new Sd;
// Section parsers, invoked in this order.
684 typedef Boolean (Parser::*SdParser)(SdBuilder &, SdParam &);
685 static SdParser parsers[] = {
686 &Parser::sdParseDocumentCharset,
687 &Parser::sdParseCapacity,
688 &Parser::sdParseScope,
689 &Parser::sdParseSyntax,
690 &Parser::sdParseFeatures,
691 &Parser::sdParseAppinfo,
693 for (size_t i = 0; i < SIZEOF(parsers); i++) {
694 if (!(this->*(parsers[i]))(sdBuilder, parm))
// sdBuilder.valid is checked after each section.
696 if (!sdBuilder.valid)
// Expect the markup declaration close.
699 if (!parseSdParam(AllowedSdParams(SdParam::mdc), parm))
// The queued formal errors are processed only when sd->formal() is true.
701 if (sdBuilder.sd->formal()) {
702 while (!sdBuilder.formalErrorList.empty()) {
703 SdFormalError *p = sdBuilder.formalErrorList.get();
704 ParserState *state = this; // work around lcc 3.0 bug
709 setSd(sdBuilder.sd.pointer());
// With SCOPE INSTANCE a separate prolog syntax is built from the file-level
// refSyntax spec, given the declared syntax's sgmlChar set, and installed
// together with the declared syntax via setSyntaxes().
710 if (sdBuilder.sd->scopeInstance()) {
711 Syntax *proSyntax = new Syntax(sd());
712 CharSwitcher switcher;
// Note: the Boolean result of setStandardSyntax is not tested here.
713 setStandardSyntax(*proSyntax, refSyntax, sd().docCharset(), switcher);
714 proSyntax->setSgmlChar(*sdBuilder.syntax->charSet(Syntax::sgmlChar));
715 ISet<WideChar> invalidSgmlChar;
716 proSyntax->checkSgmlChar(sdBuilder.sd->docCharset(),
717 sdBuilder.syntax.pointer(),
719 sdBuilder.syntax->checkSgmlChar(sdBuilder.sd->docCharset(),
722 if (!invalidSgmlChar.isEmpty())
723 message(ParserMessages::invalidSgmlChar, CharsetMessageArg(invalidSgmlChar));
724 setSyntaxes(proSyntax, sdBuilder.syntax.pointer());
727 setSyntax(sdBuilder.syntax.pointer());
// A multicode syntax supplies its own markup scan table to the input.
728 if (syntax().multicode())
729 currentInput()->setMarkupScanTable(syntax().markupScanTable());
// Parser::sdParseDocumentCharset -- parses the CHARSET section: expects the
// CHARSET and BASESET keywords, delegates the character set description to
// sdParseCharset (isDocument = 1), reports missing minimum characters, and
// stores the result in sdBuilder.sd; also creates sdBuilder.syntax.
// NOTE(review): declarations of `decl` and `sgmlChar` and the returns are
// not visible in this excerpt.
733 Boolean Parser::sdParseDocumentCharset(SdBuilder &sdBuilder, SdParam &parm)
735 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rCHARSET),
738 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rBASESET),
742 UnivCharsetDesc desc;
743 if (!sdParseCharset(sdBuilder, parm, 1, decl, desc))
745 ISet<WideChar> missing;
746 findMissingMinimum(desc, missing);
747 if (!missing.isEmpty()) {
748 message(ParserMessages::missingMinimumChars,
749 CharsetMessageArg(missing));
// sgmlChar receives the set of used characters from the declaration and is
// later given to the new syntax.
753 decl.usedSet(sgmlChar);
754 sdBuilder.sd->setDocCharsetDesc(desc);
755 sdBuilder.sd->setDocCharsetDecl(decl);
// The Syntax is constructed from the Sd after its charset has been set.
756 sdBuilder.syntax = new Syntax(*sdBuilder.sd);
757 sdBuilder.syntax->setSgmlChar(sgmlChar);
// Parser::sdParseCharset -- parses one or more BASESET/DESCSET groups of a
// character set description. Each DESCSET entry is "min count" followed by a
// base-set number, a minimum literal, or UNUSED. Every entry is recorded in
// `decl`; mappings are added to `desc`. Used for both the document character
// set and the syntax-reference character set (see `isDocument` below).
// NOTE(review): this excerpt omits some lines (three of the parameters,
// declarations, returns, loop openers); comments describe only visible code.
761 Boolean Parser::sdParseCharset(SdBuilder &sdBuilder,
765 UnivCharsetDesc &desc)
768 ISet<WideChar> multiplyDeclared;
769 // This is for checking whether the syntax reference character set
770 // is ISO 646 when SCOPE is INSTANCE.
771 Boolean maybeISO646 = 1;
// The base set is named by a public identifier given as a minimum literal.
773 if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))
775 UnivCharsetDesc baseDesc;
778 PublicId::TextClass textClass;
779 const MessageType1 *err;
// Formal public identifier problems are queued on sdBuilder rather than
// reported immediately.
780 if (!id.init(parm.literalText, sd().docCharset(), syntax().space(), err))
781 sdBuilder.addFormalError(currentLocation(),
784 else if (id.getTextClass(textClass)
785 && textClass != PublicId::CHARSET)
786 sdBuilder.addFormalError(currentLocation(),
787 ParserMessages::basesetTextClass,
// Resolve the base set: either read it from a referenced entity, or look it
// up in the built-in registry; an unknown base set is reported only when
// options().warnSgmlDecl is set.
790 if (referencePublic(id, PublicId::CHARSET, givenError))
791 found = sdParseExternalCharset(*sdBuilder.sd, baseDesc);
792 else if (!givenError) {
793 found = CharsetRegistry::findCharset(id, sd().docCharset(), baseDesc);
794 if (!found && options().warnSgmlDecl)
795 message(ParserMessages::unknownBaseset, StringMessageArg(id.string()));
802 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rDESCSET),
805 if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
// Each described range is: first character number, then count.
808 WideChar min = parm.n;
809 if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
811 Number count = parm.n;
813 if (options().warnSgmlDecl && count == 0)
814 message(ParserMessages::zeroNumberOfCharacters);
815 decl.rangeDeclared(min, count, multiplyDeclared);
// The range extends past charMax: report it and compute adjCount, the
// number of characters that fit (0 if even `min` is beyond charMax).
818 && (min > charMax || count - 1 > charMax - min)) {
819 message(ParserMessages::documentCharMax, NumberMessageArg(charMax));
820 adjCount = min > charMax ? 0 : 1 + (charMax - min);
826 if (!parseSdParam(AllowedSdParams(SdParam::number,
827 SdParam::minimumLiteral,
828 SdParam::reservedName + Sd::rUNUSED),
// Number: the range maps onto base-set characters starting at parm.n.
832 case SdParam::number:
833 decl.addRange(min, count, parm.n);
// desc is only extended when the base set was found and at least one
// character remains after the charMax adjustment.
834 if (found && adjCount > 0) {
835 ISet<WideChar> baseMissing;
836 desc.addBaseRange(baseDesc, min, min + (adjCount - 1), parm.n,
838 if (!baseMissing.isEmpty() && options().warnSgmlDecl)
839 message(ParserMessages::basesetCharsMissing,
840 CharsetMessageArg(baseMissing));
// UNUSED: recorded in the declaration only.
843 case SdParam::reservedName + Sd::rUNUSED:
844 decl.addRange(min, count);
// Minimum literal: every character in the range maps to the single
// universal character named by the literal (one addRange call per character).
846 case SdParam::minimumLiteral:
848 UnivChar c = sdBuilder.sd->nameToUniv(parm.literalText.string());
849 if (adjCount > 256) {
850 message(ParserMessages::tooManyCharsMinimumLiteral);
853 for (Number i = 0; i < adjCount; i++)
854 desc.addRange(min + i, min + i, c);
857 decl.addRange(min, count, parm.literalText.string());
// The keyword that may follow the description depends on which character
// set is being parsed: CAPACITY for the document set, FUNCTION otherwise.
862 SdParam::Type follow = (isDocument
863 ? SdParam::reservedName + Sd::rCAPACITY
864 : SdParam::reservedName + Sd::rFUNCTION);
865 if (!parseSdParam(AllowedSdParams(SdParam::number,
866 SdParam::reservedName + Sd::rBASESET,
// Inner loop: further ranges in the same DESCSET; outer loop: further
// BASESET groups.
871 } while (parm.type == SdParam::number);
872 } while (parm.type == SdParam::reservedName + Sd::rBASESET);
873 if (!multiplyDeclared.isEmpty())
874 message(ParserMessages::duplicateCharNumbers,
875 CharsetMessageArg(multiplyDeclared));
// Report gaps between consecutive declared ranges.
876 ISet<WideChar> declaredSet;
877 decl.declaredSet(declaredSet);
878 ISetIter<WideChar> iter(declaredSet);
879 WideChar min, max, lastMax;
880 if (iter.next(min, max)) {
881 ISet<WideChar> holes;
883 while (iter.next(min, max)) {
884 if (min - lastMax > 1)
885 holes.addRange(lastMax + 1, min - 1);
888 if (!holes.isEmpty())
889 message(ParserMessages::codeSetHoles, CharsetMessageArg(holes));
891 if (!isDocument && sdBuilder.sd->scopeInstance()) {
892 // If scope is INSTANCE, syntax reference character set
893 // must be same as reference.
894 UnivCharsetDescIter iter(desc);
895 WideChar descMin, descMax;
897 if (!iter.next(descMin, descMax, univMin)
902 message(ParserMessages::scopeInstanceSyntaxCharset);
// Parser::sdParseExternalCharset -- reads a character set description from
// the current input: entries of "min count" followed by a number, a minimum
// literal, or UNUSED, until entity end. Mappings are added to `desc`.
// NOTE(review): this excerpt omits some lines (the loop, guards, returns);
// comments describe only the visible code.
907 Boolean Parser::sdParseExternalCharset(Sd &sd, UnivCharsetDesc &desc)
// Either another entry starts here (number) or the entity ends.
911 if (!parseSdParam(AllowedSdParams(SdParam::number, SdParam::eE),
914 if (parm.type == SdParam::eE)
916 WideChar min = parm.n;
917 if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
919 Number count = parm.n;
920 if (!parseSdParam(AllowedSdParams(SdParam::number,
921 SdParam::minimumLiteral,
922 SdParam::reservedName + Sd::rUNUSED),
// Number: map [min, min + count - 1] starting at universal number parm.n.
925 if (parm.type == SdParam::number) {
927 desc.addRange(min, min + (count - 1), parm.n);
// Minimum literal: every character in the range maps to the one universal
// character named by the literal.
929 else if (parm.type == SdParam::minimumLiteral) {
930 UnivChar c = sd.nameToUniv(parm.literalText.string());
932 message(ParserMessages::tooManyCharsMinimumLiteral);
935 for (Number i = 0; i < count; i++)
936 desc.addRange(min + i, min + i, c);
// Parser::sdParseCapacity -- parses the CAPACITY section, which is either
// PUBLIC followed by a public identifier or SGMLREF followed by
// "name number" pairs, and then expects the SCOPE keyword.
// NOTE(review): this excerpt omits some lines (declarations of `id`, `i`,
// `pushed`, `givenError`; returns; the `do {` opener); comments describe
// only the visible code.
943 Boolean Parser::sdParseCapacity(SdBuilder &sdBuilder, SdParam &parm)
945 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rPUBLIC,
946 SdParam::reservedName + Sd::rSGMLREF),
950 if (parm.type == SdParam::reservedName + Sd::rPUBLIC) {
951 if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))
954 PublicId::TextClass textClass;
955 const MessageType1 *err;
// Formal public identifier problems are queued on sdBuilder rather than
// reported immediately.
956 if (!id.init(parm.literalText, sd().docCharset(), syntax().space(), err))
957 sdBuilder.addFormalError(currentLocation(),
960 else if (id.getTextClass(textClass)
961 && textClass != PublicId::CAPACITY)
962 sdBuilder.addFormalError(currentLocation(),
963 ParserMessages::capacityTextClass,
// The reference capacity set (in either spelling of the standard's number)
// is recognized directly; any other identifier is resolved through
// referencePublic, and reported as unknown if that fails without having
// already given an error.
965 const StringC &str = id.string();
966 if (str != sd().execToDoc("ISO 8879-1986//CAPACITY Reference//EN")
967 && str != sd().execToDoc("ISO 8879:1986//CAPACITY Reference//EN")) {
969 if (referencePublic(id, PublicId::CAPACITY, givenError))
971 else if (!givenError)
972 message(ParserMessages::unknownCapacitySet, StringMessageArg(str));
975 return parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSCOPE),
// Tracks which capacities have been given so duplicates can be detected.
979 PackedBoolean capacitySpecified[Sd::nCapacity];
981 for (i = 0; i < Sd::nCapacity; i++)
982 capacitySpecified[i] = 0;
983 if (!parseSdParam(AllowedSdParams(SdParam::capacityName), parm))
// Save the index before parm is overwritten by the number that follows.
986 Sd::Capacity capacityIndex = parm.capacityIndex;
987 if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
// Only the first value given for a capacity takes effect.
990 if (!capacitySpecified[capacityIndex]) {
991 sdBuilder.sd->setCapacity(capacityIndex, parm.n);
992 capacitySpecified[capacityIndex] = 1;
// NOTE(review): `i` is not reassigned in the visible lines after the
// initialization loop above, which leaves it equal to Sd::nCapacity; the
// name reported here was presumably meant to be capacityName(capacityIndex).
// Looks like an out-of-range index -- confirm and fix.
994 else if (options().warnSgmlDecl)
995 message(ParserMessages::duplicateCapacity,
996 StringMessageArg(sd().capacityName(i)));
// When `pushed` is set the list is expected to end at entity end instead of
// at the SCOPE keyword (`pushed` is assigned in a line not visible here).
997 int final = pushed ? int(SdParam::eE) : SdParam::reservedName + Sd::rSCOPE;
998 if (!parseSdParam(AllowedSdParams(SdParam::capacityName, final),
1001 } while (parm.type == SdParam::capacityName);
// Capacity 0 is treated as the total; every other capacity is compared
// against it.
1002 Number totalcap = sdBuilder.sd->capacity(0);
1003 for (i = 1; i < Sd::nCapacity; i++)
1004 if (sdBuilder.sd->capacity(i) > totalcap)
1005 message(ParserMessages::capacityExceedsTotalcap,
1006 StringMessageArg(sd().capacityName(i)));
1008 return parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSCOPE),
// Parser::referencePublic -- looks up a public identifier in the entity
// catalog and, when found, emits an SgmlDeclEntityEvent, records an entity
// start in the current markup (if any), and opens the resolved system
// identifier as an input source.
// NOTE(review): this excerpt omits most arguments, the handling of the
// opened input, the use of `givenError`, and the returns; comments describe
// only the visible code.
1013 Boolean Parser::referencePublic(const PublicId &id,
1014 PublicId::TextClass entityType,
1015 Boolean &givenError)
1019 if (entityCatalog().lookupPublic(id.string(),
// Capture the location once; it is used for the origin below.
1023 Location loc = currentLocation();
1024 eventHandler().sgmlDeclEntity(new (eventAllocator())
1025 SgmlDeclEntityEvent(id,
1029 Ptr<EntityOrigin> origin(new EntityOrigin(loc));
// Markup is only recorded when a current markup object exists.
1030 if (currentMarkup())
1031 currentMarkup()->addEntityStart(origin);
1032 InputSource *in = entityManager().open(sysid,
// Parser::sdParseScope -- parses the SCOPE value, which must be INSTANCE or
// DOCUMENT; INSTANCE is recorded on the Sd being built.
// NOTE(review): the returns are not visible in this excerpt.
1047 Boolean Parser::sdParseScope(SdBuilder &sdBuilder, SdParam &parm)
1049 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rINSTANCE,
1050 SdParam::reservedName + Sd::rDOCUMENT),
// Only INSTANCE changes state in the visible code.
1053 if (parm.type == SdParam::reservedName + Sd::rINSTANCE)
1054 sdBuilder.sd->setScopeInstance();
// Parser::sdParseSyntax -- parses the SYNTAX section. After the SYNTAX
// keyword comes either SHUNCHAR (an explicit concrete syntax, handled by
// sdParseExplicitSyntax) or PUBLIC with a public identifier and optional
// SWITCHES; a public syntax is resolved via lookupSyntax or, failing that,
// via referencePublic.
// NOTE(review): this excerpt omits some lines (declarations, returns, else
// branches, trailing arguments); comments describe only the visible code.
1058 Boolean Parser::sdParseSyntax(SdBuilder &sdBuilder, SdParam &parm)
1060 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSYNTAX),
1063 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSHUNCHAR,
1064 SdParam::reservedName + Sd::rPUBLIC),
1068 if (parm.type == SdParam::reservedName + Sd::rPUBLIC) {
1069 if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm))
1072 const MessageType1 *err;
1073 PublicId::TextClass textClass;
// Formal public identifier problems are queued on sdBuilder rather than
// reported immediately.
1074 if (!id.init(parm.literalText, sd().docCharset(), syntax().space(), err))
1075 sdBuilder.addFormalError(currentLocation(),
1078 else if (id.getTextClass(textClass)
1079 && textClass != PublicId::SYNTAX)
1080 sdBuilder.addFormalError(currentLocation(),
1081 ParserMessages::syntaxTextClass,
1083 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rFEATURES,
1084 SdParam::reservedName + Sd::rSWITCHES),
// NOTE(review): charSwitches is not referenced in the visible lines; the
// switches are stored in sdBuilder.switcher instead -- possibly unused.
1087 Vector<UnivChar> charSwitches;
// SWITCHES is a list of number pairs, each added as (from, to).
1088 if (parm.type == SdParam::reservedName + Sd::rSWITCHES) {
1089 if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
1092 SyntaxChar c = parm.n;
1093 if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
1095 sdBuilder.switcher.addSwitch(c, parm.n);
1096 if (!parseSdParam(AllowedSdParams(SdParam::number,
1097 SdParam::reservedName
1101 if (parm.type != SdParam::number)
// Built-in syntaxes are recognized by public identifier.
1105 const StandardSyntaxSpec *spec = lookupSyntax(id);
1107 if (!setStandardSyntax(*sdBuilder.syntax,
1109 sdBuilder.sd->docCharset(),
1110 sdBuilder.switcher))
1111 sdBuilder.valid = 0;
// Otherwise the syntax is read from the referenced entity, parsed as an
// explicit syntax with a separate parameter object (parm2).
1115 if (referencePublic(id, PublicId::SYNTAX, givenError)) {
1116 sdBuilder.externalSyntax = 1;
1118 if (!parseSdParam(AllowedSdParams(SdParam::reservedName
1122 if (!sdParseExplicitSyntax(sdBuilder, parm2))
1127 message(ParserMessages::unknownPublicSyntax,
1128 StringMessageArg(id.string()));
1129 sdBuilder.valid = 0;
1134 if (!sdParseExplicitSyntax(sdBuilder, parm))
// With SCOPE DOCUMENT the SGML character check is done here; with SCOPE
// INSTANCE it is done in parseSgmlDecl.
1137 if (!sdBuilder.sd->scopeInstance()) {
1138 // we know the significant chars now
1139 ISet<WideChar> invalidSgmlChar;
1140 sdBuilder.syntax->checkSgmlChar(sdBuilder.sd->docCharset(),
1143 if (!invalidSgmlChar.isEmpty())
1144 message(ParserMessages::invalidSgmlChar, CharsetMessageArg(invalidSgmlChar));
1146 checkSyntaxNamelen(*sdBuilder.syntax);
1147 checkSwitchesMarkup(sdBuilder.switcher);
// Parser::sdParseExplicitSyntax -- parses an explicitly specified concrete
// syntax by invoking the section parsers in the fixed order listed in the
// table below.
// NOTE(review): the second parameter line and the returns are not visible
// in this excerpt.
1151 Boolean Parser::sdParseExplicitSyntax(SdBuilder &sdBuilder,
1154 typedef Boolean (Parser::*SdParser)(SdBuilder &, SdParam &);
1155 static SdParser parsers[] = {
1156 &Parser::sdParseShunchar,
1157 &Parser::sdParseSyntaxCharset,
1158 &Parser::sdParseFunction,
1159 &Parser::sdParseNaming,
1160 &Parser::sdParseDelim,
1161 &Parser::sdParseNames,
1162 &Parser::sdParseQuantity
// Each parser is called through a pointer to member on this Parser.
1164 for (size_t i = 0; i < SIZEOF(parsers); i++)
1165 if (!(this->*(parsers[i]))(sdBuilder, parm))
// Parser::lookupSyntax -- maps a public identifier to a built-in syntax
// spec. The visible checks require an ISO owner of "ISO 8879:1986" or
// "ISO 8879-1986", text class SYNTAX, and a description of "Reference" or
// "Core".
// NOTE(review): the return statements (and so which spec each description
// selects) are not visible in this excerpt.
1170 const StandardSyntaxSpec *Parser::lookupSyntax(const PublicId &id)
1172 PublicId::OwnerType ownerType;
1173 if (!id.getOwnerType(ownerType) || ownerType != PublicId::ISO)
1176 if (!id.getOwner(str))
// Both spellings of the standard's number are accepted.
1178 if (str != sd().execToDoc("ISO 8879:1986")
1179 && str != sd().execToDoc("ISO 8879-1986"))
1181 PublicId::TextClass textClass;
1182 if (!id.getTextClass(textClass) || textClass != PublicId::SYNTAX)
// `str` is reused for the description.
1184 if (!id.getDescription(str))
1186 if (str == sd().execToDoc("Reference"))
1188 if (str == sd().execToDoc("Core"))
// Parser::sdParseSyntaxCharset -- parses the syntax-reference character set
// (sdParseCharset with isDocument = 0), stores it in sdBuilder, validates
// the character switches against it, and reports missing minimum characters.
// NOTE(review): the returns are not visible in this excerpt.
1193 Boolean Parser::sdParseSyntaxCharset(SdBuilder &sdBuilder, SdParam &parm)
1195 UnivCharsetDesc desc;
1196 if (!sdParseCharset(sdBuilder, parm, 0, sdBuilder.syntaxCharsetDecl, desc))
1198 sdBuilder.syntaxCharset.set(desc);
// NOTE(review): the Boolean result of checkSwitches is ignored here, whereas
// setStandardSyntax tests it -- confirm this is intentional.
1199 checkSwitches(sdBuilder.switcher, sdBuilder.syntaxCharset);
// Every switch target must be declared in the syntax-reference charset.
1200 for (size_t i = 0; i < sdBuilder.switcher.nSwitches(); i++)
1201 if (!sdBuilder.syntaxCharsetDecl.charDeclared(sdBuilder.switcher.switchTo(i)))
1202 message(ParserMessages::switchNotInCharset,
1203 NumberMessageArg(sdBuilder.switcher.switchTo(i)));
1204 ISet<WideChar> missing;
1205 findMissingMinimum(sdBuilder.syntaxCharset, missing);
1206 if (!missing.isEmpty())
1207 message(ParserMessages::missingMinimumChars,
1208 CharsetMessageArg(missing));
// Parser::sdParseShunchar -- parses the SHUNCHAR value: NONE, or CONTROLS
// and/or a list of character numbers, followed by the BASESET keyword that
// starts the next section.
// NOTE(review): this excerpt omits some lines (returns, else, the loop
// opener); comments describe only the visible code.
1212 Boolean Parser::sdParseShunchar(SdBuilder &sdBuilder, SdParam &parm)
1214 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNONE,
1215 SdParam::reservedName + Sd::rCONTROLS,
1216 SdParam::number), parm))
// NONE must be followed directly by BASESET.
1218 if (parm.type == SdParam::reservedName + Sd::rNONE) {
1219 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rBASESET),
1224 if (parm.type == SdParam::reservedName + Sd::rCONTROLS)
1225 sdBuilder.syntax->setShuncharControls();
// Numbers above charMax are not added as shunned characters.
1227 if (parm.n <= charMax)
1228 sdBuilder.syntax->addShunchar(Char(parm.n));
// Further numbers may follow until BASESET is seen.
1231 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rBASESET,
1232 SdParam::number), parm))
1234 if (parm.type != SdParam::number)
1236 if (parm.n <= charMax)
1237 sdBuilder.syntax->addShunchar(Char(parm.n));
// Parser::sdParseFunction -- parses the FUNCTION section: the three standard
// functions RE, RS and SPACE (each a keyword followed by a character
// number), then any number of added functions (name, function class
// keyword, character number), ending when LCNMSTRT is seen.
// NOTE(review): this excerpt omits some lines (declarations, returns, loop
// openers, else branches); comments describe only the visible code.
1242 Boolean Parser::sdParseFunction(SdBuilder &sdBuilder, SdParam &parm)
1244 static Sd::ReservedName standardNames[3] = {
1245 Sd::rRE, Sd::rRS, Sd::rSPACE
1247 for (int i = 0; i < 3; i++) {
1248 if (!parseSdParam(AllowedSdParams(SdParam::reservedName
1249 + standardNames[i]),
1252 if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
// The loop index is cast directly to Syntax::StandardFunction, so this
// relies on that enum being ordered RE, RS, SPACE like standardNames
// (presumably -- confirm against the Syntax header).
1255 if (translateSyntax(sdBuilder, parm.n, c)) {
1256 if (checkNotFunction(*sdBuilder.syntax, c))
1257 sdBuilder.syntax->setStandardFunction(Syntax::StandardFunction(i), c);
1259 sdBuilder.valid = 0;
// Track MSICHAR/MSOCHAR so the dependency can be checked at the end.
1262 Boolean haveMsichar = 0;
1263 Boolean haveMsochar = 0;
// A function name may be given as a parameter literal only when the syntax
// comes from an external entity.
1265 if (!parseSdParam(sdBuilder.externalSyntax
1266 ? AllowedSdParams(SdParam::name, SdParam::paramLiteral)
1267 : AllowedSdParams(SdParam::name),
1270 Boolean nameWasLiteral;
1271 Boolean invalidName = 0;
1273 if (parm.type == SdParam::paramLiteral) {
1275 if (!translateSyntax(sdBuilder, parm.paramLiteralText, name))
1279 parm.token.swap(name);
// After a literal name only a function class is allowed; after a plain name
// LCNMSTRT is also allowed.
1282 if (!parseSdParam(nameWasLiteral
1283 ? AllowedSdParams(SdParam::reservedName + Sd::rFUNCHAR,
1284 SdParam::reservedName + Sd::rMSICHAR,
1285 SdParam::reservedName + Sd::rMSOCHAR,
1286 SdParam::reservedName + Sd::rMSSCHAR,
1287 SdParam::reservedName + Sd::rSEPCHAR)
1288 : AllowedSdParams(SdParam::reservedName + Sd::rFUNCHAR,
1289 SdParam::reservedName + Sd::rMSICHAR,
1290 SdParam::reservedName + Sd::rMSOCHAR,
1291 SdParam::reservedName + Sd::rMSSCHAR,
1292 SdParam::reservedName + Sd::rSEPCHAR,
1293 SdParam::reservedName + Sd::rLCNMSTRT),
// LCNMSTRT here means the preceding name should have been the NAMING
// keyword; anything else produces a message.
1296 if (parm.type == SdParam::reservedName + Sd::rLCNMSTRT) {
1297 if (name != sd().reservedName(Sd::rNAMING))
1298 message(ParserMessages::namingBeforeLcnmstrt,
1299 StringMessageArg(name));
1302 if (!nameWasLiteral) {
1305 if (!translateName(sdBuilder, tem, name))
// Map the keyword to the corresponding function class.
1308 Syntax::FunctionClass functionClass;
1309 switch (parm.type) {
1310 case SdParam::reservedName + Sd::rFUNCHAR:
1311 functionClass = Syntax::cFUNCHAR;
1313 case SdParam::reservedName + Sd::rMSICHAR:
1315 functionClass = Syntax::cMSICHAR;
1317 case SdParam::reservedName + Sd::rMSOCHAR:
1319 functionClass = Syntax::cMSOCHAR;
1321 case SdParam::reservedName + Sd::rMSSCHAR:
1322 functionClass = Syntax::cMSSCHAR;
1324 case SdParam::reservedName + Sd::rSEPCHAR:
1325 functionClass = Syntax::cSEPCHAR;
1330 if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
// Both translation and checkNotFunction must succeed (a further condition
// is not visible here).
1333 if (translateSyntax(sdBuilder, parm.n, c)
1334 && checkNotFunction(*sdBuilder.syntax, c)
// A name that is already a function name is reported as a duplicate.
1337 if (sdBuilder.syntax->lookupFunctionChar(name, &tem))
1338 message(ParserMessages::duplicateFunctionName, StringMessageArg(name));
1340 sdBuilder.syntax->addFunctionChar(name, functionClass, c);
// MSOCHAR without any MSICHAR produces a message.
1343 if (haveMsochar && !haveMsichar)
1344 message(ParserMessages::msocharRequiresMsichar);
1348 Boolean Parser::sdParseNaming(SdBuilder &sdBuilder, SdParam &parm)
1350 static Sd::ReservedName keys[4] = {
1351 Sd::rUCNMSTRT, Sd::rLCNMCHAR, Sd::rUCNMCHAR, Sd::rNAMECASE
1354 ISet<Char> nameStartChar;
1355 ISet<Char> nameChar;
1357 String<SyntaxChar> lc;
1358 Vector<size_t> rangeIndex;
1360 Boolean allowThrough = 0;
1362 if (!parseSdParam(sdBuilder.externalSyntax
1363 ? AllowedSdParams(SdParam::reservedName
1364 + keys[isNamechar * 2],
1365 SdParam::paramLiteral,
1369 ? AllowedSdParams(SdParam::paramLiteral)
1370 : AllowedSdParams(SdParam::reservedName
1371 + keys[isNamechar * 2])),
1375 Boolean wasRange = 0;
1376 sdParamConvertToLiteral(parm);
1377 if (parm.type == SdParam::ellipsis) {
1379 message(ParserMessages::sdInvalidEllipsis);
1380 if (!parseSdParam(AllowedSdParams(SdParam::paramLiteral,
1384 sdParamConvertToLiteral(parm);
1385 if (parm.paramLiteralText.size() == 0)
1386 message(ParserMessages::sdInvalidEllipsis);
1387 else if (allowThrough) {
1388 SyntaxChar n = parm.paramLiteralText[0];
1389 if (n < lc[lc.size() - 1])
1390 message(ParserMessages::sdInvalidRange);
1391 else if (n > lc[lc.size() - 1] + 1)
1392 rangeIndex.push_back(lc.size() - 1);
1396 if (parm.type != SdParam::paramLiteral)
1398 lc += parm.paramLiteralText;
1399 allowThrough = (parm.paramLiteralText.size() - wasRange) > 0;
1402 size_t rangeIndexPos = 0;
1403 unsigned long rangeLeft = 0;
1404 SyntaxChar nextRangeChar;
1405 ISet<Char> &set = isNamechar ? nameChar : nameStartChar;
1406 String<SyntaxChar> chars;
1410 if (!parseSdParam(sdBuilder.externalSyntax
1411 ? AllowedSdParams(SdParam::reservedName
1412 + keys[isNamechar * 2 + 1],
1413 SdParam::paramLiteral,
1417 ? AllowedSdParams(SdParam::paramLiteral)
1418 : AllowedSdParams(SdParam::reservedName
1419 + keys[isNamechar * 2 + 1])),
1422 sdParamConvertToLiteral(parm);
1424 Boolean isRange = parm.type == SdParam::ellipsis;
1425 size_t nChars = chars.size();
1428 for (size_t i = 0; i < nChars; i++) {
1430 && rangeIndexPos < rangeIndex.size()
1431 && rangeIndex[rangeIndexPos] == lcPos) {
1432 rangeLeft = 1 + lc[lcPos + 1] - lc[lcPos];
1433 nextRangeChar = lc[lcPos];
1438 if (rangeLeft > 0) {
1440 c = nextRangeChar++;
1442 else if (lcPos < lc.size())
1448 // map from c to chars[i]
1449 Char transLc, transUc;
1450 if (translateSyntax(sdBuilder, c, transLc)
1451 && translateSyntax(sdBuilder, chars[i], transUc)) {
1453 if (transLc != transUc) {
1455 sdBuilder.syntax->addSubst(transLc, transUc);
1460 if (!parseSdParam(AllowedSdParams(SdParam::paramLiteral,
1464 sdParamConvertToLiteral(parm);
1465 if (chars.size() == 0 || parm.paramLiteralText.size() == 0)
1466 message(ParserMessages::sdInvalidEllipsis);
1468 SyntaxChar start = chars[chars.size() - 1];
1469 SyntaxChar end = parm.paramLiteralText[0];
1471 message(ParserMessages::sdInvalidRange);
1473 size_t count = end + 1 - start;
1476 && rangeIndexPos < rangeIndex.size()
1477 && rangeIndex[rangeIndexPos] == lcPos) {
1478 rangeLeft = 1 + lc[lcPos + 1] - lc[lcPos];
1479 nextRangeChar = lc[lcPos];
1484 if (rangeLeft > 0) {
1486 c = nextRangeChar++;
1488 else if (lcPos < lc.size())
1494 if (c == start && count > 1 && (runOut || rangeLeft > 0)) {
1498 else if (rangeLeft < count)
1502 translateRange(sdBuilder, start, start + (count - 1), set);
1507 Char transLc, transUc;
1508 if (translateSyntax(sdBuilder, c, transLc)
1509 && translateSyntax(sdBuilder, start, transUc)) {
1511 if (transLc != transUc) {
1513 sdBuilder.syntax->addSubst(transLc, transUc);
1523 if (parm.type != SdParam::paramLiteral)
1525 chars.append(parm.paramLiteralText.data() + 1,
1526 parm.paramLiteralText.size() - 1);
1528 else if (parm.type == SdParam::paramLiteral)
1529 parm.paramLiteralText.swap(chars);
1533 if ((runOut && !sdBuilder.externalSyntax)
1534 || rangeLeft > 0 || lcPos < lc.size())
1536 ? ParserMessages::nmcharLength
1537 : ParserMessages::nmstrtLength);
1538 if (!checkNmchars(set, *sdBuilder.syntax))
1539 sdBuilder.valid = 0;
1540 } while (!isNamechar++);
1542 intersectCharSets(nameStartChar, nameChar, bad);
1543 if (!bad.isEmpty()) {
1544 sdBuilder.valid = 0;
1545 message(ParserMessages::nmcharNmstrt, CharsetMessageArg(bad));
1547 sdBuilder.syntax->addNameStartCharacters(nameStartChar);
1548 sdBuilder.syntax->addNameCharacters(nameChar);
1549 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rGENERAL),
1552 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNO,
1553 SdParam::reservedName + Sd::rYES),
1556 sdBuilder.syntax->setNamecaseGeneral(parm.type
1557 == SdParam::reservedName + Sd::rYES);
1559 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rENTITY),
1562 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNO,
1563 SdParam::reservedName + Sd::rYES),
1566 sdBuilder.syntax->setNamecaseEntity(parm.type
1567 == SdParam::reservedName + Sd::rYES);
// Validates a set of additional name characters against `syntax`.
// Reports nmcharLetter / nmcharDigit / nmcharSepchar when `set` overlaps the
// Syntax::nameStart, Syntax::digit or Syntax::sepchar character sets, and
// nmcharRe / nmcharRs / nmcharSpace when `set` contains the RE, RS or SPACE
// standard function character.
// NOTE(review): the return statements are on lines this listing omits;
// presumably the result is false when any check fails -- confirm.
1571 Boolean Parser::checkNmchars(const ISet<Char> &set, const Syntax &syntax)
// Overlap with the Syntax::nameStart set.
1575 intersectCharSets(set, *syntax.charSet(Syntax::nameStart), bad);
1576 if (!bad.isEmpty()) {
1577 message(ParserMessages::nmcharLetter, CharsetMessageArg(bad));
// Overlap with the Syntax::digit set.
1581 intersectCharSets(set, *syntax.charSet(Syntax::digit), bad);
1582 if (!bad.isEmpty()) {
1583 message(ParserMessages::nmcharDigit, CharsetMessageArg(bad));
// The three standard function characters are only checked when the
// syntax actually defines them (getStandardFunction succeeds).
1588 if (syntax.getStandardFunction(Syntax::fRE, funChar)
1589 && set.contains(funChar)) {
1590 message(ParserMessages::nmcharRe, NumberMessageArg(funChar));
1593 if (syntax.getStandardFunction(Syntax::fRS, funChar)
1594 && set.contains(funChar)) {
1595 message(ParserMessages::nmcharRs, NumberMessageArg(funChar));
1598 if (syntax.getStandardFunction(Syntax::fSPACE, funChar)
1599 && set.contains(funChar)) {
1600 message(ParserMessages::nmcharSpace, NumberMessageArg(funChar));
// Overlap with the Syntax::sepchar set.
1603 intersectCharSets(set, *syntax.charSet(Syntax::sepchar), bad);
1604 if (!bad.isEmpty()) {
1605 message(ParserMessages::nmcharSepchar, CharsetMessageArg(bad));
1611 // Result is an ISet<WideChar>, so it can be used with CharsetMessageArg.
// Adds to `inter` the characters that `s1` and `s2` have in common.
// Both sets are walked range by range with ISetIter; whenever the current
// ranges overlap, the overlapping part [max(min1,min2), min(max1,max2)] is
// added with addRange.  `inter` is only added to here, never cleared.
// NOTE(review): the conditions that decide which iterator advances are on
// lines this listing omits.
1613 void Parser::intersectCharSets(const ISet<Char> &s1, const ISet<Char> &s2,
1614 ISet<WideChar> &inter)
1616 ISetIter<Char> i1(s1);
1617 ISetIter<Char> i2(s2);
1618 Char min1, max1, min2, max2;
// Prime both iterators with their first range.
1619 if (!i1.next(min1, max1))
1621 if (!i2.next(min2, max2))
1625 if (!i1.next(min1, max1))
// Current range of s2 lies wholly below the current range of s1.
1628 else if (max2 < min1) {
1629 if (!i2.next(min2, max2))
// The ranges overlap: record the common part.
1635 Char min = min1 > min2 ? min1 : min2;
1636 Char max = max1 < max2 ? max1 : max2;
1637 inter.addRange(min, max);
1638 if (!i1.next(min1, max1))
1640 if (!i2.next(min2, max2))
// Parses the delimiter part of the concrete syntax in an SGML declaration.
// Visible sequence: the keywords DELIM, GENERAL and SGMLREF; then pairs of
// general delimiter name and literal until SHORTREF is seen; then SGMLREF or
// NONE; then short reference delimiter literals (optionally joined by an
// ellipsis to form a character range) until NAMES is seen.
// Problems with individual delimiters are reported through message() and,
// where visible, recorded by clearing sdBuilder.valid.
// NOTE(review): the return statements and loop headers are on lines this
// listing omits; presumably a failed parseSdParam makes the function return
// false -- confirm.
1646 Boolean Parser::sdParseDelim(SdBuilder &sdBuilder, SdParam &parm)
1648 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rDELIM),
1651 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rGENERAL),
1654 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF),
// Tracks which general delimiters were given explicitly so that a second
// assignment to the same delimiter can be reported and ignored.
1657 PackedBoolean delimGeneralSpecified[Syntax::nDelimGeneral];
1658 for (int i = 0; i < Syntax::nDelimGeneral; i++)
1659 delimGeneralSpecified[i] = 0;
1661 if (!parseSdParam(AllowedSdParams(SdParam::generalDelimiterName,
1662 SdParam::reservedName + Sd::rSHORTREF),
// SHORTREF ends the list of general delimiters.
1665 if (parm.type == SdParam::reservedName + Sd::rSHORTREF)
1667 Syntax::DelimGeneral delimGeneral = parm.delimGeneralIndex;
1668 if (delimGeneralSpecified[delimGeneral])
1669 message(ParserMessages::duplicateDelimGeneral,
1670 StringMessageArg(sd().generalDelimiterName(delimGeneral)));
1671 if (!parseSdParam(sdBuilder.externalSyntax
1672 ? AllowedSdParams(SdParam::paramLiteral,
1674 : AllowedSdParams(SdParam::paramLiteral),
// A number parameter is turned into a one-character literal.
1677 sdParamConvertToLiteral(parm);
1679 if (parm.paramLiteralText.size() == 0)
1680 message(ParserMessages::sdEmptyDelimiter);
1681 else if (translateSyntax(sdBuilder, parm.paramLiteralText, str)) {
// Pass every character of the delimiter through the general
// substitution table.
1682 const SubstTable<Char> *table = sdBuilder.syntax->generalSubstTable();
1683 for (size_t i = 0; i < str.size(); i++)
1684 table->subst(str[i]);
// Only the first valid assignment to a delimiter takes effect.
1685 if (checkGeneralDelim(*sdBuilder.syntax, str)
1686 && !delimGeneralSpecified[delimGeneral])
1687 sdBuilder.syntax->setDelimGeneral(delimGeneral, str);
1689 sdBuilder.valid = 0;
1691 delimGeneralSpecified[delimGeneral] = 1;
// presumably fills in reference values for delimiters that were not
// specified above -- confirm against setRefDelimGeneral.
1693 if (!setRefDelimGeneral(*sdBuilder.syntax,
1694 sdBuilder.syntaxCharset,
1695 sdBuilder.sd->docCharset(),
1696 sdBuilder.switcher))
1697 sdBuilder.valid = 0;
1698 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF,
1699 SdParam::reservedName + Sd::rNONE),
// With SGMLREF the reference short reference delimiters are added first.
1702 if (parm.type == SdParam::reservedName + Sd::rSGMLREF) {
1703 if (!addRefDelimShortref(*sdBuilder.syntax,
1704 sdBuilder.syntaxCharset,
1705 sdBuilder.sd->docCharset(),
1706 sdBuilder.switcher))
1707 sdBuilder.valid = 0;
// Remembers the previous literal so that "literal ... literal" can be
// interpreted as a range of single-character short references.
1709 String<SyntaxChar> lastLiteral;
1711 if (!parseSdParam(sdBuilder.externalSyntax
1712 ? AllowedSdParams(SdParam::paramLiteral,
1715 SdParam::reservedName + Sd::rNAMES)
1716 : AllowedSdParams(SdParam::paramLiteral,
1717 SdParam::reservedName + Sd::rNAMES),
1720 sdParamConvertToLiteral(parm);
1721 if (parm.type == SdParam::ellipsis) {
1722 if (!parseSdParam(AllowedSdParams(SdParam::paramLiteral,
1726 sdParamConvertToLiteral(parm);
// A range needs a single character on each side of the ellipsis, with
// the end not below the start; equal ends add nothing new.
1727 if (parm.paramLiteralText.size() == 0)
1728 message(ParserMessages::sdEmptyDelimiter);
1729 else if (lastLiteral.size() != 1
1730 || parm.paramLiteralText.size() != 1)
1731 message(ParserMessages::sdInvalidEllipsis);
1732 else if (parm.paramLiteralText[0] < lastLiteral[0])
1733 message(ParserMessages::sdInvalidRange);
1734 else if (parm.paramLiteralText[0] != lastLiteral[0]) {
1735 ISet<Char> shortrefChars;
1736 translateRange(sdBuilder,
1738 parm.paramLiteralText[0],
// Collect characters of the range that are already short references,
// either simple ones or single-character complex ones.
1740 ISet<WideChar> duplicates;
1741 intersectCharSets(shortrefChars,
1742 sdBuilder.syntax->delimShortrefSimple(),
1744 int nComplexShortrefs = sdBuilder.syntax->nDelimShortrefComplex();
1745 for (int i = 0; i < nComplexShortrefs; i++) {
1746 const StringC &delim = sdBuilder.syntax->delimShortrefComplex(i);
1747 if (delim.size() == 1 && shortrefChars.contains(delim[0]))
1748 duplicates.add(delim[0]);
1750 if (!duplicates.isEmpty())
1751 message(ParserMessages::duplicateDelimShortrefSet,
1752 CharsetMessageArg(duplicates));
1753 sdBuilder.syntax->addDelimShortrefs(shortrefChars,
1754 sdBuilder.sd->docCharset());
// The literal before the ellipsis has been consumed by the range.
1756 lastLiteral.resize(0);
1758 else if (parm.type == SdParam::paramLiteral) {
1759 parm.paramLiteralText.swap(lastLiteral);
1761 if (lastLiteral.size() == 0)
1762 message(ParserMessages::sdEmptyDelimiter);
1763 else if (translateSyntax(sdBuilder, lastLiteral, str)) {
1764 const SubstTable<Char> *table = sdBuilder.syntax->generalSubstTable();
1765 for (size_t i = 0; i < str.size(); i++)
1766 table->subst(str[i]);
1768 || checkShortrefDelim(*sdBuilder.syntax,
1769 sdBuilder.sd->docCharset(),
// A delimiter that is already a valid short reference is a duplicate.
1771 if (sdBuilder.syntax->isValidShortref(str))
1772 message(ParserMessages::duplicateDelimShortref,
1773 StringMessageArg(str));
1775 sdBuilder.syntax->addDelimShortref(str,
1776 sdBuilder.sd->docCharset());
// Parses the reserved name replacements of the concrete syntax.
// Visible sequence: SGMLREF, then pairs of reference reserved name and
// replacement until QUANTITY is seen.  Each replacement is translated into
// the document character set, checked against existing reserved names and
// against the name syntax being declared, passed through the general
// substitution table and installed with setName.  Afterwards setRefNames is
// called, the RE / RS / SPACE names are checked with lookupFunctionChar, and
// enterStandardFunctionNames is called.
// NOTE(review): return statements and the loop header are on lines this
// listing omits.
1786 Boolean Parser::sdParseNames(SdBuilder &sdBuilder, SdParam &parm)
1788 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF),
1792 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rQUANTITY,
1793 SdParam::referenceReservedName),
// QUANTITY ends the list of replacements.
1796 if (parm.type == SdParam::reservedName + Sd::rQUANTITY)
1798 Syntax::ReservedName reservedName = parm.reservedNameIndex;
// With an external syntax the replacement may also be a parameter literal.
1799 if (!parseSdParam(sdBuilder.externalSyntax
1800 ? AllowedSdParams(SdParam::name, SdParam::paramLiteral)
1801 : AllowedSdParams(SdParam::name),
1805 if (parm.type == SdParam::name
1806 ? translateName(sdBuilder, parm.token, transName)
1807 : translateSyntax(sdBuilder, parm.paramLiteralText, transName)) {
// The replacement must not already be known as a reserved name.
1808 Syntax::ReservedName tem;
1809 if (sdBuilder.syntax->lookupReservedName(transName, &tem))
1810 message(ParserMessages::ambiguousReservedName,
1811 StringMessageArg(transName));
// An invalid replacement is reported and emptied so that it is not
// installed below.
1813 if (transName.size() == 0
1814 || !sdBuilder.syntax->isNameStartCharacter(transName[0])) {
1815 message(ParserMessages::reservedNameSyntax,
1816 StringMessageArg(transName));
1817 transName.resize(0);
1820 // Check that it is a valid name in the declared syntax
1821 // (- and . might not be name characters).
1822 for (i = 1; i < transName.size(); i++)
1823 if (!sdBuilder.syntax->isNameCharacter(transName[i])) {
1824 message(ParserMessages::reservedNameSyntax,
1825 StringMessageArg(transName));
1826 transName.resize(0);
1829 for (i = 0; i < transName.size(); i++)
1830 sdBuilder.syntax->generalSubstTable()->subst(transName[i]);
// A reserved name may be replaced only once.
1831 if (sdBuilder.syntax->reservedName(reservedName).size() > 0)
1832 message(ParserMessages::duplicateReservedName,
1833 StringMessageArg(syntax().reservedName(reservedName)));
1834 else if (transName.size() > 0)
1835 sdBuilder.syntax->setName(reservedName, transName);
1837 sdBuilder.valid = 0;
// presumably supplies reference names for everything not replaced above --
// confirm against setRefNames.
1841 setRefNames(*sdBuilder.syntax, sdBuilder.sd->docCharset());
1842 static Syntax::ReservedName functionNameIndex[3] = {
1843 Syntax::rRE, Syntax::rRS, Syntax::rSPACE
// The names for RE, RS and SPACE must not match a name that
// lookupFunctionChar already resolves.
1845 for (int i = 0; i < 3; i++) {
1846 const StringC &functionName
1847 = sdBuilder.syntax->reservedName(functionNameIndex[i]);
1849 if (sdBuilder.syntax->lookupFunctionChar(functionName, &tem))
1850 message(ParserMessages::duplicateFunctionName, StringMessageArg(functionName));
1852 sdBuilder.syntax->enterStandardFunctionNames();
// Parses the quantity set of the concrete syntax: SGMLREF followed by
// pairs of quantity name and number, each stored with setQuantity, until the
// terminating parameter `final` is seen instead of a quantity name.
// When sdBuilder.sd->scopeInstance() is true, scopeInstanceQuantity is
// reported for every quantity whose new value is less than the value in the
// current syntax().
// NOTE(review): return statements, the loop header and the external-syntax
// alternative for `final` are on lines this listing omits.
1856 Boolean Parser::sdParseQuantity(SdBuilder &sdBuilder, SdParam &parm)
1858 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF),
// The parameter that ends the list depends on whether the syntax is
// external; for a non-external syntax it is FEATURES.
1862 int final = (sdBuilder.externalSyntax
1864 : SdParam::reservedName + Sd::rFEATURES);
1865 if (!parseSdParam(AllowedSdParams(SdParam::quantityName, final), parm))
1867 if (parm.type != SdParam::quantityName)
1869 Syntax::Quantity quantity = parm.quantityIndex;
1870 if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
1872 sdBuilder.syntax->setQuantity(quantity, parm.n);
1874 if (sdBuilder.sd->scopeInstance()) {
1875 for (int i = 0; i < Syntax::nQuantity; i++)
1876 if (sdBuilder.syntax->quantity(Syntax::Quantity(i))
1877 < syntax().quantity(Syntax::Quantity(i)))
1878 message(ParserMessages::scopeInstanceQuantity,
1879 StringMessageArg(sd().quantityName(Syntax::Quantity(i))));
// Parses the FEATURES section by walking the fixed `features` table in
// order.  Every entry requires its reserved name; entries whose kind is not
// __none are followed by NO or YES.  __number entries read a number after
// YES and are stored with setNumberFeature; the remaining entries are
// stored with setBooleanFeature.  The two counters assign consecutive
// Sd::NumberFeature / Sd::BooleanFeature indices in table order.
// NOTE(review): the declaration of FeatureInfo::arg, the values passed to
// the setters and the return statements are on lines this listing omits.
1884 Boolean Parser::sdParseFeatures(SdBuilder &sdBuilder, SdParam &parm)
1886 struct FeatureInfo {
1887 Sd::ReservedName name;
// Table order is the order in which the keywords must appear.
1894 static FeatureInfo features[] = {
1895 { Sd::rMINIMIZE, FeatureInfo::__none },
1896 { Sd::rDATATAG, FeatureInfo::__boolean },
1897 { Sd::rOMITTAG, FeatureInfo::__boolean },
1898 { Sd::rRANK, FeatureInfo::__boolean },
1899 { Sd::rSHORTTAG, FeatureInfo::__boolean },
1900 { Sd::rLINK, FeatureInfo::__none },
1901 { Sd::rSIMPLE, FeatureInfo::__number },
1902 { Sd::rIMPLICIT, FeatureInfo::__boolean },
1903 { Sd::rEXPLICIT, FeatureInfo::__number },
1904 { Sd::rOTHER, FeatureInfo::__none },
1905 { Sd::rCONCUR, FeatureInfo::__number },
1906 { Sd::rSUBDOC, FeatureInfo::__number },
1907 { Sd::rFORMAL, FeatureInfo::__boolean }
1909 int booleanFeature = 0;
1910 int numberFeature = 0;
1911 for (size_t i = 0; i < SIZEOF(features); i++) {
1912 if (!parseSdParam(AllowedSdParams(SdParam::reservedName
1913 + features[i].name), parm))
1915 if (features[i].arg != FeatureInfo::__none) {
1916 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNO,
1917 SdParam::reservedName + Sd::rYES),
// DATATAG YES is accepted syntactically but reported as not implemented.
1921 if (features[i].name == Sd::rDATATAG
1922 && parm.type == (SdParam::reservedName + Sd::rYES))
1923 message(ParserMessages::datatagNotImplemented);
1925 if (features[i].arg == FeatureInfo::__number) {
// Only YES is followed by a number.
1926 if (parm.type == SdParam::reservedName + Sd::rYES) {
1927 if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
1929 sdBuilder.sd->setNumberFeature(Sd::NumberFeature(numberFeature++),
1933 sdBuilder.sd->setNumberFeature(Sd::NumberFeature(numberFeature++),
1937 sdBuilder.sd->setBooleanFeature(Sd::BooleanFeature(booleanFeature++),
1938 parm.type == (SdParam::reservedName
// Parses the APPINFO keyword followed by either NONE or a minimum literal,
// then delivers an AppinfoEvent to the event handler.  The event carries
// the literal text when a literal was given and only the location
// otherwise.  The SdBuilder argument is unused (unnamed parameter).
// NOTE(review): return statements are on lines this listing omits.
1945 Boolean Parser::sdParseAppinfo(SdBuilder &, SdParam &parm)
1947 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rAPPINFO),
// The location is captured before the value is parsed.
1950 Location location(currentLocation());
1951 if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNONE,
1952 SdParam::minimumLiteral),
1955 AppinfoEvent *event;
1956 if (parm.type == SdParam::minimumLiteral)
1957 event = new (eventAllocator()) AppinfoEvent(parm.literalText, location);
1959 event = new (eventAllocator()) AppinfoEvent(location);
1960 eventHandler().appinfo(event);
// Translates one syntax character to a document character without an
// SdBuilder: the character is first passed through `switcher`, then mapped
// to a universal character with syntaxCharset.descToUniv and from there into
// docCharset with univToDescCheck.  If that chain fails, translateSyntaxChar
// is reported with the (switched) character number.
// NOTE(review): the output parameter and the return statements are on lines
// this listing omits.
1964 Boolean Parser::translateSyntax(CharSwitcher &switcher,
1965 const CharsetInfo &syntaxCharset,
1966 const CharsetInfo &docCharset,
1967 WideChar syntaxChar,
1970 syntaxChar = switcher.subst(syntaxChar);
1972 if (syntaxCharset.descToUniv(syntaxChar, univChar)
1973 && univToDescCheck(docCharset, univChar, docChar))
1975 message(ParserMessages::translateSyntaxChar, NumberMessageArg(syntaxChar));
// Translates the syntax character range [start, end] into document
// characters, which are added to `chars`.
// Visible logic of the chunked loop: the lowest switched character inside
// the remaining range is looked for (firstSwitch).  When it is `start`
// itself, that one character is translated with translateSyntax; otherwise
// a run starting at `start` is translated with translateSyntaxNoSwitch,
// clipped to the `count` it reports, and added as one range.  The loop
// continues from doneUpTo + 1 until doneUpTo reaches `end`.
// NOTE(review): lines 1985-1988 show a per-character loop and lines 1991
// onwards the chunked loop; the lines between and around them are omitted
// from this listing, so how the two relate (for example whether one is
// compiled out) cannot be determined here.
1979 void Parser::translateRange(SdBuilder &sdBuilder, SyntaxChar start,
1980 SyntaxChar end, ISet<Char> &chars)
1985 if (!translateSyntax(sdBuilder, start, docChar))
1988 } while (start++ != end);
1991 SyntaxChar doneUpTo = end;
1992 Boolean gotSwitch = 0;
1993 WideChar firstSwitch;
// Find the smallest switched character within [start, end].
1994 for (size_t i = 0; i < sdBuilder.switcher.nSwitches(); i++) {
1995 WideChar c = sdBuilder.switcher.switchFrom(i);
1996 if (start <= c && c <= end) {
2001 else if (c < firstSwitch)
2005 if (gotSwitch && firstSwitch == start) {
2008 if (translateSyntax(sdBuilder, start, docChar))
// Stop the unswitched run just before the first switched character.
2013 doneUpTo = firstSwitch - 1;
2016 if (translateSyntaxNoSwitch(sdBuilder, start, docChar, count)) {
// Do not go past the run length reported by translateSyntaxNoSwitch.
2017 if (count - 1 < doneUpTo - start)
2018 doneUpTo = start + (count - 1);
2019 chars.addRange(docChar, docChar + (doneUpTo - start));
2022 if (doneUpTo == end)
2024 start = doneUpTo + 1;
// Translates one syntax character to a document character: the character is
// passed through sdBuilder.switcher and the result is handed to
// translateSyntaxNoSwitch.
// NOTE(review): the remaining arguments of the call are on lines this
// listing omits.
2028 Boolean Parser::translateSyntax(SdBuilder &sdBuilder,
2029 WideChar syntaxChar, Char &docChar)
2032 return translateSyntaxNoSwitch(sdBuilder,
2033 sdBuilder.switcher.subst(syntaxChar),
// Translates a syntax character (already switched by the caller) to a
// document character.
// First attempt: look the character up in sdBuilder.syntaxCharsetDecl and,
// depending on the kind of declaration found, ask the document character set
// declaration for the matching characters (stringToChar / numberToChar).
// If any are found, the first one is used when it does not exceed charMax;
// more than one candidate is reported as ambiguousDocCharacter when
// options().warnSgmlDecl is set.
// Second attempt: go through universal characters with
// syntaxCharset.descToUniv and univToDescCheck.
// If translation fails, sdBuilder.valid is cleared and translateSyntaxChar
// is reported.
// NOTE(review): the `count` parameter declaration, several assignments to
// it and the return statements are on lines this listing omits; `count`
// presumably reports how many consecutive characters translate the same
// way -- confirm.
2038 Boolean Parser::translateSyntaxNoSwitch(SdBuilder &sdBuilder,
2039 WideChar syntaxChar, Char &docChar,
2044 CharsetDeclRange::Type type;
2046 if (sdBuilder.syntaxCharsetDecl.getCharInfo(syntaxChar,
2052 ISet<WideChar> docChars;
2054 case CharsetDeclRange::unused:
2056 case CharsetDeclRange::string:
2057 sdBuilder.sd->docCharsetDecl().stringToChar(str, docChars);
2059 case CharsetDeclRange::number:
2062 sdBuilder.sd->docCharsetDecl().numberToChar(id, n, docChars, count2);
2063 if (!docChars.isEmpty() && count2 < count)
2070 if (!docChars.isEmpty()) {
2071 if (!docChars.isSingleton() && options().warnSgmlDecl)
2072 message(ParserMessages::ambiguousDocCharacter,
2073 CharsetMessageArg(docChars));
// Use the start of the first range, provided it fits in a Char.
2074 ISetIter<WideChar> iter(docChars);
2076 if (iter.next(min, max) && min <= charMax) {
2077 docChar = Char(min);
// Fallback path through universal characters.
2083 WideChar alsoMax, count2;
2084 if (sdBuilder.syntaxCharset.descToUniv(syntaxChar, univChar, alsoMax)
2085 && univToDescCheck(sdBuilder.sd->docCharset(), univChar, docChar,
2087 count = (alsoMax - syntaxChar) + 1;
2092 sdBuilder.valid = 0;
2093 message(ParserMessages::translateSyntaxChar, NumberMessageArg(syntaxChar));
// Translates a string of syntax characters into `docString`, which is
// emptied first; each character goes through the single-character
// translateSyntax.
// NOTE(review): what happens to a character that fails to translate, and
// the return value, are on lines this listing omits.
2098 Boolean Parser::translateSyntax(SdBuilder &sdBuilder,
2099 const String<SyntaxChar> &syntaxString,
2102 docString.resize(0);
2104 for (size_t i = 0; i < syntaxString.size(); i++) {
2106 if (translateSyntax(sdBuilder, syntaxString[i], c))
// Translates a name read in the current document character set into the
// document character set being declared.  Each character is mapped to a
// universal character with sd().docCharset(), passed through translateUniv
// (which applies the character switches), and mapped back with
// univToDescCheck against sdBuilder.sd->docCharset().  A character that
// cannot be mapped back is reported as translateDocChar and clears
// sdBuilder.valid.
// NOTE(review): the output parameter declaration, the use of `ret` and the
// return statements are on lines this listing omits.
2114 Boolean Parser::translateName(SdBuilder &sdBuilder,
2115 const StringC &name,
2118 str.resize(name.size());
2119 for (size_t i = 0; i < name.size(); i++) {
2121 Boolean ret = sd().docCharset().descToUniv(name[i], univChar);
2122 // Might switch hyphen or period.
2123 univChar = translateUniv(univChar, sdBuilder.switcher,
2124 sdBuilder.syntaxCharset);
2126 if (!univToDescCheck(sdBuilder.sd->docCharset(), univChar, str[i])) {
2127 message(ParserMessages::translateDocChar, NumberMessageArg(univChar));
2128 sdBuilder.valid = 0;
// Applies the character switches to a universal character.  The character
// is mapped into the syntax character set; unless exactly one syntax
// character corresponds to it, missingSyntaxChar is reported.  Otherwise
// the syntax character is passed through `switcher` and, if that changed
// it, mapped back to a universal character (translateSyntaxChar is reported
// when that mapping fails).
// NOTE(review): the return statements are on lines this listing omits;
// presumably the possibly updated `univChar` is returned -- confirm.
2135 UnivChar Parser::translateUniv(UnivChar univChar,
2136 CharSwitcher &switcher,
2137 const CharsetInfo &syntaxCharset)
2139 WideChar syntaxChar;
2140 ISet<WideChar> syntaxChars;
2141 if (syntaxCharset.univToDesc(univChar, syntaxChar, syntaxChars) != 1) {
2142 message(ParserMessages::missingSyntaxChar,
2143 NumberMessageArg(univChar));
2146 SyntaxChar tem = switcher.subst(syntaxChar);
// descToUniv overwrites univChar only when the switch changed the character.
2147 if (tem != syntaxChar && !syntaxCharset.descToUniv(tem, univChar))
2148 message(ParserMessages::translateSyntaxChar, NumberMessageArg(tem));
// Reports oneFunction when `c` is already in the syntax's functionChar set.
// NOTE(review): the return statements are on lines this listing omits;
// presumably false is returned in that case and true otherwise -- confirm.
2152 Boolean Parser::checkNotFunction(const Syntax &syn, Char c)
2154 if (syn.charSet(Syntax::functionChar)->contains(c)) {
2155 message(ParserMessages::oneFunction, NumberMessageArg(c));
2163 // Check that it has at most one B sequence and that it
2164 // is not adjacent to a blank sequence.
// A B sequence is a run of the character that charset.execToDesc('B')
// yields; "blank" means membership in the syntax's Syntax::blank set.
// Reports multipleBSequence or blankAdjacentBSequence with the delimiter.
// NOTE(review): the flag that remembers an earlier B sequence and the
// return statements are on lines this listing omits.
2166 Boolean Parser::checkShortrefDelim(const Syntax &syn,
2167 const CharsetInfo &charset,
2168 const StringC &delim)
2171 Char letterB = charset.execToDesc('B');
2172 const ISet<Char> *bSet = syn.charSet(Syntax::blank);
2173 for (size_t i = 0; i < delim.size(); i++)
2174 if (delim[i] == letterB) {
2176 message(ParserMessages::multipleBSequence, StringMessageArg(delim));
// A blank immediately before the B sequence is not allowed.
2180 if (i > 0 && bSet->contains(delim[i - 1])) {
2181 message(ParserMessages::blankAdjacentBSequence,
2182 StringMessageArg(delim));
// Consecutive B characters belong to the same B sequence.
2185 while (i + 1 < delim.size() && delim[i + 1] == letterB)
// A blank immediately after the B sequence is not allowed either.
2187 if (i < delim.size() - 1 && bSet->contains(delim[i + 1])) {
2188 message(ParserMessages::blankAdjacentBSequence,
2189 StringMessageArg(delim));
// Checks a general delimiter against the syntax's functionChar set and
// reports generalDelimAllFunction with the delimiter.  Only a non-empty
// delimiter is examined.
// NOTE(review): the statements that clear `allFunction` and guard the
// message, and the return statements, are on lines this listing omits;
// presumably the message is issued when every character is a function
// character -- confirm.
2196 Boolean Parser::checkGeneralDelim(const Syntax &syn, const StringC &delim)
2198 const ISet<Char> *functionSet = syn.charSet(Syntax::functionChar);
2199 if (delim.size() > 0) {
2200 Boolean allFunction = 1;
2201 for (size_t i = 0; i < delim.size(); i++)
2202 if (!functionSet->contains(delim[i]))
2205 message(ParserMessages::generalDelimAllFunction,
2206 StringMessageArg(delim));
// Checks both characters of every switch in `switcher`.  A character that
// maps (through syntaxCharset.descToUniv) to a universal character in the
// ranges a-z, A-Z or 0-9 is reported as switchLetterDigit.
// NOTE(review): the declaration of `c`, the handling of characters that do
// not map at all, and the return statements are on lines this listing omits.
2213 Boolean Parser::checkSwitches(CharSwitcher &switcher,
2214 const CharsetInfo &syntaxCharset)
2217 for (size_t i = 0; i < switcher.nSwitches(); i++) {
2219 c[0] = switcher.switchFrom(i);
2220 c[1] = switcher.switchTo(i);
// j == 0 checks the "from" character, j == 1 the "to" character.
2221 for (int j = 0; j < 2; j++) {
2223 if (syntaxCharset.descToUniv(c[j], univChar)) {
2224 // Check that it is not Digit Lcletter or Ucletter
2225 if ((UnivCharsetDesc::a <= univChar
2226 && univChar < UnivCharsetDesc::a + 26)
2227 || (UnivCharsetDesc::A <= univChar
2228 && univChar < UnivCharsetDesc::A + 26)
2229 || (UnivCharsetDesc::zero <= univChar
2230 && univChar < UnivCharsetDesc::zero + 10)) {
2231 message(ParserMessages::switchLetterDigit,
2232 NumberMessageArg(univChar));
// Reports switchNotMarkup, with the switch's "from" character, for every
// switch in `switcher` whose used flag is not set.
// NOTE(review): the return statements are on lines this listing omits.
2241 Boolean Parser::checkSwitchesMarkup(CharSwitcher &switcher)
2244 size_t nSwitches = switcher.nSwitches();
2245 for (size_t i = 0; i < nSwitches; i++)
2246 if (!switcher.switchUsed(i)) {
2247 // If the switch wasn't used,
2248 // then the character wasn't a markup character.
2249 message(ParserMessages::switchNotMarkup,
2250 NumberMessageArg(switcher.switchFrom(i)));
// Compares delimiter and reserved name lengths with the syntax's NAMELEN.
// General delimiters and complex short reference delimiters longer than
// namelen are reported as delimiterLength.  Reserved names longer than
// namelen are reported as reservedNameLength, but only when
// options().warnSgmlDecl is set.
2256 void Parser::checkSyntaxNamelen(const Syntax &syn)
2258 size_t namelen = syn.namelen();
2260 for (i = 0; i < Syntax::nDelimGeneral; i++)
2261 if (syn.delimGeneral(i).size() > namelen)
2262 message(ParserMessages::delimiterLength,
2263 StringMessageArg(syn.delimGeneral(i)),
2264 NumberMessageArg(namelen));
2265 for (i = 0; i < syn.nDelimShortrefComplex(); i++)
2266 if (syn.delimShortrefComplex(i).size() > namelen)
2267 message(ParserMessages::delimiterLength,
2268 StringMessageArg(syn.delimShortrefComplex(i)),
2269 NumberMessageArg(namelen));
2270 for (i = 0; i < Syntax::nNames; i++)
2271 if (syn.reservedName(Syntax::ReservedName(i)).size() > namelen
2272 && options().warnSgmlDecl)
2273 message(ParserMessages::reservedNameLength,
2274 StringMessageArg(syn.reservedName(Syntax::ReservedName(i))),
2275 NumberMessageArg(namelen));
// Convenience overload for callers that do not need the run length: it
// forwards to the four-argument univToDescCheck.
// NOTE(review): the `to` parameter and the local `count` are declared on
// lines this listing omits.
2278 Boolean Parser::univToDescCheck(const CharsetInfo &charset, UnivChar from,
2282 return univToDescCheck(charset, from, to, count);
// Maps universal character `from` into `charset` with charset.univToDesc,
// which also fills `count`.  ambiguousDocCharacter is reported with the set
// of candidates when options().warnSgmlDecl is set.  The mapping is accepted
// only when univToDesc returned non-zero and the result does not exceed
// charMax.
// NOTE(review): the condition guarding the warning, the assignment to `to`
// and the return statements are on lines this listing omits; presumably the
// warning applies when more than one candidate was found -- confirm.
2285 Boolean Parser::univToDescCheck(const CharsetInfo &charset, UnivChar from,
2286 Char &to, WideChar &count)
2289 ISet<WideChar> descSet;
2290 unsigned ret = charset.univToDesc(from, c, descSet, count);
2292 if (options().warnSgmlDecl)
2293 message(ParserMessages::ambiguousDocCharacter,
2294 CharsetMessageArg(descSet));
2297 if (ret && c <= charMax) {
// Reads the next parameter of the SGML declaration.  Tokens are read in
// mdMode and classified into `parm` according to what `allow` permits:
// entity end, "..." (ellipsis), minimum or parameter literal, MDC, the
// various kinds of name, and number.  When currentMarkup() is non-null the
// recognised item is also recorded there.  A token that is not permitted is
// reported through sdParamInvalidToken or a more specific message.
// NOTE(review): the second parameter declaration, most case labels, the
// enclosing loop and all return statements are on lines this listing omits.
2304 Boolean Parser::parseSdParam(const AllowedSdParams &allow,
2308 Token token = getToken(mdMode);
2310 case tokenUnrecognized:
2311 if (reportNonSgmlCharacter())
2314 message(ParserMessages::markupDeclarationCharacter,
2315 StringMessageArg(currentToken()),
2316 AllowedSdParamsMessageArg(allow, sdPointer()));
// Entity end: accepted only when the caller allows it.
2320 if (allow.param(SdParam::eE)) {
2321 parm.type = SdParam::eE;
2322 if (currentMarkup())
2323 currentMarkup()->addEntityEnd();
2327 message(ParserMessages::sdEntityEnd,
2328 AllowedSdParamsMessageArg(allow, sdPointer()));
2331 if (currentMarkup())
2332 currentMarkup()->addS(currentChar());
2335 if (!parseComment(sdcomMode))
2340 case tokenMinusGrpo:
2343 case tokenPeroNameStart:
2345 sdParamInvalidToken(token, allow);
// A name-character token is only meaningful here as the ellipsis "...".
2347 case tokenLcUcNmchar:
2348 if (allow.param(SdParam::ellipsis)) {
2349 extendNameToken(syntax().namelen(), ParserMessages::nameLength);
2350 getCurrentToken(syntax().generalSubstTable(), parm.token);
2351 if (parm.token == sd().execToDoc("...")) {
2352 parm.type = SdParam::ellipsis;
2355 message(ParserMessages::sdInvalidNameToken,
2356 StringMessageArg(parm.token),
2357 AllowedSdParamsMessageArg(allow, sdPointer()));
2360 sdParamInvalidToken(token, allow);
// Literals: a minimum literal is tried before a parameter literal.
2366 Boolean lita = (token == tokenLita);
2367 if (allow.param(SdParam::minimumLiteral)) {
2368 if (!parseMinimumLiteral(lita, parm.literalText))
2370 parm.type = SdParam::minimumLiteral;
2371 if (currentMarkup())
2372 currentMarkup()->addLiteral(parm.literalText);
2374 else if (allow.param(SdParam::paramLiteral)) {
2375 if (!parseSdParamLiteral(lita, parm.paramLiteralText))
2377 parm.type = SdParam::paramLiteral;
2380 sdParamInvalidToken(token, allow);
2386 if (allow.param(SdParam::mdc)) {
2387 parm.type = SdParam::mdc;
2388 if (currentMarkup())
2389 currentMarkup()->addDelim(Syntax::dMDC);
2392 sdParamInvalidToken(tokenMdc, allow);
// Names are matched, in this order, against capacity names, reference
// reserved names, general delimiter names, quantity names, the allowed
// SGML declaration reserved names and finally a plain name.
2394 case tokenNameStart:
2396 extendNameToken(syntax().namelen(), ParserMessages::nameLength);
2397 getCurrentToken(syntax().generalSubstTable(), parm.token);
2398 if (allow.param(SdParam::capacityName)) {
2399 if (sd().lookupCapacityName(parm.token, parm.capacityIndex)) {
2400 parm.type = SdParam::capacityName;
2401 if (currentMarkup())
2402 currentMarkup()->addName(currentInput());
2406 if (allow.param(SdParam::referenceReservedName)) {
2407 if (syntax().lookupReservedName(parm.token,
2408 &parm.reservedNameIndex)) {
2409 parm.type = SdParam::referenceReservedName;
2410 if (currentMarkup())
2411 currentMarkup()->addName(currentInput());
2415 if (allow.param(SdParam::generalDelimiterName)) {
2416 if (sd().lookupGeneralDelimiterName(parm.token,
2417 parm.delimGeneralIndex)) {
2418 parm.type = SdParam::generalDelimiterName;
2419 if (currentMarkup())
2420 currentMarkup()->addName(currentInput());
2424 if (allow.param(SdParam::quantityName)) {
2425 if (sd().lookupQuantityName(parm.token, parm.quantityIndex)) {
2426 parm.type = SdParam::quantityName;
2427 if (currentMarkup())
2428 currentMarkup()->addName(currentInput());
// Walk the allowed list up to its SdParam::invalid sentinel; entries at
// or above SdParam::reservedName encode an Sd::ReservedName.
2432 for (int i = 0;; i++) {
2433 SdParam::Type t = allow.get(i);
2434 if (t == SdParam::invalid)
2436 if (t >= SdParam::reservedName) {
2437 Sd::ReservedName sdReservedName
2438 = Sd::ReservedName(t - SdParam::reservedName);
2439 if (parm.token == sd().reservedName(sdReservedName)) {
2441 if (currentMarkup())
2442 currentMarkup()->addSdReservedName(sdReservedName,
2448 if (allow.param(SdParam::name)) {
2449 parm.type = SdParam::name;
2450 if (currentMarkup())
2451 currentMarkup()->addName(currentInput());
2455 message(ParserMessages::sdInvalidNameToken,
2456 StringMessageArg(parm.token),
2457 AllowedSdParamsMessageArg(allow, sdPointer()));
// Numbers: a value that cannot be represented is reported as numberTooBig
// and replaced by Number(-1).
2462 if (allow.param(SdParam::number)) {
2463 extendNumber(syntax().namelen(), ParserMessages::numberLength);
2464 parm.type = SdParam::number;
2466 if (!stringToNumber(currentInput()->currentTokenStart(),
2467 currentInput()->currentTokenLength(),
2469 || n > Number(-1)) {
2470 message(ParserMessages::numberTooBig,
2471 StringMessageArg(currentToken()));
2472 parm.n = Number(-1);
2475 if (currentMarkup())
2476 currentMarkup()->addNumber(currentInput());
// Peek at the following token: a name start directly after the number is
// reported as psRequired; the token is pushed back either way.
2479 Token token = getToken(mdMode);
2480 if (token == tokenNameStart)
2481 message(ParserMessages::psRequired);
2482 currentInput()->ungetToken();
2485 sdParamInvalidToken(tokenDigit, allow);
2493 // This is a separate function, because we might want SyntaxChar
2494 // to be bigger than Char.
// Parses a parameter literal inside the SGML declaration into `str`.
// Tokens are read in sdplitaMode when `lita` is true and in sdplitMode
// otherwise.  Ordinary characters are appended as they are; a numeric
// character reference contributes the referenced number as a SyntaxChar.
// A literal longer than the reference LITLEN quantity is reported as
// parameterLiteralLength.  The collected text is copied to `str` and, when
// currentMarkup() is non-null, also recorded there.
// NOTE(review): the enclosing loop, most case labels and the return
// statements are on lines this listing omits.
2496 Boolean Parser::parseSdParamLiteral(Boolean lita, String<SyntaxChar> &str)
2498 Location loc(currentLocation());
2500 SdText text(loc, lita); // first character of content
2502 const unsigned refLitlen = Syntax::referenceQuantity(Syntax::qLITLEN);
2504 Mode mode = lita ? sdplitaMode : sdplitMode;
2507 Token token = getToken(mode);
2510 message(ParserMessages::literalLevel);
2512 case tokenUnrecognized:
2513 if (reportNonSgmlCharacter())
2515 if (options().errorSignificant)
2516 message(ParserMessages::sdLiteralSignificant,
2517 StringMessageArg(currentToken()));
2518 text.addChar(currentChar(), currentLocation());
// Numeric character reference: parse the digits that follow.
2522 InputSource *in = currentInput();
2523 Location startLocation = currentLocation();
2524 in->discardInitial();
2525 extendNumber(syntax().namelen(), ParserMessages::numberLength);
2528 if (!stringToNumber(in->currentTokenStart(),
2529 in->currentTokenLength(),
2531 || n > syntaxCharMax) {
2532 message(ParserMessages::syntaxCharacterNumber,
2533 StringMessageArg(currentToken()));
// Markup for the reference is only built when prolog markup events are
// wanted; otherwise the reference end token is read and discarded.
2538 Owner<Markup> markupPtr;
2539 if (eventsWanted().wantPrologMarkup()) {
2540 markupPtr = new Markup;
2541 markupPtr->addDelim(Syntax::dCRO);
2542 markupPtr->addNumber(in);
2543 switch (getToken(refMode)) {
2545 markupPtr->addDelim(Syntax::dREFC);
2548 markupPtr->addRefEndRe();
2555 (void)getToken(refMode);
2557 text.addChar(SyntaxChar(n),
2558 Location(new NumericCharRefOrigin(startLocation,
2559 currentLocation().index()
2560 + currentInput()->currentTokenLength()
2561 - startLocation.index(),
2566 case tokenCroNameStart:
2567 if (!parseNamedCharRef())
// A parameter entity reference is reported; the token's characters are
// then added to the literal text.
2574 case tokenPeroNameStart:
2576 message(ParserMessages::sdParameterEntity);
2578 Location loc(currentLocation());
2579 const Char *p = currentInput()->currentTokenStart();
2580 for (size_t count = currentInput()->currentTokenLength();
2583 text.addChar(*p++, loc);
// An over-long literal that reaches a record end is treated as a missing
// closing delimiter.
2589 if (text.string().size() > refLitlen
2590 && currentChar() == syntax().standardFunction(Syntax::fRE)) {
2591 message(ParserMessages::parameterLiteralLength, NumberMessageArg(refLitlen));
2592 // guess that the closing delimiter has been omitted
2593 message(ParserMessages::literalClosingDelimiter);
2596 text.addChar(currentChar(), currentLocation());
2601 if (text.string().size() > refLitlen)
2602 message(ParserMessages::parameterLiteralLength,
2603 NumberMessageArg(refLitlen));
2605 str = text.string();
2606 if (currentMarkup())
2607 currentMarkup()->addSdLiteral(text);
// Converts `length` digit characters starting at `s` to an unsigned long.
// Each character's value comes from sd().digitWeight.  Both the
// multiplication by ten and the addition of the digit are guarded against
// exceeding ULONG_MAX.
// NOTE(review): the accumulation statement, the overflow branch, the store
// into `result` and the return statements are on lines this listing omits;
// presumably false is returned on overflow -- confirm.
2611 Boolean Parser::stringToNumber(const Char *s, size_t length,
2612 unsigned long &result)
2614 unsigned long n = 0;
2615 for (; length > 0; length--, s++) {
2616 int val = sd().digitWeight(*s);
// n is multiplied only after the first test shows n * 10 cannot overflow.
2617 if (n <= ULONG_MAX/10 && (n *= 10) <= ULONG_MAX - val)
// Reports sdParamInvalidToken for `token` (described in mdMode) together
// with the list of parameters that `allow` would have accepted.
2626 void Parser::sdParamInvalidToken(Token token,
2627 const AllowedSdParams &allow)
2629 message(ParserMessages::sdParamInvalidToken,
2630 TokenMessageArg(token, mdMode, syntaxPointer(), sdPointer()),
2631 AllowedSdParamsMessageArg(allow, sdPointer()));
// Turns a number parameter into a parameter literal consisting of the
// single character whose number is parm.n, so that callers can treat both
// forms alike.  Parameters of any other type are left untouched.
2634 void Parser::sdParamConvertToLiteral(SdParam &parm)
2636 if (parm.type == SdParam::number) {
2637 parm.type = SdParam::paramLiteral;
2638 parm.paramLiteralText.resize(1);
2639 parm.paramLiteralText[0] = parm.n;
// Builds the list of permitted parameter types from up to six arguments.
// NOTE(review): the constructor body is on lines this listing omits;
// presumably the arguments are stored in allow_ in order -- confirm.
2643 AllowedSdParams::AllowedSdParams(SdParam::Type arg1, SdParam::Type arg2,
2644 SdParam::Type arg3, SdParam::Type arg4,
2645 SdParam::Type arg5, SdParam::Type arg6)
// Tests whether parameter type `t` is in the allowed list.  The scan stops
// at the first SdParam::invalid entry or after maxAllow entries.
// NOTE(review): the comparison and return statements are on lines this
// listing omits.
2655 Boolean AllowedSdParams::param(SdParam::Type t) const
2657 for (int i = 0; i < maxAllow && allow_[i] != SdParam::invalid; i++)
// Returns the i-th allowed parameter type, or SdParam::invalid when `i` is
// outside [0, maxAllow).
2663 SdParam::Type AllowedSdParams::get(int i) const
2665 return i < 0 || i >= maxAllow ? SdParam::Type(SdParam::invalid) : allow_[i];
// Message argument describing a set of allowed parameters; stores a copy of
// `allow` and the Sd pointer used by append() to spell names.
2668 AllowedSdParamsMessageArg::AllowedSdParamsMessageArg(
2669 const AllowedSdParams &allow,
2670 const ConstPtr<Sd> &sd)
2671 : allow_(allow), sd_(sd)
// Returns a newly allocated copy of this message argument.
2675 MessageArg *AllowedSdParamsMessageArg::copy() const
2677 return new AllowedSdParamsMessageArg(*this);
// Appends a human-readable list of the allowed parameters to `builder`.
// Entries are taken from allow_ until SdParam::invalid is returned, and
// ParserMessages::listSep is used between them.  Most kinds are described
// by a message fragment; the delimiter and the ellipsis are spelled using
// sd_->execToDoc, and reserved names using sd_->reservedName.
// NOTE(review): the switch header, several case labels and the condition
// guarding the separator are on lines this listing omits.
2680 void AllowedSdParamsMessageArg::append(MessageBuilder &builder) const
2682 for (int i = 0;; i++) {
2683 SdParam::Type type = allow_.get(i);
2684 if (type == SdParam::invalid)
2687 builder.appendFragment(ParserMessages::listSep);
2690 builder.appendFragment(ParserMessages::entityEnd);
2692 case SdParam::minimumLiteral:
2693 builder.appendFragment(ParserMessages::minimumLiteral);
// The '>' delimiter is shown between the delimStart / delimEnd fragments.
2697 builder.appendFragment(ParserMessages::delimStart);
2698 Char c = sd_->execToDoc('>');
2699 builder.appendChars(&c, 1);
2700 builder.appendFragment(ParserMessages::delimEnd);
2703 case SdParam::number:
2704 builder.appendFragment(ParserMessages::number);
2707 builder.appendFragment(ParserMessages::name);
2709 case SdParam::paramLiteral:
2710 builder.appendFragment(ParserMessages::parameterLiteral);
2712 case SdParam::capacityName:
2713 builder.appendFragment(ParserMessages::capacityName);
2715 case SdParam::generalDelimiterName:
2716 builder.appendFragment(ParserMessages::generalDelimiteRoleName);
2718 case SdParam::referenceReservedName:
2719 builder.appendFragment(ParserMessages::referenceReservedName);
2721 case SdParam::quantityName:
2722 builder.appendFragment(ParserMessages::quantityName);
2724 case SdParam::ellipsis:
2726 StringC str(sd_->execToDoc("..."));
2727 builder.appendChars(str.data(), str.size());
// Remaining types encode an Sd reserved name as an offset from
// SdParam::reservedName.
2732 StringC str(sd_->reservedName(type - SdParam::reservedName));
2733 builder.appendChars(str.data(), str.size());
// A fresh builder starts out valid and with externalSyntax cleared.
2740 SdBuilder::SdBuilder()
2741 : valid(1), externalSyntax(0)
// Records a formal error by inserting a new SdFormalError, built from the
// location, message type and identifier, into formalErrorList.
// NOTE(review): the third parameter declaration is on a line this listing
// omits.
2745 void SdBuilder::addFormalError(const Location &location,
2746 const MessageType1 &message,
2749 formalErrorList.insert(new SdFormalError(location, message, id));
// Stores the details of a formal error for later reporting by send().
// NOTE(review): only the initialisation of location_ is visible; the third
// parameter and the remaining initialisers are on lines this listing omits.
2752 SdFormalError::SdFormalError(const Location &location,
2753 const MessageType1 &message,
2755 : location_(location),
// Emits the stored error through `parser`: the stored location is set as
// the next message location, then the stored message is issued with id_ as
// its argument.
2761 void SdFormalError::send(ParserState &parser)
2763 parser.Messenger::setNextLocation(location_);
2764 parser.message(*message_, StringMessageArg(id_));
// Default constructor.
// NOTE(review): the body is on lines this listing omits.
2767 CharSwitcher::CharSwitcher()
// Registers a switch from character `from` to character `to`.  switches_
// holds the pairs flattened (from at even index, to at the following odd
// index); switchUsed_ gets one flag per pair, initially 0.
2771 void CharSwitcher::addSwitch(WideChar from, WideChar to)
2773 switches_.push_back(from);
2774 switches_.push_back(to);
2775 switchUsed_.push_back(0);
// Looks `c` up among the "from" characters (even indices of switches_).
// On the first match the pair is marked as used and its "to" character is
// returned.
// NOTE(review): the return for the no-match case is on a line this listing
// omits; presumably `c` is returned unchanged -- confirm.
2778 SyntaxChar CharSwitcher::subst(WideChar c)
2780 for (size_t i = 0; i < switches_.size(); i += 2)
2781 if (switches_[i] == c) {
2782 switchUsed_[i/2] = 1;
2783 return switches_[i + 1];
// Number of registered switches (one switchUsed_ flag exists per switch).
2788 size_t CharSwitcher::nSwitches() const
2790 return switchUsed_.size();
// Returns the used flag of switch `i`; the flag is set by subst().
// No bounds check is done here.
2793 Boolean CharSwitcher::switchUsed(size_t i) const
2795 return switchUsed_[i];
// Returns the "from" character of switch `i` (even slot of the pair).
// No bounds check is done here.
2798 WideChar CharSwitcher::switchFrom(size_t i) const
2800 return switches_[i*2];
// Returns the "to" character of switch `i` (odd slot of the pair).
// No bounds check is done here.
2803 WideChar CharSwitcher::switchTo(size_t i) const
2805 return switches_[i*2 + 1];
// Message argument that lists a set of character numbers.
// NOTE(review): the initialiser list and body are on lines this listing
// omits; presumably `set` is copied into set_ -- confirm.
2808 CharsetMessageArg::CharsetMessageArg(const ISet<WideChar> &set)
// Returns a newly allocated copy of this message argument.
2813 MessageArg *CharsetMessageArg::copy() const
2815 return new CharsetMessageArg(*this);
// Appends the character numbers in set_ to `builder`, range by range.
// Ranges are separated by ParserMessages::listSep.  Within a range the
// lower bound is followed by the upper bound, joined by listSep when the
// two are adjacent numbers and by rangeSep otherwise.
// NOTE(review): the declarations of min/max, the first-item handling and
// the condition guarding the upper bound are on lines this listing omits.
2818 void CharsetMessageArg::append(MessageBuilder &builder) const
2820 ISetIter<WideChar> iter(set_);
2823 while (iter.next(min, max)) {
2827 builder.appendFragment(ParserMessages::listSep);
2828 builder.appendNumber(min);
2830 builder.appendFragment(max == min + 1
2831 ? ParserMessages::listSep
2832 : ParserMessages::rangeSep);
2833 builder.appendNumber(max);