/* * CDE - Common Desktop Environment * * Copyright (c) 1993-2012, The Open Group. All rights reserved. * * These libraries and programs are free software; you can * redistribute them and/or modify them under the terms of the GNU * Lesser General Public License as published by the Free Software * Foundation; either version 2 of the License, or (at your option) * any later version. * * These libraries and programs are distributed in the hope that * they will be useful, but WITHOUT ANY WARRANTY; without even the * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR * PURPOSE. See the GNU Lesser General Public License for more * details. * * You should have received a copy of the GNU Lesser General Public * License along with these libraries and programs; if not, write * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth * Floor, Boston, MA 02110-1301 USA */ /* $XConsortium: parseSd.C /main/2 1996/08/12 15:47:30 mgreess $ */ // Copyright (c) 1994, 1995 James Clark // See the file COPYING for copying permission. #include "splib.h" #include "Parser.h" #include "macros.h" #include "SdFormalError.h" #include "MessageBuilder.h" #include "ParserMessages.h" #include "MessageArg.h" #include "CharsetRegistry.h" #include "ISetIter.h" #include "token.h" #include "TokenMessageArg.h" #include "constant.h" #include "SdText.h" #include "NumericCharRefOrigin.h" #ifdef SP_NAMESPACE namespace SP_NAMESPACE { #endif class CharSwitcher { public: CharSwitcher(); void addSwitch(WideChar from, WideChar to); SyntaxChar subst(WideChar c); size_t nSwitches() const; Boolean switchUsed(size_t i) const; WideChar switchFrom(size_t i) const; WideChar switchTo(size_t i) const; private: Vector switchUsed_; Vector switches_; }; // Information about the SGML declaration being built. 
struct SdBuilder { SdBuilder(); void addFormalError(const Location &, const MessageType1 &, const StringC &); Ptr sd; Ptr syntax; CharsetDecl syntaxCharsetDecl; CharsetInfo syntaxCharset; CharSwitcher switcher; Boolean externalSyntax; Boolean valid; IList formalErrorList; }; class CharsetMessageArg : public MessageArg { public: CharsetMessageArg(const ISet &set); MessageArg *copy() const; void append(MessageBuilder &) const; private: ISet set_; }; struct SdParam { typedef unsigned char Type; enum { invalid, eE, minimumLiteral, mdc, ellipsis, number, capacityName, name, paramLiteral, generalDelimiterName, referenceReservedName, quantityName, reservedName // Sd::ReservedName is added to this }; Type type; StringC token; Text literalText; String paramLiteralText; union { Number n; Sd::Capacity capacityIndex; Syntax::Quantity quantityIndex; Syntax::ReservedName reservedNameIndex; Syntax::DelimGeneral delimGeneralIndex; }; }; class AllowedSdParams { public: AllowedSdParams(SdParam::Type, SdParam::Type = SdParam::invalid, SdParam::Type = SdParam::invalid, SdParam::Type = SdParam::invalid, SdParam::Type = SdParam::invalid, SdParam::Type = SdParam::invalid); Boolean param(SdParam::Type) const; SdParam::Type get(int i) const; private: enum { maxAllow = 6 }; SdParam::Type allow_[maxAllow]; }; class AllowedSdParamsMessageArg : public MessageArg { public: AllowedSdParamsMessageArg(const AllowedSdParams &allow, const ConstPtr &sd); MessageArg *copy() const; void append(MessageBuilder &) const; private: AllowedSdParams allow_; ConstPtr sd_; }; struct StandardSyntaxSpec { struct AddedFunction { const char *name; Syntax::FunctionClass functionClass; SyntaxChar syntaxChar; }; const AddedFunction *addedFunction; size_t nAddedFunction; Boolean shortref; }; static StandardSyntaxSpec::AddedFunction coreFunctions[] = { { "TAB", Syntax::cSEPCHAR, 9 }, }; static StandardSyntaxSpec coreSyntax = { coreFunctions, SIZEOF(coreFunctions), 0 }; static StandardSyntaxSpec refSyntax = { 
coreFunctions, SIZEOF(coreFunctions), 1 }; void Parser::doInit() { if (cancelled()) { allDone(); return; } // When document entity doesn't exist, don't give any errors // other than the cannot open error. if (currentInput()->get(messenger()) == InputSource::eE) { if (currentInput()->accessError()) { allDone(); return; } } else currentInput()->ungetToken(); const CharsetInfo &initCharset = sd().docCharset(); ISet missing; findMissingMinimum(initCharset, missing); if (!missing.isEmpty()) { message(ParserMessages::sdMissingCharacters, CharsetMessageArg(missing)); giveUp(); return; } Boolean found = 0; StringC systemId; if (scanForSgmlDecl(initCharset)) found = 1; else { currentInput()->ungetToken(); if (entityCatalog().sgmlDecl(initCharset, messenger(), systemId)) { InputSource *in = entityManager().open(systemId, initCharset, new InputSourceOrigin, 0, messenger()); if (in) { pushInput(in); if (scanForSgmlDecl(initCharset)) found = 1; else { message(ParserMessages::badDefaultSgmlDecl); popInputStack(); } } } } if (found) { if (startMarkup(eventsWanted().wantPrologMarkup(), currentLocation())) { size_t nS = currentInput()->currentTokenLength() - 6; for (size_t i = 0; i < nS; i++) currentMarkup()->addS(currentInput()->currentTokenStart()[i]); currentMarkup()->addDelim(Syntax::dMDO); currentMarkup()->addSdReservedName(Sd::rSGML, currentInput()->currentTokenStart() + (currentInput()->currentTokenLength() - 4), 4); } Syntax *syntaxp = new Syntax(sd()); CharSwitcher switcher; if (!setStandardSyntax(*syntaxp, refSyntax, sd().docCharset(), switcher)) { giveUp(); return; } syntaxp->implySgmlChar(sd().docCharset()); setSyntax(syntaxp); compileSdModes(); ConstPtr refSd(sdPointer()); ConstPtr refSyntax(syntaxPointer()); if (!parseSgmlDecl()) { giveUp(); return; } // queue an SGML declaration event eventHandler().sgmlDecl(new (eventAllocator()) SgmlDeclEvent(sdPointer(), syntaxPointer(), instanceSyntaxPointer(), refSd, refSyntax, currentInput()->nextIndex(), systemId, 
markupLocation(), currentMarkup())); if (inputLevel() == 2) { // FIXME perhaps check for junk after SGML declaration popInputStack(); } } else { if (!implySgmlDecl()) { giveUp(); return; } // queue an SGML declaration event eventHandler().sgmlDecl(new (eventAllocator()) SgmlDeclEvent(sdPointer(), syntaxPointer())); } // Now we have sd and syntax set up, prepare to parse the prolog. compilePrologModes(); setPhase(prologPhase); } Boolean Parser::implySgmlDecl() { Syntax *syntaxp = new Syntax(sd()); const StandardSyntaxSpec *spec; if (options().shortref) spec = &refSyntax; else spec = &coreSyntax; CharSwitcher switcher; if (!setStandardSyntax(*syntaxp, *spec, sd().docCharset(), switcher)) return 0; syntaxp->implySgmlChar(sd().docCharset()); for (int i = 0; i < Syntax::nQuantity; i++) syntaxp->setQuantity(i, options().quantity[i]); setSyntax(syntaxp); return 1; } Boolean Parser::setStandardSyntax(Syntax &syn, const StandardSyntaxSpec &spec, const CharsetInfo &docCharset, CharSwitcher &switcher) { static UnivCharsetDesc::Range syntaxCharsetRanges[] = { { 0, 128, 0 }, }; static UnivCharsetDesc syntaxCharsetDesc(syntaxCharsetRanges, SIZEOF(syntaxCharsetRanges)); static CharsetInfo syntaxCharset(syntaxCharsetDesc); Boolean valid = 1; if (!checkSwitches(switcher, syntaxCharset)) valid = 0; size_t i; for (i = 0; i < switcher.nSwitches(); i++) if (switcher.switchTo(i) >= 128) message(ParserMessages::switchNotInCharset, NumberMessageArg(switcher.switchTo(i))); static const Char shunchar[] = { 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 127, 255 }; for (i = 0; i < SIZEOF(shunchar); i++) syn.addShunchar(shunchar[i]); syn.setShuncharControls(); static Syntax::StandardFunction standardFunctions[3] = { Syntax::fRE, Syntax::fRS, Syntax::fSPACE }; static SyntaxChar functionChars[3] = { 13, 10, 32 }; for (i = 0; i < 3; i++) { Char docChar; if (translateSyntax(switcher, syntaxCharset, docCharset, 
functionChars[i], docChar) && checkNotFunction(syn, docChar)) syn.setStandardFunction(standardFunctions[i], docChar); else valid = 0; } for (i = 0; i < spec.nAddedFunction; i++) { Char docChar; if (translateSyntax(switcher, syntaxCharset, docCharset, spec.addedFunction[i].syntaxChar, docChar) && checkNotFunction(syn, docChar)) syn.addFunctionChar(docCharset.execToDesc(spec.addedFunction[i].name), spec.addedFunction[i].functionClass, docChar); else valid = 0; } static SyntaxChar nameChars[2] = { 45, 46 }; // '-' '.' ISet nameCharSet; for (i = 0; i < 2; i++) { Char docChar; if (translateSyntax(switcher, syntaxCharset, docCharset, nameChars[i], docChar)) nameCharSet.add(docChar); else valid = 0; } if (!checkNmchars(nameCharSet, syn)) valid = 0; else syn.addNameCharacters(nameCharSet); syn.setNamecaseGeneral(1); syn.setNamecaseEntity(0); if (!setRefDelimGeneral(syn, syntaxCharset, docCharset, switcher)) valid = 0; setRefNames(syn, docCharset); syn.enterStandardFunctionNames(); if (spec.shortref && !addRefDelimShortref(syn, syntaxCharset, docCharset, switcher)) valid = 0; return valid; } Boolean Parser::setRefDelimGeneral(Syntax &syntax, const CharsetInfo &syntaxCharset, const CharsetInfo &docCharset, CharSwitcher &switcher) { // Column 3 from Figure 3 static const char delims[][2] = { { 38 }, { 45, 45 }, { 38, 35 }, { 93 }, { 91 }, { 93 }, { 91 }, { 38 }, { 60, 47 }, { 41 }, { 40 }, { 34 }, { 39 }, { 62 }, { 60, 33 }, { 45 }, { 93, 93 }, { 47 }, { 63 }, { 124 }, { 37 }, { 62 }, { 60, 63 }, { 43 }, { 59 }, { 42 }, { 35 }, { 44 }, { 60 }, { 62 }, { 61 }, }; Boolean valid = 1; ISet missing; for (int i = 0; i < Syntax::nDelimGeneral; i++) if (syntax.delimGeneral(i).size() == 0) { StringC delim; size_t j; for (j = 0; j < 2 && delims[i][j] != '\0'; j++) { UnivChar univChar = translateUniv(delims[i][j], switcher, syntaxCharset); Char c; if (univToDescCheck(docCharset, univChar, c)) delim += c; else { missing += univChar; valid = 0; } } if (delim.size() == j) { if 
(checkGeneralDelim(syntax, delim)) syntax.setDelimGeneral(i, delim); else valid = 0; } } if (!missing.isEmpty()) message(ParserMessages::missingSignificant646, CharsetMessageArg(missing)); return valid; } void Parser::setRefNames(Syntax &syntax, const CharsetInfo &docCharset) { static const char *const referenceNames[] = { "ANY", "ATTLIST", "CDATA", "CONREF", "CURRENT", "DEFAULT", "DOCTYPE", "ELEMENT", "EMPTY", "ENDTAG", "ENTITIES", "ENTITY", "FIXED", "ID", "IDLINK", "IDREF", "IDREFS", "IGNORE", "IMPLIED", "INCLUDE", "INITIAL", "LINK", "LINKTYPE", "MD", "MS", "NAME", "NAMES", "NDATA", "NMTOKEN", "NMTOKENS", "NOTATION", "NUMBER", "NUMBERS", "NUTOKEN", "NUTOKENS", "O", "PCDATA", "PI", "POSTLINK", "PUBLIC", "RCDATA", "RE", "REQUIRED", "RESTORE", "RS", "SDATA", "SHORTREF", "SIMPLE", "SPACE", "STARTTAG", "SUBDOC", "SYSTEM", "TEMP", "USELINK", "USEMAP" }; int i; for (i = 0; i < Syntax::nNames; i++) { StringC docName(docCharset.execToDesc(referenceNames[i])); Syntax::ReservedName tem; if (syntax.lookupReservedName(docName, &tem)) message(ParserMessages::nameReferenceReservedName, StringMessageArg(docName)); if (syntax.reservedName(Syntax::ReservedName(i)).size() == 0) syntax.setName(i, docName); } } Boolean Parser::addRefDelimShortref(Syntax &syntax, const CharsetInfo &syntaxCharset, const CharsetInfo &docCharset, CharSwitcher &switcher) { // Column 2 from Figure 4 static const char delimShortref[][3] = { { 9 }, { 13 }, { 10 }, { 10, 66 }, { 10, 13 }, { 10, 66, 13 }, { 66, 13 }, { 32 }, { 66, 66 }, { 34 }, { 35 }, { 37 }, { 39 }, { 40 }, { 41 }, { 42 }, { 43 }, { 44 }, { 45 }, { 45, 45 }, { 58 }, { 59 }, { 61 }, { 64 }, { 91 }, { 93 }, { 94 }, { 95 }, { 123 }, { 124 }, { 125 }, { 126 }, }; ISet missing; for (size_t i = 0; i < SIZEOF(delimShortref); i++) { StringC delim; size_t j; for (j = 0; j < 3 && delimShortref[i][j] != '\0'; j++) { Char c; UnivChar univChar = translateUniv(delimShortref[i][j], switcher, syntaxCharset); if (univToDescCheck(docCharset, univChar, c)) 
delim += c; else missing += univChar; } if (delim.size() == j) { if (switcher.nSwitches() > 0 && syntax.isValidShortref(delim)) message(ParserMessages::duplicateDelimShortref, StringMessageArg(delim)); else syntax.addDelimShortref(delim, docCharset); } } if (!missing.isEmpty()) message(ParserMessages::missingSignificant646, CharsetMessageArg(missing)); return 1; } // Determine whether the document starts with an SGML declaration. // There is no current syntax at this point. Boolean Parser::scanForSgmlDecl(const CharsetInfo &initCharset) { Char rs; if (!univToDescCheck(initCharset, UnivCharsetDesc::rs, rs)) return 0; Char re; if (!univToDescCheck(initCharset, UnivCharsetDesc::re, re)) return 0; Char space; if (!univToDescCheck(initCharset, UnivCharsetDesc::space, space)) return 0; Char tab; if (!univToDescCheck(initCharset, UnivCharsetDesc::tab, tab)) return 0; InputSource *in = currentInput(); Xchar c = in->get(messenger()); while (c == rs || c == space || c == re || c == tab) c = in->tokenChar(messenger()); if (c != initCharset.execToDesc('<')) return 0; if (in->tokenChar(messenger()) != initCharset.execToDesc('!')) return 0; c = in->tokenChar(messenger()); if (c != initCharset.execToDesc('S') && c != initCharset.execToDesc('s')) return 0; c = in->tokenChar(messenger()); if (c != initCharset.execToDesc('G') && c != initCharset.execToDesc('g')) return 0; c = in->tokenChar(messenger()); if (c != initCharset.execToDesc('M') && c != initCharset.execToDesc('m')) return 0; c = in->tokenChar(messenger()); if (c != initCharset.execToDesc('L') && c != initCharset.execToDesc('l')) return 0; c = in->tokenChar(messenger()); // Don't recognize this if SGML is followed by a name character. 
if (c == InputSource::eE) return 1; in->endToken(in->currentTokenLength() - 1); if (c == initCharset.execToDesc('-')) return 0; if (c == initCharset.execToDesc('.')) return 0; UnivChar univ; if (!initCharset.descToUniv(c, univ)) return 1; if (UnivCharsetDesc::a <= univ && univ < UnivCharsetDesc::a + 26) return 0; if (UnivCharsetDesc::A <= univ && univ < UnivCharsetDesc::A + 26) return 0; if (UnivCharsetDesc::zero <= univ && univ < UnivCharsetDesc::zero + 10) return 0; return 1; } void Parser::findMissingMinimum(const CharsetInfo &charset, ISet &missing) { Char to; size_t i; for (i = 0; i < 26; i++) { if (!univToDescCheck(charset, UnivCharsetDesc::A + i, to)) missing += UnivCharsetDesc::A + i; if (!univToDescCheck(charset, UnivCharsetDesc::a + i, to)) missing += UnivCharsetDesc::a + i; } for (i = 0; i < 10; i++) { Char to; if (!univToDescCheck(charset, UnivCharsetDesc::zero + i, to)) missing += UnivCharsetDesc::zero + i; } static const UnivChar special[] = { 39, 40, 41, 43, 44, 45, 46, 47, 58, 61, 63 }; for (i = 0; i < SIZEOF(special); i++) if (!univToDescCheck(charset, special[i], to)) missing += special[i]; } Boolean Parser::parseSgmlDecl() { SdParam parm; SdBuilder sdBuilder; if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm)) return 0; StringC version(sd().execToDoc("ISO 8879:1986")); if (parm.literalText.string() != version) message(ParserMessages::standardVersion, StringMessageArg(parm.literalText.string())); sdBuilder.sd = new Sd; typedef Boolean (Parser::*SdParser)(SdBuilder &, SdParam &); static SdParser parsers[] = { &Parser::sdParseDocumentCharset, &Parser::sdParseCapacity, &Parser::sdParseScope, &Parser::sdParseSyntax, &Parser::sdParseFeatures, &Parser::sdParseAppinfo, }; for (size_t i = 0; i < SIZEOF(parsers); i++) { if (!(this->*(parsers[i]))(sdBuilder, parm)) return 0; if (!sdBuilder.valid) return 0; } if (!parseSdParam(AllowedSdParams(SdParam::mdc), parm)) return 0; if (sdBuilder.sd->formal()) { while 
(!sdBuilder.formalErrorList.empty()) { SdFormalError *p = sdBuilder.formalErrorList.get(); ParserState *state = this; // work around lcc 3.0 bug p->send(*state); delete p; } } setSd(sdBuilder.sd.pointer()); if (sdBuilder.sd->scopeInstance()) { Syntax *proSyntax = new Syntax(sd()); CharSwitcher switcher; setStandardSyntax(*proSyntax, refSyntax, sd().docCharset(), switcher); proSyntax->setSgmlChar(*sdBuilder.syntax->charSet(Syntax::sgmlChar)); ISet invalidSgmlChar; proSyntax->checkSgmlChar(sdBuilder.sd->docCharset(), sdBuilder.syntax.pointer(), invalidSgmlChar); sdBuilder.syntax->checkSgmlChar(sdBuilder.sd->docCharset(), proSyntax, invalidSgmlChar); if (!invalidSgmlChar.isEmpty()) message(ParserMessages::invalidSgmlChar, CharsetMessageArg(invalidSgmlChar)); setSyntaxes(proSyntax, sdBuilder.syntax.pointer()); } else setSyntax(sdBuilder.syntax.pointer()); if (syntax().multicode()) currentInput()->setMarkupScanTable(syntax().markupScanTable()); return 1; } Boolean Parser::sdParseDocumentCharset(SdBuilder &sdBuilder, SdParam &parm) { if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rCHARSET), parm)) return 0; if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rBASESET), parm)) return 0; CharsetDecl decl; UnivCharsetDesc desc; if (!sdParseCharset(sdBuilder, parm, 1, decl, desc)) return 0; ISet missing; findMissingMinimum(desc, missing); if (!missing.isEmpty()) { message(ParserMessages::missingMinimumChars, CharsetMessageArg(missing)); return 0; } ISet sgmlChar; decl.usedSet(sgmlChar); sdBuilder.sd->setDocCharsetDesc(desc); sdBuilder.sd->setDocCharsetDecl(decl); sdBuilder.syntax = new Syntax(*sdBuilder.sd); sdBuilder.syntax->setSgmlChar(sgmlChar); return 1; } Boolean Parser::sdParseCharset(SdBuilder &sdBuilder, SdParam &parm, Boolean isDocument, CharsetDecl &decl, UnivCharsetDesc &desc) { decl.clear(); ISet multiplyDeclared; // This is for checking whether the syntax reference character set // is ISO 646 when SCOPE is INSTANCE. 
Boolean maybeISO646 = 1; do { if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm)) return 0; UnivCharsetDesc baseDesc; PublicId id; Boolean found; PublicId::TextClass textClass; const MessageType1 *err; if (!id.init(parm.literalText, sd().docCharset(), syntax().space(), err)) sdBuilder.addFormalError(currentLocation(), *err, id.string()); else if (id.getTextClass(textClass) && textClass != PublicId::CHARSET) sdBuilder.addFormalError(currentLocation(), ParserMessages::basesetTextClass, id.string()); Boolean givenError; if (referencePublic(id, PublicId::CHARSET, givenError)) found = sdParseExternalCharset(*sdBuilder.sd, baseDesc); else if (!givenError) { found = CharsetRegistry::findCharset(id, sd().docCharset(), baseDesc); if (!found && options().warnSgmlDecl) message(ParserMessages::unknownBaseset, StringMessageArg(id.string())); } else found = 0; if (!found) maybeISO646 = 0; decl.addSection(id); if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rDESCSET), parm)) return 0; if (!parseSdParam(AllowedSdParams(SdParam::number), parm)) return 0; do { WideChar min = parm.n; if (!parseSdParam(AllowedSdParams(SdParam::number), parm)) return 0; Number count = parm.n; Number adjCount; if (options().warnSgmlDecl && count == 0) message(ParserMessages::zeroNumberOfCharacters); decl.rangeDeclared(min, count, multiplyDeclared); if (isDocument && count > 0 && (min > charMax || count - 1 > charMax - min)) { message(ParserMessages::documentCharMax, NumberMessageArg(charMax)); adjCount = min > charMax ? 
0 : 1 + (charMax - min); sdBuilder.valid = 0; maybeISO646 = 0; } else adjCount = count; if (!parseSdParam(AllowedSdParams(SdParam::number, SdParam::minimumLiteral, SdParam::reservedName + Sd::rUNUSED), parm)) return 0; switch (parm.type) { case SdParam::number: decl.addRange(min, count, parm.n); if (found && adjCount > 0) { ISet baseMissing; desc.addBaseRange(baseDesc, min, min + (adjCount - 1), parm.n, baseMissing); if (!baseMissing.isEmpty() && options().warnSgmlDecl) message(ParserMessages::basesetCharsMissing, CharsetMessageArg(baseMissing)); } break; case SdParam::reservedName + Sd::rUNUSED: decl.addRange(min, count); break; case SdParam::minimumLiteral: { UnivChar c = sdBuilder.sd->nameToUniv(parm.literalText.string()); if (adjCount > 256) { message(ParserMessages::tooManyCharsMinimumLiteral); adjCount = 256; } for (Number i = 0; i < adjCount; i++) desc.addRange(min + i, min + i, c); } maybeISO646 = 0; decl.addRange(min, count, parm.literalText.string()); break; default: CANNOT_HAPPEN(); } SdParam::Type follow = (isDocument ? SdParam::reservedName + Sd::rCAPACITY : SdParam::reservedName + Sd::rFUNCTION); if (!parseSdParam(AllowedSdParams(SdParam::number, SdParam::reservedName + Sd::rBASESET, follow), parm)) return 0; } while (parm.type == SdParam::number); } while (parm.type == SdParam::reservedName + Sd::rBASESET); if (!multiplyDeclared.isEmpty()) message(ParserMessages::duplicateCharNumbers, CharsetMessageArg(multiplyDeclared)); ISet declaredSet; decl.declaredSet(declaredSet); ISetIter iter(declaredSet); WideChar min, max, lastMax; if (iter.next(min, max)) { ISet holes; lastMax = max; while (iter.next(min, max)) { if (min - lastMax > 1) holes.addRange(lastMax + 1, min - 1); lastMax = max; } if (!holes.isEmpty()) message(ParserMessages::codeSetHoles, CharsetMessageArg(holes)); } if (!isDocument && sdBuilder.sd->scopeInstance()) { // If scope is INSTANCE, syntax reference character set // must be same as reference. 
UnivCharsetDescIter iter(desc); WideChar descMin, descMax; UnivChar univMin; if (!iter.next(descMin, descMax, univMin) || descMin != 0 || descMax != 127 || univMin != 0 || !maybeISO646) message(ParserMessages::scopeInstanceSyntaxCharset); } return 1; } Boolean Parser::sdParseExternalCharset(Sd &sd, UnivCharsetDesc &desc) { SdParam parm; for (;;) { if (!parseSdParam(AllowedSdParams(SdParam::number, SdParam::eE), parm)) break; if (parm.type == SdParam::eE) return 1; WideChar min = parm.n; if (!parseSdParam(AllowedSdParams(SdParam::number), parm)) break; Number count = parm.n; if (!parseSdParam(AllowedSdParams(SdParam::number, SdParam::minimumLiteral, SdParam::reservedName + Sd::rUNUSED), parm)) break; if (parm.type == SdParam::number) { if (count > 0) desc.addRange(min, min + (count - 1), parm.n); } else if (parm.type == SdParam::minimumLiteral) { UnivChar c = sd.nameToUniv(parm.literalText.string()); if (count > 256) { message(ParserMessages::tooManyCharsMinimumLiteral); count = 256; } for (Number i = 0; i < count; i++) desc.addRange(min + i, min + i, c); } } popInputStack(); return 0; } Boolean Parser::sdParseCapacity(SdBuilder &sdBuilder, SdParam &parm) { if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rPUBLIC, SdParam::reservedName + Sd::rSGMLREF), parm)) return 0; Boolean pushed = 0; if (parm.type == SdParam::reservedName + Sd::rPUBLIC) { if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm)) return 0; PublicId id; PublicId::TextClass textClass; const MessageType1 *err; if (!id.init(parm.literalText, sd().docCharset(), syntax().space(), err)) sdBuilder.addFormalError(currentLocation(), *err, id.string()); else if (id.getTextClass(textClass) && textClass != PublicId::CAPACITY) sdBuilder.addFormalError(currentLocation(), ParserMessages::capacityTextClass, id.string()); const StringC &str = id.string(); if (str != sd().execToDoc("ISO 8879-1986//CAPACITY Reference//EN") && str != sd().execToDoc("ISO 8879:1986//CAPACITY Reference//EN")) { 
Boolean givenError; if (referencePublic(id, PublicId::CAPACITY, givenError)) pushed = 1; else if (!givenError) message(ParserMessages::unknownCapacitySet, StringMessageArg(str)); } if (!pushed) return parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSCOPE), parm); } PackedBoolean capacitySpecified[Sd::nCapacity]; int i; for (i = 0; i < Sd::nCapacity; i++) capacitySpecified[i] = 0; if (!parseSdParam(AllowedSdParams(SdParam::capacityName), parm)) return 0; do { Sd::Capacity capacityIndex = parm.capacityIndex; if (!parseSdParam(AllowedSdParams(SdParam::number), parm)) return 0; if (!capacitySpecified[capacityIndex]) { sdBuilder.sd->setCapacity(capacityIndex, parm.n); capacitySpecified[capacityIndex] = 1; } else if (options().warnSgmlDecl) message(ParserMessages::duplicateCapacity, StringMessageArg(sd().capacityName(i))); int final = pushed ? int(SdParam::eE) : SdParam::reservedName + Sd::rSCOPE; if (!parseSdParam(AllowedSdParams(SdParam::capacityName, final), parm)) return 0; } while (parm.type == SdParam::capacityName); Number totalcap = sdBuilder.sd->capacity(0); for (i = 1; i < Sd::nCapacity; i++) if (sdBuilder.sd->capacity(i) > totalcap) message(ParserMessages::capacityExceedsTotalcap, StringMessageArg(sd().capacityName(i))); if (pushed) return parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSCOPE), parm); return 1; } Boolean Parser::referencePublic(const PublicId &id, PublicId::TextClass entityType, Boolean &givenError) { givenError = 0; StringC sysid; if (entityCatalog().lookupPublic(id.string(), sd().docCharset(), messenger(), sysid)) { Location loc = currentLocation(); eventHandler().sgmlDeclEntity(new (eventAllocator()) SgmlDeclEntityEvent(id, entityType, sysid, loc)); Ptr origin(new EntityOrigin(loc)); if (currentMarkup()) currentMarkup()->addEntityStart(origin); InputSource *in = entityManager().open(sysid, sd().docCharset(), origin.pointer(), 0, messenger()); if (!in) { givenError = 1; return 0; } pushInput(in); return 1; } return 0; } 
Boolean Parser::sdParseScope(SdBuilder &sdBuilder, SdParam &parm) { if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rINSTANCE, SdParam::reservedName + Sd::rDOCUMENT), parm)) return 0; if (parm.type == SdParam::reservedName + Sd::rINSTANCE) sdBuilder.sd->setScopeInstance(); return 1; } Boolean Parser::sdParseSyntax(SdBuilder &sdBuilder, SdParam &parm) { if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSYNTAX), parm)) return 0; if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSHUNCHAR, SdParam::reservedName + Sd::rPUBLIC), parm)) return 0; if (parm.type == SdParam::reservedName + Sd::rPUBLIC) { if (!parseSdParam(AllowedSdParams(SdParam::minimumLiteral), parm)) return 0; PublicId id; const MessageType1 *err; PublicId::TextClass textClass; if (!id.init(parm.literalText, sd().docCharset(), syntax().space(), err)) sdBuilder.addFormalError(currentLocation(), *err, id.string()); else if (id.getTextClass(textClass) && textClass != PublicId::SYNTAX) sdBuilder.addFormalError(currentLocation(), ParserMessages::syntaxTextClass, id.string()); if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rFEATURES, SdParam::reservedName + Sd::rSWITCHES), parm)) return 0; Vector charSwitches; if (parm.type == SdParam::reservedName + Sd::rSWITCHES) { if (!parseSdParam(AllowedSdParams(SdParam::number), parm)) return 0; for (;;) { SyntaxChar c = parm.n; if (!parseSdParam(AllowedSdParams(SdParam::number), parm)) return 0; sdBuilder.switcher.addSwitch(c, parm.n); if (!parseSdParam(AllowedSdParams(SdParam::number, SdParam::reservedName + Sd::rFEATURES), parm)) return 0; if (parm.type != SdParam::number) break; } } const StandardSyntaxSpec *spec = lookupSyntax(id); if (spec) { if (!setStandardSyntax(*sdBuilder.syntax, *spec, sdBuilder.sd->docCharset(), sdBuilder.switcher)) sdBuilder.valid = 0; } else { Boolean givenError; if (referencePublic(id, PublicId::SYNTAX, givenError)) { sdBuilder.externalSyntax = 1; SdParam parm2; if 
(!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSHUNCHAR), parm2)) return 0; if (!sdParseExplicitSyntax(sdBuilder, parm2)) return 0; } else { if (!givenError) message(ParserMessages::unknownPublicSyntax, StringMessageArg(id.string())); sdBuilder.valid = 0; } } } else { if (!sdParseExplicitSyntax(sdBuilder, parm)) return 0; } if (!sdBuilder.sd->scopeInstance()) { // we know the significant chars now ISet invalidSgmlChar; sdBuilder.syntax->checkSgmlChar(sdBuilder.sd->docCharset(), 0, invalidSgmlChar); if (!invalidSgmlChar.isEmpty()) message(ParserMessages::invalidSgmlChar, CharsetMessageArg(invalidSgmlChar)); } checkSyntaxNamelen(*sdBuilder.syntax); checkSwitchesMarkup(sdBuilder.switcher); return 1; } Boolean Parser::sdParseExplicitSyntax(SdBuilder &sdBuilder, SdParam &parm) { typedef Boolean (Parser::*SdParser)(SdBuilder &, SdParam &); static SdParser parsers[] = { &Parser::sdParseShunchar, &Parser::sdParseSyntaxCharset, &Parser::sdParseFunction, &Parser::sdParseNaming, &Parser::sdParseDelim, &Parser::sdParseNames, &Parser::sdParseQuantity }; for (size_t i = 0; i < SIZEOF(parsers); i++) if (!(this->*(parsers[i]))(sdBuilder, parm)) return 0; return 1; } const StandardSyntaxSpec *Parser::lookupSyntax(const PublicId &id) { PublicId::OwnerType ownerType; if (!id.getOwnerType(ownerType) || ownerType != PublicId::ISO) return 0; StringC str; if (!id.getOwner(str)) return 0; if (str != sd().execToDoc("ISO 8879:1986") && str != sd().execToDoc("ISO 8879-1986")) return 0; PublicId::TextClass textClass; if (!id.getTextClass(textClass) || textClass != PublicId::SYNTAX) return 0; if (!id.getDescription(str)) return 0; if (str == sd().execToDoc("Reference")) return &refSyntax; if (str == sd().execToDoc("Core")) return &coreSyntax; return 0; } Boolean Parser::sdParseSyntaxCharset(SdBuilder &sdBuilder, SdParam &parm) { UnivCharsetDesc desc; if (!sdParseCharset(sdBuilder, parm, 0, sdBuilder.syntaxCharsetDecl, desc)) return 0; sdBuilder.syntaxCharset.set(desc); 
checkSwitches(sdBuilder.switcher, sdBuilder.syntaxCharset); for (size_t i = 0; i < sdBuilder.switcher.nSwitches(); i++) if (!sdBuilder.syntaxCharsetDecl.charDeclared(sdBuilder.switcher.switchTo(i))) message(ParserMessages::switchNotInCharset, NumberMessageArg(sdBuilder.switcher.switchTo(i))); ISet missing; findMissingMinimum(sdBuilder.syntaxCharset, missing); if (!missing.isEmpty()) message(ParserMessages::missingMinimumChars, CharsetMessageArg(missing)); return 1; } Boolean Parser::sdParseShunchar(SdBuilder &sdBuilder, SdParam &parm) { if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNONE, SdParam::reservedName + Sd::rCONTROLS, SdParam::number), parm)) return 0; if (parm.type == SdParam::reservedName + Sd::rNONE) { if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rBASESET), parm)) return 0; return 1; } if (parm.type == SdParam::reservedName + Sd::rCONTROLS) sdBuilder.syntax->setShuncharControls(); else { if (parm.n <= charMax) sdBuilder.syntax->addShunchar(Char(parm.n)); } for (;;) { if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rBASESET, SdParam::number), parm)) return 0; if (parm.type != SdParam::number) break; if (parm.n <= charMax) sdBuilder.syntax->addShunchar(Char(parm.n)); } return 1; } Boolean Parser::sdParseFunction(SdBuilder &sdBuilder, SdParam &parm) { static Sd::ReservedName standardNames[3] = { Sd::rRE, Sd::rRS, Sd::rSPACE }; for (int i = 0; i < 3; i++) { if (!parseSdParam(AllowedSdParams(SdParam::reservedName + standardNames[i]), parm)) return 0; if (!parseSdParam(AllowedSdParams(SdParam::number), parm)) return 0; Char c; if (translateSyntax(sdBuilder, parm.n, c)) { if (checkNotFunction(*sdBuilder.syntax, c)) sdBuilder.syntax->setStandardFunction(Syntax::StandardFunction(i), c); else sdBuilder.valid = 0; } } Boolean haveMsichar = 0; Boolean haveMsochar = 0; for (;;) { if (!parseSdParam(sdBuilder.externalSyntax ? 
AllowedSdParams(SdParam::name, SdParam::paramLiteral) : AllowedSdParams(SdParam::name), parm)) return 0; Boolean nameWasLiteral; Boolean invalidName = 0; StringC name; if (parm.type == SdParam::paramLiteral) { nameWasLiteral = 1; if (!translateSyntax(sdBuilder, parm.paramLiteralText, name)) invalidName = 1; } else { parm.token.swap(name); nameWasLiteral = 0; } if (!parseSdParam(nameWasLiteral ? AllowedSdParams(SdParam::reservedName + Sd::rFUNCHAR, SdParam::reservedName + Sd::rMSICHAR, SdParam::reservedName + Sd::rMSOCHAR, SdParam::reservedName + Sd::rMSSCHAR, SdParam::reservedName + Sd::rSEPCHAR) : AllowedSdParams(SdParam::reservedName + Sd::rFUNCHAR, SdParam::reservedName + Sd::rMSICHAR, SdParam::reservedName + Sd::rMSOCHAR, SdParam::reservedName + Sd::rMSSCHAR, SdParam::reservedName + Sd::rSEPCHAR, SdParam::reservedName + Sd::rLCNMSTRT), parm)) return 0; if (parm.type == SdParam::reservedName + Sd::rLCNMSTRT) { if (name != sd().reservedName(Sd::rNAMING)) message(ParserMessages::namingBeforeLcnmstrt, StringMessageArg(name)); break; } if (!nameWasLiteral) { StringC tem; name.swap(tem); if (!translateName(sdBuilder, tem, name)) invalidName = 1; } Syntax::FunctionClass functionClass; switch (parm.type) { case SdParam::reservedName + Sd::rFUNCHAR: functionClass = Syntax::cFUNCHAR; break; case SdParam::reservedName + Sd::rMSICHAR: haveMsichar = 1; functionClass = Syntax::cMSICHAR; break; case SdParam::reservedName + Sd::rMSOCHAR: haveMsochar = 1; functionClass = Syntax::cMSOCHAR; break; case SdParam::reservedName + Sd::rMSSCHAR: functionClass = Syntax::cMSSCHAR; break; case SdParam::reservedName + Sd::rSEPCHAR: functionClass = Syntax::cSEPCHAR; break; default: CANNOT_HAPPEN(); } if (!parseSdParam(AllowedSdParams(SdParam::number), parm)) return 0; Char c; if (translateSyntax(sdBuilder, parm.n, c) && checkNotFunction(*sdBuilder.syntax, c) && !invalidName) { Char tem; if (sdBuilder.syntax->lookupFunctionChar(name, &tem)) message(ParserMessages::duplicateFunctionName, 
StringMessageArg(name)); else sdBuilder.syntax->addFunctionChar(name, functionClass, c); } } if (haveMsochar && !haveMsichar) message(ParserMessages::msocharRequiresMsichar); return 1; } Boolean Parser::sdParseNaming(SdBuilder &sdBuilder, SdParam &parm) { static Sd::ReservedName keys[4] = { Sd::rUCNMSTRT, Sd::rLCNMCHAR, Sd::rUCNMCHAR, Sd::rNAMECASE }; int isNamechar = 0; ISet nameStartChar; ISet nameChar; do { String lc; Vector rangeIndex; Boolean first = 1; Boolean allowThrough = 0; for (;;) { if (!parseSdParam(sdBuilder.externalSyntax ? AllowedSdParams(SdParam::reservedName + keys[isNamechar * 2], SdParam::paramLiteral, SdParam::number, SdParam::ellipsis) : (first ? AllowedSdParams(SdParam::paramLiteral) : AllowedSdParams(SdParam::reservedName + keys[isNamechar * 2])), parm)) return 0; first = 0; Boolean wasRange = 0; sdParamConvertToLiteral(parm); if (parm.type == SdParam::ellipsis) { if (!allowThrough) message(ParserMessages::sdInvalidEllipsis); if (!parseSdParam(AllowedSdParams(SdParam::paramLiteral, SdParam::number), parm)) return 0; sdParamConvertToLiteral(parm); if (parm.paramLiteralText.size() == 0) message(ParserMessages::sdInvalidEllipsis); else if (allowThrough) { SyntaxChar n = parm.paramLiteralText[0]; if (n < lc[lc.size() - 1]) message(ParserMessages::sdInvalidRange); else if (n > lc[lc.size() - 1] + 1) rangeIndex.push_back(lc.size() - 1); } wasRange = 1; } if (parm.type != SdParam::paramLiteral) break; lc += parm.paramLiteralText; allowThrough = (parm.paramLiteralText.size() - wasRange) > 0; } size_t lcPos = 0; size_t rangeIndexPos = 0; unsigned long rangeLeft = 0; SyntaxChar nextRangeChar; ISet &set = isNamechar ? nameChar : nameStartChar; String chars; Boolean runOut = 0; first = 1; for (;;) { if (!parseSdParam(sdBuilder.externalSyntax ? AllowedSdParams(SdParam::reservedName + keys[isNamechar * 2 + 1], SdParam::paramLiteral, SdParam::number, SdParam::ellipsis) : (first ? 
AllowedSdParams(SdParam::paramLiteral) : AllowedSdParams(SdParam::reservedName + keys[isNamechar * 2 + 1])), parm)) return 0; sdParamConvertToLiteral(parm); first = 0; Boolean isRange = parm.type == SdParam::ellipsis; size_t nChars = chars.size(); if (nChars) nChars -= isRange; for (size_t i = 0; i < nChars; i++) { if (rangeLeft == 0 && rangeIndexPos < rangeIndex.size() && rangeIndex[rangeIndexPos] == lcPos) { rangeLeft = 1 + lc[lcPos + 1] - lc[lcPos]; nextRangeChar = lc[lcPos]; lcPos += 2; rangeIndexPos += 1; } Char c; if (rangeLeft > 0) { rangeLeft--; c = nextRangeChar++; } else if (lcPos < lc.size()) c = lc[lcPos++]; else { runOut = 1; c = chars[i]; } // map from c to chars[i] Char transLc, transUc; if (translateSyntax(sdBuilder, c, transLc) && translateSyntax(sdBuilder, chars[i], transUc)) { set.add(transLc); if (transLc != transUc) { set.add(transUc); sdBuilder.syntax->addSubst(transLc, transUc); } } } if (isRange) { if (!parseSdParam(AllowedSdParams(SdParam::paramLiteral, SdParam::number), parm)) return 0; sdParamConvertToLiteral(parm); if (chars.size() == 0 || parm.paramLiteralText.size() == 0) message(ParserMessages::sdInvalidEllipsis); else { SyntaxChar start = chars[chars.size() - 1]; SyntaxChar end = parm.paramLiteralText[0]; if (start > end) message(ParserMessages::sdInvalidRange); else { size_t count = end + 1 - start; while (count > 0) { if (rangeLeft == 0 && rangeIndexPos < rangeIndex.size() && rangeIndex[rangeIndexPos] == lcPos) { rangeLeft = 1 + lc[lcPos + 1] - lc[lcPos]; nextRangeChar = lc[lcPos]; lcPos += 2; rangeIndexPos += 1; } Char c; if (rangeLeft > 0) { rangeLeft--; c = nextRangeChar++; } else if (lcPos < lc.size()) c = lc[lcPos++]; else { c = start; runOut = 1; } if (c == start && count > 1 && (runOut || rangeLeft > 0)) { size_t n; if (runOut) n = count; else if (rangeLeft < count) n = rangeLeft + 1; else n = count; translateRange(sdBuilder, start, start + (count - 1), set); count -= n; start += n; } else { Char transLc, transUc; if 
(translateSyntax(sdBuilder, c, transLc)
                    && translateSyntax(sdBuilder, start, transUc)) {
                  set.add(transLc);
                  if (transLc != transUc) {
                    set.add(transUc);
                    sdBuilder.syntax->addSubst(transLc, transUc);
                  }
                }
                count--;
                start++;
              }
            }
          }
        }
        chars.resize(0);
        if (parm.type != SdParam::paramLiteral)
          break;
        // The first character of the literal was the end of the range;
        // the remaining characters become the new pending list.
        chars.append(parm.paramLiteralText.data() + 1,
                     parm.paramLiteralText.size() - 1);
      }
      else if (parm.type == SdParam::paramLiteral)
        parm.paramLiteralText.swap(chars);
      else
        break;
    }
    // Report a length mismatch between the two lists.  Running out of
    // lower-case characters is not reported for an external syntax.
    if ((runOut && !sdBuilder.externalSyntax)
        || rangeLeft > 0 || lcPos < lc.size())
      message(isNamechar
              ? ParserMessages::nmcharLength
              : ParserMessages::nmstrtLength);
    if (!checkNmchars(set, *sdBuilder.syntax))
      sdBuilder.valid = 0;
  } while (!isNamechar++);
  // A character may not be both a name start and a name character.
  ISet bad;
  intersectCharSets(nameStartChar, nameChar, bad);
  if (!bad.isEmpty()) {
    sdBuilder.valid = 0;
    message(ParserMessages::nmcharNmstrt, CharsetMessageArg(bad));
  }
  sdBuilder.syntax->addNameStartCharacters(nameStartChar);
  sdBuilder.syntax->addNameCharacters(nameChar);
  // GENERAL (NO|YES) followed by ENTITY (NO|YES).
  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rGENERAL),
                    parm))
    return 0;
  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNO,
                                    SdParam::reservedName + Sd::rYES),
                    parm))
    return 0;
  sdBuilder.syntax->setNamecaseGeneral(parm.type
                                       == SdParam::reservedName + Sd::rYES);

  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rENTITY),
                    parm))
    return 0;
  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNO,
                                    SdParam::reservedName + Sd::rYES),
                    parm))
    return 0;
  sdBuilder.syntax->setNamecaseEntity(parm.type
                                      == SdParam::reservedName + Sd::rYES);
  return 1;
}

// Check that none of the characters in set already belongs to a class
// that excludes it from being an added name character.  Every violation
// is reported; returns 0 if there was at least one.

Boolean Parser::checkNmchars(const ISet &set, const Syntax &syntax)
{
  Boolean valid = 1;
  ISet bad;
  intersectCharSets(set, *syntax.charSet(Syntax::nameStart), bad);
  if (!bad.isEmpty()) {
    message(ParserMessages::nmcharLetter, CharsetMessageArg(bad));
    valid = 0;
    bad.clear();
  }
  intersectCharSets(set, *syntax.charSet(Syntax::digit), bad);
  if (!bad.isEmpty()) {
    message(ParserMessages::nmcharDigit,
CharsetMessageArg(bad)); valid = 0; bad.clear(); } Char funChar; if (syntax.getStandardFunction(Syntax::fRE, funChar) && set.contains(funChar)) { message(ParserMessages::nmcharRe, NumberMessageArg(funChar)); valid = 0; } if (syntax.getStandardFunction(Syntax::fRS, funChar) && set.contains(funChar)) { message(ParserMessages::nmcharRs, NumberMessageArg(funChar)); valid = 0; } if (syntax.getStandardFunction(Syntax::fSPACE, funChar) && set.contains(funChar)) { message(ParserMessages::nmcharSpace, NumberMessageArg(funChar)); valid = 0; } intersectCharSets(set, *syntax.charSet(Syntax::sepchar), bad); if (!bad.isEmpty()) { message(ParserMessages::nmcharSepchar, CharsetMessageArg(bad)); valid = 0; } return valid; } // Result is a ISet, so it can be used with CharsetMessageArg. void Parser::intersectCharSets(const ISet &s1, const ISet &s2, ISet &inter) { ISetIter i1(s1); ISetIter i2(s2); Char min1, max1, min2, max2; if (!i1.next(min1, max1)) return; if (!i2.next(min2, max2)) return; for (;;) { if (max1 < min2) { if (!i1.next(min1, max1)) break; } else if (max2 < min1) { if (!i2.next(min2, max2)) break; } else { // min2 <= max1 // min1 <= max2 Char min = min1 > min2 ? min1 : min2; Char max = max1 < max2 ? 
max1 : max2; inter.addRange(min, max); if (!i1.next(min1, max1)) break; if (!i2.next(min2, max2)) break; } } } Boolean Parser::sdParseDelim(SdBuilder &sdBuilder, SdParam &parm) { if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rDELIM), parm)) return 0; if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rGENERAL), parm)) return 0; if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF), parm)) return 0; PackedBoolean delimGeneralSpecified[Syntax::nDelimGeneral]; for (int i = 0; i < Syntax::nDelimGeneral; i++) delimGeneralSpecified[i] = 0; for (;;) { if (!parseSdParam(AllowedSdParams(SdParam::generalDelimiterName, SdParam::reservedName + Sd::rSHORTREF), parm)) return 0; if (parm.type == SdParam::reservedName + Sd::rSHORTREF) break; Syntax::DelimGeneral delimGeneral = parm.delimGeneralIndex; if (delimGeneralSpecified[delimGeneral]) message(ParserMessages::duplicateDelimGeneral, StringMessageArg(sd().generalDelimiterName(delimGeneral))); if (!parseSdParam(sdBuilder.externalSyntax ? 
AllowedSdParams(SdParam::paramLiteral,
                                      SdParam::number)
                      : AllowedSdParams(SdParam::paramLiteral),
                      parm))
      return 0;
    sdParamConvertToLiteral(parm);
    StringC str;
    if (parm.paramLiteralText.size() == 0)
      message(ParserMessages::sdEmptyDelimiter);
    else if (translateSyntax(sdBuilder, parm.paramLiteralText, str)) {
      // Run every character of the delimiter through the general
      // substitution table of the syntax being built.
      const SubstTable *table = sdBuilder.syntax->generalSubstTable();
      for (size_t i = 0; i < str.size(); i++)
        table->subst(str[i]);
      // Only the first valid specification of a delimiter is stored.
      if (checkGeneralDelim(*sdBuilder.syntax, str)
          && !delimGeneralSpecified[delimGeneral])
        sdBuilder.syntax->setDelimGeneral(delimGeneral, str);
      else
        sdBuilder.valid = 0;
    }
    delimGeneralSpecified[delimGeneral] = 1;
  }
  if (!setRefDelimGeneral(*sdBuilder.syntax,
                          sdBuilder.syntaxCharset,
                          sdBuilder.sd->docCharset(),
                          sdBuilder.switcher))
    sdBuilder.valid = 0;
  // SHORTREF is followed by SGMLREF or NONE; the reference short
  // reference delimiters are added only for SGMLREF.
  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF,
                                    SdParam::reservedName + Sd::rNONE),
                    parm))
    return 0;
  if (parm.type == SdParam::reservedName + Sd::rSGMLREF) {
    if (!addRefDelimShortref(*sdBuilder.syntax,
                             sdBuilder.syntaxCharset,
                             sdBuilder.sd->docCharset(),
                             sdBuilder.switcher))
      sdBuilder.valid = 0;
  }
  // The most recently read literal; needed as the start of a range
  // when an ellipsis follows.
  String lastLiteral;
  for (;;) {
    if (!parseSdParam(sdBuilder.externalSyntax ?
AllowedSdParams(SdParam::paramLiteral,
                                      SdParam::number,
                                      SdParam::ellipsis,
                                      SdParam::reservedName + Sd::rNAMES)
                      : AllowedSdParams(SdParam::paramLiteral,
                                        SdParam::reservedName + Sd::rNAMES),
                      parm))
      return 0;
    sdParamConvertToLiteral(parm);
    if (parm.type == SdParam::ellipsis) {
      // A range of single-character short references: lastLiteral is
      // the (already added) first character, the next parameter the last.
      if (!parseSdParam(AllowedSdParams(SdParam::paramLiteral,
                                        SdParam::number),
                        parm))
        return 0;
      sdParamConvertToLiteral(parm);
      if (parm.paramLiteralText.size() == 0)
        message(ParserMessages::sdEmptyDelimiter);
      else if (lastLiteral.size() != 1
               || parm.paramLiteralText.size() != 1)
        message(ParserMessages::sdInvalidEllipsis);
      else if (parm.paramLiteralText[0] < lastLiteral[0])
        message(ParserMessages::sdInvalidRange);
      else if (parm.paramLiteralText[0] != lastLiteral[0]) {
        ISet shortrefChars;
        translateRange(sdBuilder,
                       lastLiteral[0] + 1,
                       parm.paramLiteralText[0],
                       shortrefChars);
        // Collect the characters of the range that are already short
        // references, simple ones as well as one-character complex ones.
        ISet duplicates;
        intersectCharSets(shortrefChars,
                          sdBuilder.syntax->delimShortrefSimple(),
                          duplicates);
        int nComplexShortrefs = sdBuilder.syntax->nDelimShortrefComplex();
        for (int i = 0; i < nComplexShortrefs; i++) {
          const StringC &delim = sdBuilder.syntax->delimShortrefComplex(i);
          if (delim.size() == 1 && shortrefChars.contains(delim[0]))
            duplicates.add(delim[0]);
        }
        if (!duplicates.isEmpty())
          message(ParserMessages::duplicateDelimShortrefSet,
                  CharsetMessageArg(duplicates));
        sdBuilder.syntax->addDelimShortrefs(shortrefChars,
                                            sdBuilder.sd->docCharset());
      }
      lastLiteral.resize(0);
    }
    else if (parm.type == SdParam::paramLiteral) {
      parm.paramLiteralText.swap(lastLiteral);
      StringC str;
      if (lastLiteral.size() == 0)
        message(ParserMessages::sdEmptyDelimiter);
      else if (translateSyntax(sdBuilder, lastLiteral, str)) {
        const SubstTable *table = sdBuilder.syntax->generalSubstTable();
        for (size_t i = 0; i < str.size(); i++)
          table->subst(str[i]);
        // Single-character delimiters skip the B sequence check.
        if (str.size() == 1
            || checkShortrefDelim(*sdBuilder.syntax,
                                  sdBuilder.sd->docCharset(),
                                  str)) {
          if (sdBuilder.syntax->isValidShortref(str))
            message(ParserMessages::duplicateDelimShortref,
StringMessageArg(str)); else sdBuilder.syntax->addDelimShortref(str, sdBuilder.sd->docCharset()); } } } else break; } return 1; } Boolean Parser::sdParseNames(SdBuilder &sdBuilder, SdParam &parm) { if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF), parm)) return 0; for (;;) { if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rQUANTITY, SdParam::referenceReservedName), parm)) return 0; if (parm.type == SdParam::reservedName + Sd::rQUANTITY) break; Syntax::ReservedName reservedName = parm.reservedNameIndex; if (!parseSdParam(sdBuilder.externalSyntax ? AllowedSdParams(SdParam::name, SdParam::paramLiteral) : AllowedSdParams(SdParam::name), parm)) return 0; StringC transName; if (parm.type == SdParam::name ? translateName(sdBuilder, parm.token, transName) : translateSyntax(sdBuilder, parm.paramLiteralText, transName)) { Syntax::ReservedName tem; if (sdBuilder.syntax->lookupReservedName(transName, &tem)) message(ParserMessages::ambiguousReservedName, StringMessageArg(transName)); else { if (transName.size() == 0 || !sdBuilder.syntax->isNameStartCharacter(transName[0])) { message(ParserMessages::reservedNameSyntax, StringMessageArg(transName)); transName.resize(0); } size_t i; // Check that its a valid name in the declared syntax // (- and . might not be name characters). 
for (i = 1; i < transName.size(); i++)
          if (!sdBuilder.syntax->isNameCharacter(transName[i])) {
            message(ParserMessages::reservedNameSyntax,
                    StringMessageArg(transName));
            transName.resize(0);
            break;
          }
        // Apply the general substitution table to the replacement name.
        for (i = 0; i < transName.size(); i++)
          sdBuilder.syntax->generalSubstTable()->subst(transName[i]);
        // A name that already has a replacement is a duplicate; an
        // emptied (invalid) replacement invalidates the declaration.
        if (sdBuilder.syntax->reservedName(reservedName).size() > 0)
          message(ParserMessages::duplicateReservedName,
                  StringMessageArg(syntax().reservedName(reservedName)));
        else if (transName.size() > 0)
          sdBuilder.syntax->setName(reservedName, transName);
        else
          sdBuilder.valid = 0;
      }
    }
  }
  setRefNames(*sdBuilder.syntax, sdBuilder.sd->docCharset());
  // The reserved names RE, RS and SPACE must not coincide with the name
  // of a function character that has already been entered.
  static Syntax::ReservedName functionNameIndex[3] = {
    Syntax::rRE, Syntax::rRS, Syntax::rSPACE
  };
  for (int i = 0; i < 3; i++) {
    const StringC &functionName
      = sdBuilder.syntax->reservedName(functionNameIndex[i]);
    Char tem;
    if (sdBuilder.syntax->lookupFunctionChar(functionName, &tem))
      message(ParserMessages::duplicateFunctionName,
              StringMessageArg(functionName));
  }
  sdBuilder.syntax->enterStandardFunctionNames();
  return 1;
}

// Parse the quantity set: SGMLREF followed by pairs of a quantity name
// and a number.  The list ends at entity end for an external syntax and
// at the FEATURES keyword otherwise.

Boolean Parser::sdParseQuantity(SdBuilder &sdBuilder, SdParam &parm)
{
  if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rSGMLREF),
                    parm))
    return 0;
  for (;;) {
    int final = (sdBuilder.externalSyntax ?
int(SdParam::eE)
                 : SdParam::reservedName + Sd::rFEATURES);
    if (!parseSdParam(AllowedSdParams(SdParam::quantityName, final), parm))
      return 0;
    if (parm.type != SdParam::quantityName)
      break;
    Syntax::Quantity quantity = parm.quantityIndex;
    if (!parseSdParam(AllowedSdParams(SdParam::number), parm))
      return 0;
    sdBuilder.syntax->setQuantity(quantity, parm.n);
  }
  // With SCOPE INSTANCE, report every quantity of the new syntax that is
  // smaller than the corresponding quantity of the current syntax.
  if (sdBuilder.sd->scopeInstance()) {
    for (int i = 0; i < Syntax::nQuantity; i++)
      if (sdBuilder.syntax->quantity(Syntax::Quantity(i))
          < syntax().quantity(Syntax::Quantity(i)))
        message(ParserMessages::scopeInstanceQuantity,
                StringMessageArg(sd().quantityName(Syntax::Quantity(i))));
  }
  return 1;
}

// Parse the feature use parameters.  The keywords must appear in the
// fixed order given by the features table.  Keywords marked __none take
// no argument, __boolean ones take NO or YES, and __number ones take NO
// or YES followed by a number.

Boolean Parser::sdParseFeatures(SdBuilder &sdBuilder, SdParam &parm)
{
  struct FeatureInfo {
    Sd::ReservedName name;
    enum {
      __none,
      __boolean,
      __number
    } arg;
  };
  static FeatureInfo features[] = {
    { Sd::rMINIMIZE, FeatureInfo::__none },
    { Sd::rDATATAG, FeatureInfo::__boolean },
    { Sd::rOMITTAG, FeatureInfo::__boolean },
    { Sd::rRANK, FeatureInfo::__boolean },
    { Sd::rSHORTTAG, FeatureInfo::__boolean },
    { Sd::rLINK, FeatureInfo::__none },
    { Sd::rSIMPLE, FeatureInfo::__number },
    { Sd::rIMPLICIT, FeatureInfo::__boolean },
    { Sd::rEXPLICIT, FeatureInfo::__number },
    { Sd::rOTHER, FeatureInfo::__none },
    { Sd::rCONCUR, FeatureInfo::__number },
    { Sd::rSUBDOC, FeatureInfo::__number },
    { Sd::rFORMAL, FeatureInfo::__boolean }
  };
  // Running indices into Sd::BooleanFeature and Sd::NumberFeature; each
  // is advanced once per feature of the respective kind.
  int booleanFeature = 0;
  int numberFeature = 0;
  for (size_t i = 0; i < SIZEOF(features); i++) {
    if (!parseSdParam(AllowedSdParams(SdParam::reservedName
                                      + features[i].name), parm))
      return 0;
    if (features[i].arg != FeatureInfo::__none) {
      if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNO,
                                        SdParam::reservedName + Sd::rYES),
                        parm))
        return 0;
#if 0
      if (features[i].name == Sd::rDATATAG
          && parm.type == (SdParam::reservedName + Sd::rYES))
        message(ParserMessages::datatagNotImplemented);
#endif
      if (features[i].arg == FeatureInfo::__number) {
        if (parm.type == SdParam::reservedName + Sd::rYES) {
          if
(!parseSdParam(AllowedSdParams(SdParam::number), parm)) return 0; sdBuilder.sd->setNumberFeature(Sd::NumberFeature(numberFeature++), parm.n); } else sdBuilder.sd->setNumberFeature(Sd::NumberFeature(numberFeature++), 0); } else sdBuilder.sd->setBooleanFeature(Sd::BooleanFeature(booleanFeature++), parm.type == (SdParam::reservedName + Sd::rYES)); } } return 1; } Boolean Parser::sdParseAppinfo(SdBuilder &, SdParam &parm) { if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rAPPINFO), parm)) return 0; Location location(currentLocation()); if (!parseSdParam(AllowedSdParams(SdParam::reservedName + Sd::rNONE, SdParam::minimumLiteral), parm)) return 0; AppinfoEvent *event; if (parm.type == SdParam::minimumLiteral) event = new (eventAllocator()) AppinfoEvent(parm.literalText, location); else event = new (eventAllocator()) AppinfoEvent(location); eventHandler().appinfo(event); return 1; } Boolean Parser::translateSyntax(CharSwitcher &switcher, const CharsetInfo &syntaxCharset, const CharsetInfo &docCharset, WideChar syntaxChar, Char &docChar) { syntaxChar = switcher.subst(syntaxChar); UnivChar univChar; if (syntaxCharset.descToUniv(syntaxChar, univChar) && univToDescCheck(docCharset, univChar, docChar)) return 1; message(ParserMessages::translateSyntaxChar, NumberMessageArg(syntaxChar)); return 0; } void Parser::translateRange(SdBuilder &sdBuilder, SyntaxChar start, SyntaxChar end, ISet &chars) { #if 0 do { Char docChar; if (!translateSyntax(sdBuilder, start, docChar)) break; chars.add(docChar); } while (start++ != end); #endif for (;;) { SyntaxChar doneUpTo = end; Boolean gotSwitch = 0; WideChar firstSwitch; for (size_t i = 0; i < sdBuilder.switcher.nSwitches(); i++) { WideChar c = sdBuilder.switcher.switchFrom(i); if (start <= c && c <= end) { if (!gotSwitch) { gotSwitch = 1; firstSwitch = c; } else if (c < firstSwitch) firstSwitch = c; } } if (gotSwitch && firstSwitch == start) { doneUpTo = start; Char docChar; if (translateSyntax(sdBuilder, start, docChar)) 
chars.add(docChar);
    }
    else {
      // Translate the stretch up to (not including) the next switched
      // character as one range, shortened to the number of consecutive
      // characters the translation reports as valid.
      if (gotSwitch)
        doneUpTo = firstSwitch - 1;
      Char docChar;
      Number count;
      if (translateSyntaxNoSwitch(sdBuilder, start, docChar, count)) {
        if (count - 1 < doneUpTo - start)
          doneUpTo = start + (count - 1);
        chars.addRange(docChar, docChar + (doneUpTo - start));
      }
    }
    if (doneUpTo == end)
      break;
    start = doneUpTo + 1;
  }
}

// Translate a single syntax character, applying the switcher first.

Boolean Parser::translateSyntax(SdBuilder &sdBuilder,
                                WideChar syntaxChar, Char &docChar)
{
  Number count;
  return translateSyntaxNoSwitch(sdBuilder,
                                 sdBuilder.switcher.subst(syntaxChar),
                                 docChar,
                                 count);
}

// Translate syntaxChar to a document character without applying the
// switcher.  The character set declarations are consulted first; if
// they yield nothing, the universal character number is used instead.
// On success count is set to the number of consecutive characters,
// starting at syntaxChar, for which the same mapping applies.
// On failure the declaration is marked invalid and translateSyntaxChar
// is reported.

Boolean Parser::translateSyntaxNoSwitch(SdBuilder &sdBuilder,
                                        WideChar syntaxChar, Char &docChar,
                                        Number &count)
{
  Number n;
  StringC str;
  CharsetDeclRange::Type type;
  const PublicId *id;
  if (sdBuilder.syntaxCharsetDecl.getCharInfo(syntaxChar,
                                              id,
                                              type,
                                              n,
                                              str,
                                              count)) {
    ISet docChars;
    switch (type) {
    case CharsetDeclRange::unused:
      break;
    case CharsetDeclRange::string:
      sdBuilder.sd->docCharsetDecl().stringToChar(str, docChars);
      break;
    case CharsetDeclRange::number:
      {
        Number count2;
        sdBuilder.sd->docCharsetDecl().numberToChar(id, n, docChars, count2);
        if (!docChars.isEmpty() && count2 < count)
          count = count2;
      }
      break;
    default:
      CANNOT_HAPPEN();
    }
    if (!docChars.isEmpty()) {
      // Several candidates: optionally warn, then use the lowest one.
      if (!docChars.isSingleton() && options().warnSgmlDecl)
        message(ParserMessages::ambiguousDocCharacter,
                CharsetMessageArg(docChars));
      ISetIter iter(docChars);
      WideChar min, max;
      if (iter.next(min, max) && min <= charMax) {
        docChar = Char(min);
        return 1;
      }
    }
  }
  // Fall back to translating through the universal character number.
  UnivChar univChar;
  WideChar alsoMax, count2;
  if (sdBuilder.syntaxCharset.descToUniv(syntaxChar, univChar, alsoMax)
      && univToDescCheck(sdBuilder.sd->docCharset(), univChar, docChar,
                         count2)) {
    count = (alsoMax - syntaxChar) + 1;
    if (count2 < count)
      count = count2;
    return 1;
  }
  sdBuilder.valid = 0;
  message(ParserMessages::translateSyntaxChar, NumberMessageArg(syntaxChar));
  return 0;
}

// Translate a string of syntax characters.  Characters that cannot be
// translated are dropped from docString and make the result 0, but the
// remaining characters are still processed.

Boolean Parser::translateSyntax(SdBuilder &sdBuilder,
                                const String &syntaxString,
                                StringC &docString)
{
  docString.resize(0);
  int
ret = 1; for (size_t i = 0; i < syntaxString.size(); i++) { Char c; if (translateSyntax(sdBuilder, syntaxString[i], c)) docString += c; else ret = 0; } return ret; } Boolean Parser::translateName(SdBuilder &sdBuilder, const StringC &name, StringC &str) { str.resize(name.size()); for (size_t i = 0; i < name.size(); i++) { UnivChar univChar; Boolean ret = sd().docCharset().descToUniv(name[i], univChar); // Might switch hyphen or period. univChar = translateUniv(univChar, sdBuilder.switcher, sdBuilder.syntaxCharset); ASSERT(ret != 0); if (!univToDescCheck(sdBuilder.sd->docCharset(), univChar, str[i])) { message(ParserMessages::translateDocChar, NumberMessageArg(univChar)); sdBuilder.valid = 0; return 0; } } return 1; } UnivChar Parser::translateUniv(UnivChar univChar, CharSwitcher &switcher, const CharsetInfo &syntaxCharset) { WideChar syntaxChar; ISet syntaxChars; if (syntaxCharset.univToDesc(univChar, syntaxChar, syntaxChars) != 1) { message(ParserMessages::missingSyntaxChar, NumberMessageArg(univChar)); return univChar; } SyntaxChar tem = switcher.subst(syntaxChar); if (tem != syntaxChar && !syntaxCharset.descToUniv(tem, univChar)) message(ParserMessages::translateSyntaxChar, NumberMessageArg(tem)); return univChar; } Boolean Parser::checkNotFunction(const Syntax &syn, Char c) { if (syn.charSet(Syntax::functionChar)->contains(c)) { message(ParserMessages::oneFunction, NumberMessageArg(c)); return 0; } else return 1; } // Check that it has at most one B sequence and that it // is not adjacent to a blank sequence. 
// A B sequence is a run of one or more consecutive 'B' characters in
// the delimiter.  Returns 1 if the delimiter is acceptable, otherwise
// reports the problem and returns 0.

Boolean Parser::checkShortrefDelim(const Syntax &syn,
                                   const CharsetInfo &charset,
                                   const StringC &delim)
{
  const Char letterB = charset.execToDesc('B');
  const ISet *bSet = syn.charSet(Syntax::blank);
  const size_t len = delim.size();
  Boolean hadB = 0;
  for (size_t i = 0; i < len; i++) {
    if (delim[i] != letterB)
      continue;
    // Start of a B sequence.
    if (hadB) {
      message(ParserMessages::multipleBSequence, StringMessageArg(delim));
      return 0;
    }
    hadB = 1;
    if (i > 0 && bSet->contains(delim[i - 1])) {
      message(ParserMessages::blankAdjacentBSequence,
              StringMessageArg(delim));
      return 0;
    }
    // Skip to the last B of the sequence.
    while (i + 1 < len && delim[i + 1] == letterB)
      i++;
    if (i + 1 < len && bSet->contains(delim[i + 1])) {
      message(ParserMessages::blankAdjacentBSequence,
              StringMessageArg(delim));
      return 0;
    }
  }
  return 1;
}

// A non-empty general delimiter must contain at least one character
// that is not a function character.  Returns 1 if delim is acceptable
// (an empty delimiter is accepted here), otherwise reports
// generalDelimAllFunction and returns 0.

Boolean Parser::checkGeneralDelim(const Syntax &syn, const StringC &delim)
{
  const ISet *functionSet = syn.charSet(Syntax::functionChar);
  if (delim.size() == 0)
    return 1;
  for (size_t i = 0; i < delim.size(); i++)
    if (!functionSet->contains(delim[i]))
      return 1;
  message(ParserMessages::generalDelimAllFunction,
          StringMessageArg(delim));
  return 0;
}

// Check that neither end of any switch is a letter or a digit.
// Every offending character is reported; returns 0 if there was one.

Boolean Parser::checkSwitches(CharSwitcher &switcher,
                              const CharsetInfo &syntaxCharset)
{
  Boolean valid = 1;
  for (size_t i = 0; i < switcher.nSwitches(); i++) {
    const WideChar ends[2] = {
      switcher.switchFrom(i), switcher.switchTo(i)
    };
    for (int j = 0; j < 2; j++) {
      UnivChar univChar;
      if (!syntaxCharset.descToUniv(ends[j], univChar))
        continue;
      // Check that it is not Digit Lcletter or Ucletter
      const Boolean isLcletter = (UnivCharsetDesc::a <= univChar
                                  && univChar < UnivCharsetDesc::a + 26);
      const Boolean isUcletter = (UnivCharsetDesc::A <= univChar
                                  && univChar < UnivCharsetDesc::A + 26);
      const Boolean isDigit = (UnivCharsetDesc::zero <= univChar
                               && univChar < UnivCharsetDesc::zero + 10);
      if (isLcletter || isUcletter || isDigit) {
        message(ParserMessages::switchLetterDigit,
                NumberMessageArg(univChar));
        valid = 0;
      }
    }
  }
  return valid;
}

// Check that every switch was actually used.

Boolean Parser::checkSwitchesMarkup(CharSwitcher &switcher)
{
  Boolean valid = 1;
  size_t nSwitches = switcher.nSwitches();
  for (size_t i =
0; i < nSwitches; i++) if (!switcher.switchUsed(i)) { // If the switch wasn't used, // then the character wasn't a markup character. message(ParserMessages::switchNotMarkup, NumberMessageArg(switcher.switchFrom(i))); valid = 0; } return valid; } void Parser::checkSyntaxNamelen(const Syntax &syn) { size_t namelen = syn.namelen(); int i; for (i = 0; i < Syntax::nDelimGeneral; i++) if (syn.delimGeneral(i).size() > namelen) message(ParserMessages::delimiterLength, StringMessageArg(syn.delimGeneral(i)), NumberMessageArg(namelen)); for (i = 0; i < syn.nDelimShortrefComplex(); i++) if (syn.delimShortrefComplex(i).size() > namelen) message(ParserMessages::delimiterLength, StringMessageArg(syn.delimShortrefComplex(i)), NumberMessageArg(namelen)); for (i = 0; i < Syntax::nNames; i++) if (syn.reservedName(Syntax::ReservedName(i)).size() > namelen && options().warnSgmlDecl) message(ParserMessages::reservedNameLength, StringMessageArg(syn.reservedName(Syntax::ReservedName(i))), NumberMessageArg(namelen)); } Boolean Parser::univToDescCheck(const CharsetInfo &charset, UnivChar from, Char &to) { WideChar count; return univToDescCheck(charset, from, to, count); } Boolean Parser::univToDescCheck(const CharsetInfo &charset, UnivChar from, Char &to, WideChar &count) { WideChar c; ISet descSet; unsigned ret = charset.univToDesc(from, c, descSet, count); if (ret > 1) { if (options().warnSgmlDecl) message(ParserMessages::ambiguousDocCharacter, CharsetMessageArg(descSet)); ret = 1; } if (ret && c <= charMax) { to = Char(c); return 1; } return 0; } Boolean Parser::parseSdParam(const AllowedSdParams &allow, SdParam &parm) { for (;;) { Token token = getToken(mdMode); switch (token) { case tokenUnrecognized: if (reportNonSgmlCharacter()) break; { message(ParserMessages::markupDeclarationCharacter, StringMessageArg(currentToken()), AllowedSdParamsMessageArg(allow, sdPointer())); } return 0; case tokenEe: if (allow.param(SdParam::eE)) { parm.type = SdParam::eE; if (currentMarkup()) 
currentMarkup()->addEntityEnd(); popInputStack(); return 1; } message(ParserMessages::sdEntityEnd, AllowedSdParamsMessageArg(allow, sdPointer())); return 0; case tokenS: if (currentMarkup()) currentMarkup()->addS(currentChar()); break; case tokenCom: if (!parseComment(sdcomMode)) return 0; break; case tokenDso: case tokenGrpo: case tokenMinusGrpo: case tokenPlusGrpo: case tokenRni: case tokenPeroNameStart: case tokenPeroGrpo: sdParamInvalidToken(token, allow); return 0; case tokenLcUcNmchar: if (allow.param(SdParam::ellipsis)) { extendNameToken(syntax().namelen(), ParserMessages::nameLength); getCurrentToken(syntax().generalSubstTable(), parm.token); if (parm.token == sd().execToDoc("...")) { parm.type = SdParam::ellipsis; return 1; } message(ParserMessages::sdInvalidNameToken, StringMessageArg(parm.token), AllowedSdParamsMessageArg(allow, sdPointer())); } else { sdParamInvalidToken(token, allow); return 0; } case tokenLita: case tokenLit: { Boolean lita = (token == tokenLita); if (allow.param(SdParam::minimumLiteral)) { if (!parseMinimumLiteral(lita, parm.literalText)) return 0; parm.type = SdParam::minimumLiteral; if (currentMarkup()) currentMarkup()->addLiteral(parm.literalText); } else if (allow.param(SdParam::paramLiteral)) { if (!parseSdParamLiteral(lita, parm.paramLiteralText)) return 0; parm.type = SdParam::paramLiteral; } else { sdParamInvalidToken(token, allow); return 0; } return 1; } case tokenMdc: if (allow.param(SdParam::mdc)) { parm.type = SdParam::mdc; if (currentMarkup()) currentMarkup()->addDelim(Syntax::dMDC); return 1; } sdParamInvalidToken(tokenMdc, allow); return 0; case tokenNameStart: { extendNameToken(syntax().namelen(), ParserMessages::nameLength); getCurrentToken(syntax().generalSubstTable(), parm.token); if (allow.param(SdParam::capacityName)) { if (sd().lookupCapacityName(parm.token, parm.capacityIndex)) { parm.type = SdParam::capacityName; if (currentMarkup()) currentMarkup()->addName(currentInput()); return 1; } } if 
(allow.param(SdParam::referenceReservedName)) {
          if (syntax().lookupReservedName(parm.token,
                                          &parm.reservedNameIndex)) {
            parm.type = SdParam::referenceReservedName;
            if (currentMarkup())
              currentMarkup()->addName(currentInput());
            return 1;
          }
        }
        if (allow.param(SdParam::generalDelimiterName)) {
          if (sd().lookupGeneralDelimiterName(parm.token,
                                              parm.delimGeneralIndex)) {
            parm.type = SdParam::generalDelimiterName;
            if (currentMarkup())
              currentMarkup()->addName(currentInput());
            return 1;
          }
        }
        if (allow.param(SdParam::quantityName)) {
          if (sd().lookupQuantityName(parm.token, parm.quantityIndex)) {
            parm.type = SdParam::quantityName;
            if (currentMarkup())
              currentMarkup()->addName(currentInput());
            return 1;
          }
        }
        // Allowed types at or above SdParam::reservedName encode a
        // specific Sd::ReservedName; compare the token with each.
        for (int i = 0;; i++) {
          SdParam::Type t = allow.get(i);
          if (t == SdParam::invalid)
            break;
          if (t >= SdParam::reservedName) {
            Sd::ReservedName sdReservedName
              = Sd::ReservedName(t - SdParam::reservedName);
            if (parm.token == sd().reservedName(sdReservedName)) {
              parm.type = t;
              if (currentMarkup())
                currentMarkup()->addSdReservedName(sdReservedName,
                                                   currentInput());
              return 1;
            }
          }
        }
        // Last resort: accept the token as an arbitrary name.
        if (allow.param(SdParam::name)) {
          parm.type = SdParam::name;
          if (currentMarkup())
            currentMarkup()->addName(currentInput());
          return 1;
        }
        {
          message(ParserMessages::sdInvalidNameToken,
                  StringMessageArg(parm.token),
                  AllowedSdParamsMessageArg(allow, sdPointer()));
        }
        return 0;
      }
    case tokenDigit:
      if (allow.param(SdParam::number)) {
        extendNumber(syntax().namelen(), ParserMessages::numberLength);
        parm.type = SdParam::number;
        unsigned long n;
        // A number that does not fit in Number is reported and replaced
        // by the largest Number; parsing still succeeds.
        if (!stringToNumber(currentInput()->currentTokenStart(),
                            currentInput()->currentTokenLength(),
                            n)
            || n > Number(-1)) {
          message(ParserMessages::numberTooBig,
                  StringMessageArg(currentToken()));
          parm.n = Number(-1);
        }
        else {
          if (currentMarkup())
            currentMarkup()->addNumber(currentInput());
          parm.n = Number(n);
        }
        // Peek at the next token: a name directly after the number
        // means a separator is missing.
        Token token = getToken(mdMode);
        if (token == tokenNameStart)
          message(ParserMessages::psRequired);
        currentInput()->ungetToken();
        return 1;
      }
      sdParamInvalidToken(tokenDigit,
allow); return 0; default: CANNOT_HAPPEN(); } } } // This is a separate function, because we might want SyntaxChar // to be bigger than Char. Boolean Parser::parseSdParamLiteral(Boolean lita, String &str) { Location loc(currentLocation()); loc += 1; SdText text(loc, lita); // first character of content str.resize(0); const unsigned refLitlen = Syntax::referenceQuantity(Syntax::qLITLEN); Mode mode = lita ? sdplitaMode : sdplitMode; int done = 0; for (;;) { Token token = getToken(mode); switch (token) { case tokenEe: message(ParserMessages::literalLevel); return 0; case tokenUnrecognized: if (reportNonSgmlCharacter()) break; if (options().errorSignificant) message(ParserMessages::sdLiteralSignificant, StringMessageArg(currentToken())); text.addChar(currentChar(), currentLocation()); break; case tokenCroDigit: { InputSource *in = currentInput(); Location startLocation = currentLocation(); in->discardInitial(); extendNumber(syntax().namelen(), ParserMessages::numberLength); unsigned long n; Boolean valid; if (!stringToNumber(in->currentTokenStart(), in->currentTokenLength(), n) || n > syntaxCharMax) { message(ParserMessages::syntaxCharacterNumber, StringMessageArg(currentToken())); valid = 0; } else valid = 1; Owner markupPtr; if (eventsWanted().wantPrologMarkup()) { markupPtr = new Markup; markupPtr->addDelim(Syntax::dCRO); markupPtr->addNumber(in); switch (getToken(refMode)) { case tokenRefc: markupPtr->addDelim(Syntax::dREFC); break; case tokenRe: markupPtr->addRefEndRe(); break; default: break; } } else (void)getToken(refMode); if (valid) text.addChar(SyntaxChar(n), Location(new NumericCharRefOrigin(startLocation, currentLocation().index() + currentInput()->currentTokenLength() - startLocation.index(), markupPtr), 0)); } break; case tokenCroNameStart: if (!parseNamedCharRef()) return 0; break; case tokenLit: case tokenLita: done = 1; break; case tokenPeroNameStart: case tokenPeroGrpo: message(ParserMessages::sdParameterEntity); { Location loc(currentLocation()); 
const Char *p = currentInput()->currentTokenStart(); for (size_t count = currentInput()->currentTokenLength(); count > 0; count--) { text.addChar(*p++, loc); loc += 1; } } break; case tokenChar: if (text.string().size() > refLitlen && currentChar() == syntax().standardFunction(Syntax::fRE)) { message(ParserMessages::parameterLiteralLength, NumberMessageArg(refLitlen)); // guess that the closing delimiter has been omitted message(ParserMessages::literalClosingDelimiter); return 0; } text.addChar(currentChar(), currentLocation()); break; } if (done) break; } if (text.string().size() > refLitlen) message(ParserMessages::parameterLiteralLength, NumberMessageArg(refLitlen)); str = text.string(); if (currentMarkup()) currentMarkup()->addSdLiteral(text); return 1; } Boolean Parser::stringToNumber(const Char *s, size_t length, unsigned long &result) { unsigned long n = 0; for (; length > 0; length--, s++) { int val = sd().digitWeight(*s); if (n <= ULONG_MAX/10 && (n *= 10) <= ULONG_MAX - val) n += val; else return 0; } result = n; return 1; } void Parser::sdParamInvalidToken(Token token, const AllowedSdParams &allow) { message(ParserMessages::sdParamInvalidToken, TokenMessageArg(token, mdMode, syntaxPointer(), sdPointer()), AllowedSdParamsMessageArg(allow, sdPointer())); } void Parser::sdParamConvertToLiteral(SdParam &parm) { if (parm.type == SdParam::number) { parm.type = SdParam::paramLiteral; parm.paramLiteralText.resize(1); parm.paramLiteralText[0] = parm.n; } } AllowedSdParams::AllowedSdParams(SdParam::Type arg1, SdParam::Type arg2, SdParam::Type arg3, SdParam::Type arg4, SdParam::Type arg5, SdParam::Type arg6) { allow_[0] = arg1; allow_[1] = arg2; allow_[2] = arg3; allow_[3] = arg4; allow_[4] = arg5; allow_[5] = arg6; } Boolean AllowedSdParams::param(SdParam::Type t) const { for (int i = 0; i < maxAllow && allow_[i] != SdParam::invalid; i++) if (t == allow_[i]) return 1; return 0; } SdParam::Type AllowedSdParams::get(int i) const { return i < 0 || i >= maxAllow ? 
SdParam::Type(SdParam::invalid) : allow_[i]; } AllowedSdParamsMessageArg::AllowedSdParamsMessageArg( const AllowedSdParams &allow, const ConstPtr &sd) : allow_(allow), sd_(sd) { } MessageArg *AllowedSdParamsMessageArg::copy() const { return new AllowedSdParamsMessageArg(*this); } void AllowedSdParamsMessageArg::append(MessageBuilder &builder) const { for (int i = 0;; i++) { SdParam::Type type = allow_.get(i); if (type == SdParam::invalid) break; if (i != 0) builder.appendFragment(ParserMessages::listSep); switch (type) { case SdParam::eE: builder.appendFragment(ParserMessages::entityEnd); break; case SdParam::minimumLiteral: builder.appendFragment(ParserMessages::minimumLiteral); break; case SdParam::mdc: { builder.appendFragment(ParserMessages::delimStart); Char c = sd_->execToDoc('>'); builder.appendChars(&c, 1); builder.appendFragment(ParserMessages::delimEnd); } break; case SdParam::number: builder.appendFragment(ParserMessages::number); break; case SdParam::name: builder.appendFragment(ParserMessages::name); break; case SdParam::paramLiteral: builder.appendFragment(ParserMessages::parameterLiteral); break; case SdParam::capacityName: builder.appendFragment(ParserMessages::capacityName); break; case SdParam::generalDelimiterName: builder.appendFragment(ParserMessages::generalDelimiteRoleName); break; case SdParam::referenceReservedName: builder.appendFragment(ParserMessages::referenceReservedName); break; case SdParam::quantityName: builder.appendFragment(ParserMessages::quantityName); break; case SdParam::ellipsis: { StringC str(sd_->execToDoc("...")); builder.appendChars(str.data(), str.size()); break; } default: { StringC str(sd_->reservedName(type - SdParam::reservedName)); builder.appendChars(str.data(), str.size()); break; } } } } SdBuilder::SdBuilder() : valid(1), externalSyntax(0) { } void SdBuilder::addFormalError(const Location &location, const MessageType1 &message, const StringC &id) { formalErrorList.insert(new SdFormalError(location, message, 
id)); } SdFormalError::SdFormalError(const Location &location, const MessageType1 &message, const StringC &id) : location_(location), message_(&message), id_(id) { } void SdFormalError::send(ParserState &parser) { parser.Messenger::setNextLocation(location_); parser.message(*message_, StringMessageArg(id_)); } CharSwitcher::CharSwitcher() { } void CharSwitcher::addSwitch(WideChar from, WideChar to) { switches_.push_back(from); switches_.push_back(to); switchUsed_.push_back(0); } SyntaxChar CharSwitcher::subst(WideChar c) { for (size_t i = 0; i < switches_.size(); i += 2) if (switches_[i] == c) { switchUsed_[i/2] = 1; return switches_[i + 1]; } return c; } size_t CharSwitcher::nSwitches() const { return switchUsed_.size(); } Boolean CharSwitcher::switchUsed(size_t i) const { return switchUsed_[i]; } WideChar CharSwitcher::switchFrom(size_t i) const { return switches_[i*2]; } WideChar CharSwitcher::switchTo(size_t i) const { return switches_[i*2 + 1]; } CharsetMessageArg::CharsetMessageArg(const ISet &set) : set_(set) { } MessageArg *CharsetMessageArg::copy() const { return new CharsetMessageArg(*this); } void CharsetMessageArg::append(MessageBuilder &builder) const { ISetIter iter(set_); WideChar min, max; Boolean first = 1; while (iter.next(min, max)) { if (first) first = 0; else builder.appendFragment(ParserMessages::listSep); builder.appendNumber(min); if (max != min) { builder.appendFragment(max == min + 1 ? ParserMessages::listSep : ParserMessages::rangeSep); builder.appendNumber(max); } } } #ifdef SP_NAMESPACE } #endif