2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these libraries and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
23 /* $XConsortium: Parser.h /main/1 1996/07/29 17:00:29 cde-hp $ */
24 // Copyright (c) 1994 James Clark
25 // See the file COPYING for copying permission.
27 #ifndef Parser_INCLUDED
28 #define Parser_INCLUDED 1
35 #include "Attribute.h"
36 #include "Attributed.h"
39 #include "ElementType.h"
46 #include "ParserState.h"
48 #include "SgmlParser.h"
54 namespace SP_NAMESPACE {
62 class AllowedGroupTokens;
63 struct GroupConnector;
64 class AllowedGroupConnectors;
65 class AllowedSdParams;
68 class AttributeDefinition;
69 class AttributeDefinitionList;
70 class UnivCharsetDesc;
77 class ElementDefinition;
79 struct StandardSyntaxSpec;
83 class Parser : private ParserState {
85 Parser(const SgmlParser::Params &);
87 void parseAll(EventHandler &, SP_CONST SP_VOLATILE sig_atomic_t *cancelPtr);
// Parser derives from ParserState privately, so each declaration below
// re-exposes one inherited member with the access level in effect at this
// point in the class.
// They are written as using-declarations because the bare `Base::name;`
// access-declaration form was deprecated in C++98 and removed in C++11;
// the using form means the same thing and is valid in every C++ revision.
88 using ParserState::sdPointer;
89 using ParserState::instanceSyntaxPointer;
90 using ParserState::prologSyntaxPointer;
91 using ParserState::activateLinkType;
92 using ParserState::allLinkTypesActivated;
93 using ParserState::entityManager;
94 using ParserState::entityCatalog;
// Parser is not meant to be copied: the copy constructor and the copy
// assignment operator are declared here but, per the original author's
// note, never defined. A copy that gets past access checking therefore
// still fails at link time (the pre-C++11 non-copyable idiom).
98 Parser(const Parser &); // declared only, intentionally left undefined
99 void operator=(const Parser &); // declared only, intentionally left undefined
100 Boolean setStandardSyntax(Syntax &syn, const StandardSyntaxSpec &,
101 const CharsetInfo &docCharset,
103 Boolean addRefDelimShortref(Syntax &syntax,
104 const CharsetInfo &syntaxCharset,
105 const CharsetInfo &docCharset,
106 CharSwitcher &switcher);
107 Boolean setRefDelimGeneral(Syntax &syntax,
108 const CharsetInfo &syntaxCharset,
109 const CharsetInfo &docCharset,
110 CharSwitcher &switcher);
111 void setRefNames(Syntax &syntax, const CharsetInfo &docCharset);
114 void compileSdModes();
115 void compilePrologModes();
116 void compileInstanceModes();
117 void addNeededShortrefs(Dtd &, const Syntax &);
118 Boolean shortrefCanPreemptDelim(const StringC &sr,
122 void compileModes(const Mode *modes, int n, const Dtd *);
123 void compileNormalMap();
128 void doInstanceStart();
130 void extendNameToken(size_t, const MessageType1 &);
131 void extendNumber(size_t, const MessageType1 &);
134 void extendContentS();
135 void declSubsetRecover(unsigned startLevel);
136 void prologRecover();
137 void skipDeclaration(unsigned startLevel);
138 Boolean parseElementDecl();
139 Boolean parseAttlistDecl();
140 Boolean parseNotationDecl();
141 Boolean parseEntityDecl();
142 Boolean parseShortrefDecl();
143 Boolean parseUsemapDecl();
144 Boolean parseUselinkDecl();
145 Boolean parseDoctypeDeclStart();
146 Boolean parseDoctypeDeclEnd(Boolean fake = 0);
147 Boolean parseMarkedSectionDeclStart();
148 void handleMarkedSectionEnd();
149 Boolean parseCommentDecl();
150 void emptyCommentDecl();
151 Boolean parseExternalId(const AllowedParams &,
152 const AllowedParams &,
156 Boolean parseParam(const AllowedParams &, unsigned, Param &);
157 Boolean parseMinimumLiteral(Boolean, Text &);
158 Boolean parseAttributeValueLiteral(Boolean, Text &);
159 Boolean parseTokenizedAttributeValueLiteral(Boolean, Text &);
160 Boolean parseSystemIdentifier(Boolean, Text &);
161 Boolean parseParameterLiteral(Boolean, Text &);
162 Boolean parseDataTagParameterLiteral(Boolean, Text &);
163 // flags for parseLiteral()
165 literalSingleSpace = 01,
167 literalMinimumData = 04,
168 // Keep info about delimiters
169 literalDelimInfo = 010,
170 // Ignore references in the literal
171 literalNoProcess = 020
// Literal parser taking two modes, a maximum length, a message, a flag word
// and the Text object that is passed by non-const reference.
// `flags` is an unsigned bit mask: the octal literal* constants listed just
// above under "flags for parseLiteral()" are the values intended for it.
// NOTE(review): maxLength / tooLongMessage look like a length limit and the
// diagnostic used when it is exceeded, and litMode / liteMode like the modes
// used inside the literal -- the definition is not visible here; confirm.
173 Boolean parseLiteral(Mode litMode, Mode liteMode, size_t maxLength,
174 const MessageType1 &tooLongMessage,
175 unsigned flags, Text &text);
177 Boolean parseGroupToken(const AllowedGroupTokens &allow,
178 unsigned nestingLevel,
179 unsigned declInputLevel,
180 unsigned groupInputLevel,
182 Boolean parseGroupConnector(const AllowedGroupConnectors &allow,
183 unsigned declInputLevel,
184 unsigned groupInputLevel,
186 Boolean parseGroup(const AllowedGroupTokens &allowToken,
187 unsigned declInputLevel,
189 Boolean parseModelGroup(unsigned nestingLevel, unsigned declInputLevel,
190 ModelGroup *&, Mode);
191 Boolean parseNameGroup(unsigned declInputLevel, Param &);
192 Boolean parseNameTokenGroup(unsigned declInputLevel, Param &);
193 Boolean parseDataTagGroup(unsigned nestingLevel, unsigned declInputLevel,
195 Boolean parseDataTagTemplateGroup(unsigned nestingLevel,
196 unsigned declInputLevel, GroupToken &);
198 Boolean parseElementNameGroup(unsigned declInputLevel, Param &);
199 Boolean parseReservedName(const AllowedParams &allow, Param &parm);
200 Boolean parseIndicatedReservedName(const AllowedParams &allow, Param &parm);
201 Boolean getReservedName(Syntax::ReservedName *);
202 Boolean getIndicatedReservedName(Syntax::ReservedName *);
203 Boolean parseAttributeValueParam(Param &parm);
204 Boolean parseEntityReference(Boolean isParameter,
206 ConstPtr<Entity> &entity,
207 Ptr<EntityOrigin> &origin);
208 ContentToken::OccurrenceIndicator getOccurrenceIndicator(Mode);
209 Boolean parseComment(Mode);
210 Boolean parseNamedCharRef();
211 Boolean parseNumericCharRef(Char &, Location &);
212 Boolean parseDeclarationName(Syntax::ReservedName *, Boolean allowAfdr = 0);
213 void paramInvalidToken(Token, const AllowedParams &);
214 void groupTokenInvalidToken(Token, const AllowedGroupTokens &);
215 void groupConnectorInvalidToken(Token, const AllowedGroupConnectors &);
216 ElementType *lookupCreateElement(const StringC &);
217 RankStem *lookupCreateRankStem(const StringC &);
218 Boolean parseExceptions(unsigned declInputLevel,
219 Ptr<ElementDefinition> &def);
221 void parseStartTag();
222 const ElementType *completeRankStem(const StringC &);
223 void handleRankedElement(const ElementType *);
224 void parseEmptyStartTag();
225 void acceptPcdata(const Location &);
226 void acceptStartTag(const ElementType *, StartElementEvent *,
227 Boolean netEnabling);
228 void handleBadStartTag(const ElementType *, StartElementEvent *,
229 Boolean netEnabling);
230 void undo(IList<Undo> &);
231 Boolean tryStartTag(const ElementType *, StartElementEvent *,
232 Boolean netEnabling, IList<Event> &);
233 void checkExclusion(const ElementType *e);
234 Boolean tryImplyTag(const Location &, unsigned &, unsigned &,
235 IList<Undo> &, IList<Event> &);
// pushElementCheck is overloaded. Both forms take the element type and its
// StartElementEvent; this first form additionally takes the netEnabling flag.
236 void pushElementCheck(const ElementType *, StartElementEvent *,
237 Boolean netEnabling);
// Second form: instead of netEnabling it takes an Undo list and an Event
// list, both by non-const reference.
// NOTE(review): presumably the variant used while start-tags are being
// tried speculatively (compare tryStartTag / tryImplyTag) -- confirm.
238 void pushElementCheck(const ElementType *, StartElementEvent *,
239 IList<Undo> &, IList<Event> &);
240 void queueElementEvents(IList<Event> &);
241 Boolean parseAttributeSpec(Boolean inDeclaration,
243 Boolean &netEnabling);
244 Boolean handleAttributeNameToken(Text &text,
246 unsigned &specLength);
247 struct AttributeParameter {
257 Boolean parseAttributeParameter(Boolean inDecl,
259 AttributeParameter::Type &result,
260 Boolean &netEnabling);
261 void extendUnquotedAttributeValue();
263 Boolean parseAttributeValueSpec(Boolean inDecl,
266 unsigned &specLength);
269 void parseEndTagClose();
270 void parseEmptyEndTag();
271 void parseNullEndTag();
272 void endAllElements();
273 void acceptEndTag(const ElementType *, EndElementEvent *);
274 void implyCurrentElementEnd(const Location &);
275 void maybeDefineEntity(const Ptr<Entity> &entity);
276 Notation *lookupCreateNotation(const StringC &name);
277 Boolean parseExternalEntity(StringC &name,
278 Entity::DeclType declType,
279 unsigned declInputLevel,
281 ShortReferenceMap *lookupCreateMap(const StringC &);
282 StringC prettifyDelim(const StringC &delim);
283 void handleShortref(int index);
284 Boolean parseProcessingInstruction();
285 Boolean parseAttributed(unsigned declInputLevel, Param &parm,
286 Vector<Attributed *> &attributed,
287 Boolean &isNotation);
288 Boolean parseDeclaredValue(unsigned declInputLevel, Boolean isNotation,
289 Param &parm, Owner<DeclaredValue> &value);
290 Boolean parseDefaultValue(unsigned declInputLevel, Boolean isNotation,
291 Param &parm, const StringC &attributeName,
292 Owner<DeclaredValue> &declaredValue,
293 Owner<AttributeDefinition> &def,
294 Boolean &anyCurrent);
295 Boolean reportNonSgmlCharacter();
297 Boolean implySgmlDecl();
298 Boolean scanForSgmlDecl(const CharsetInfo &initCharset);
299 void findMissingMinimum(const CharsetInfo &charset, ISet<WideChar> &);
300 Boolean parseSgmlDecl();
301 Boolean sdParseDocumentCharset(SdBuilder &sdBuilder, SdParam &parm);
302 Boolean sdParseCapacity(SdBuilder &sdBuilder, SdParam &parm);
303 Boolean sdParseScope(SdBuilder &sdBuilder, SdParam &parm);
304 Boolean sdParseSyntax(SdBuilder &sdBuilder, SdParam &parm);
305 Boolean sdParseExplicitSyntax(SdBuilder &sdBuilder, SdParam &parm);
306 Boolean sdParseSyntaxCharset(SdBuilder &sdBuilder, SdParam &parm);
307 Boolean sdParseShunchar(SdBuilder &sdBuilder, SdParam &parm);
308 Boolean sdParseFunction(SdBuilder &sdBuilder, SdParam &parm);
309 Boolean sdParseNaming(SdBuilder &sdBuilder, SdParam &parm);
310 Boolean sdParseDelim(SdBuilder &sdBuilder, SdParam &parm);
311 Boolean sdParseNames(SdBuilder &sdBuilder, SdParam &parm);
312 Boolean sdParseQuantity(SdBuilder &sdBuilder, SdParam &parm);
313 Boolean sdParseFeatures(SdBuilder &sd, SdParam &parm);
314 Boolean sdParseAppinfo(SdBuilder &sd, SdParam &parm);
315 Boolean parseSdParam(const AllowedSdParams &allow, SdParam &);
316 Boolean parseSdParamLiteral(Boolean lita, String<SyntaxChar> &str);
317 Boolean stringToNumber(const Char *s, size_t length, unsigned long &);
318 void sdParamConvertToLiteral(SdParam &parm);
319 void sdParamInvalidToken(Token token, const AllowedSdParams &);
320 Boolean sdParseCharset(SdBuilder &sdBuilder, SdParam &parm,
322 CharsetDecl &, UnivCharsetDesc &);
323 Boolean sdParseExternalCharset(Sd &, UnivCharsetDesc &desc);
324 Boolean translateSyntax(CharSwitcher &switcher,
325 const CharsetInfo &syntaxCharset,
326 const CharsetInfo &docCharset,
329 Boolean translateSyntax(SdBuilder &sdBuilder,
330 WideChar syntaxChar, Char &docChar);
331 Boolean translateSyntax(SdBuilder &sdBuilder,
332 const String<SyntaxChar> &syntaxString,
334 Boolean translateSyntaxNoSwitch(SdBuilder &sdBuilder,
335 WideChar syntaxChar, Char &docChar,
337 Boolean translateName(SdBuilder &sdBuilder,
340 void translateRange(SdBuilder &sdBuilder, SyntaxChar start,
341 SyntaxChar end, ISet<Char> &chars);
342 UnivChar translateUniv(UnivChar univChar,
343 CharSwitcher &switcher,
344 const CharsetInfo &syntaxCharset);
345 Boolean univToDescCheck(const CharsetInfo &charset, UnivChar from,
347 Boolean univToDescCheck(const CharsetInfo &charset, UnivChar from,
348 Char &to, WideChar &count);
349 Boolean checkNotFunction(const Syntax &syn, Char c);
350 Boolean checkGeneralDelim(const Syntax &syn, const StringC &delim);
351 Boolean checkShortrefDelim(const Syntax &syn,
352 const CharsetInfo &charset,
353 const StringC &delim);
354 Boolean checkNmchars(const ISet<Char> &set, const Syntax &syntax);
355 void intersectCharSets(const ISet<Char> &s1, const ISet<Char> &s2,
356 ISet<WideChar> &inter);
357 Boolean checkSwitches(CharSwitcher &switcher,
358 const CharsetInfo &syntaxCharset);
359 Boolean checkSwitchesMarkup(CharSwitcher &switcher);
361 const StandardSyntaxSpec *lookupSyntax(const PublicId &id);
362 Boolean referencePublic(const PublicId &id, PublicId::TextClass,
363 Boolean &givenError);
365 void checkTaglen(Index tagStartIndex);
366 void checkSyntaxNamelen(const Syntax &syn);
367 void checkElementAttribute(const ElementType *e, size_t checkFrom = 0);
368 void checkDtd(Dtd &dtd);
369 Boolean maybeStatusKeyword(const Entity &entity);
370 void reportAmbiguity(const LeafContentToken *from,
371 const LeafContentToken *to1,
372 const LeafContentToken *to2,
373 unsigned ambigAndDepth);
374 Boolean parseLinktypeDeclStart();
375 Boolean parseLinktypeDeclEnd();
376 Boolean parseLinkDecl();
377 Boolean parseIdlinkDecl();
378 Boolean parseLinkSet(Boolean idlink);
379 void addIdLinkRule(const StringC &id, IdLinkRule &rule);
380 void addLinkRule(LinkSet *linkSet,
381 const ElementType *sourceElement,
382 const ConstPtr<SourceLinkRuleResource> &linkRule);
383 Boolean parseResultElementSpec(unsigned declInputLevel,
387 const ElementType *&resultType,
388 AttributeList &attributes);
389 LinkSet *lookupCreateLinkSet(const StringC &name);
390 const ElementType *lookupResultElementType(const StringC &name);
392 Boolean parseEntityReferenceNameGroup(Boolean &ignore);
393 Boolean parseTagNameGroup(Boolean &active);
394 void parseGroupStartTag();
395 void parseGroupEndTag();
396 Boolean skipAttributeSpec();
397 Boolean lookingAtStartTag(StringC &gi);
398 Boolean implyDtd(const StringC &gi);
399 void findMissingTag(const ElementType *e, Vector<const ElementType *> &);
400 unsigned paramsSubdocLevel(const SgmlParser::Params &);
401 void addCommonAttributes(Dtd &dtd);
402 Boolean parseAfdrDecl();
409 #endif /* not Parser_INCLUDED */