2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these libraries and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
23 /* $XConsortium: parseCommon.C /main/1 1996/07/29 17:09:12 cde-hp $ */
24 // Copyright (c) 1994 James Clark
25 // See the file COPYING for copying permission.
30 #include "MessageArg.h"
31 #include "ParserMessages.h"
33 #include "NumericCharRefOrigin.h"
36 namespace SP_NAMESPACE {
// Parses a processing instruction. Tokens are read in piMode and the first
// character of each data token is appended to `buf`; the accumulated text is
// finally delivered through eventHandler().pi() as an ImmediatePiEvent tagged
// with the location recorded at entry.
// NOTE(review): this listing is missing lines (braces, the loop/switch
// headers, the declaration of `buf`, the return statements) -- confirm exact
// control flow against the complete file.
39 Boolean Parser::parseProcessingInstruction()
41 currentInput()->startToken();
// Remember where the instruction starts; used for the event emitted below.
42 Location location(currentLocation());
45 Token token = getToken(piMode);
// tokenPic is the terminating token (presumably this leaves the read loop --
// the statement that follows is not visible here).
46 if (token == tokenPic)
// Entity ended before the instruction was closed.
50 message(ParserMessages::processingInstructionEntityEnd);
52 case tokenUnrecognized:
53 reportNonSgmlCharacter();
56 buf += *currentInput()->currentTokenStart();
// Once the buffer exceeds twice the pilen quantity, report both the length
// error and processingInstructionClose (presumably parsing is abandoned at
// this point -- TODO confirm, the following statement is not visible).
57 if (buf.size()/2 > syntax().pilen()) {
58 message(ParserMessages::processingInstructionLength,
59 NumberMessageArg(syntax().pilen()));
60 message(ParserMessages::processingInstructionClose);
// Ordinary over-length check against pilen for the collected text.
66 if (buf.size() > syntax().pilen())
67 message(ParserMessages::processingInstructionLength,
68 NumberMessageArg(syntax().pilen()));
// Hand the collected text to the application via the event handler.
70 eventHandler().pi(new (eventAllocator()) ImmediatePiEvent(buf, location));
// Parses a literal into `text`, reading tokens in `litMode` (switching to
// `liteMode` while input comes from a referenced entity, see below) and
// applying the behaviours selected by `flags` (literalDelimInfo,
// literalSingleSpace, literalDataTag, literalNoProcess are tested here).
// `tooLongMessage` is the diagnostic issued, with `maxLength` as argument,
// when the literal is over-long.
// NOTE(review): this listing is missing lines (part of the parameter list,
// the loop/switch headers, several case labels, break/return statements) --
// confirm exact control flow, especially fall-through between cases, against
// the complete file.
74 Boolean Parser::parseLiteral(Mode litMode,
77 const MessageType1 &tooLongMessage,
// Input level at which the literal started; several checks below only apply
// when we are back at this level.
81 unsigned startLevel = inputLevel();
82 Mode currentMode = litMode;
83 // If the literal gets to be longer than this, then we assume
84 // that the closing delimiter has been omitted if we're at the end
85 // of a line and at the starting input level.
// The comparison with size_t(-1)/2 presumably protects a doubling of
// maxLength from overflowing; the rest of the expression is not visible.
86 size_t reallyMaxLength = (maxLength > size_t(-1)/2
// Start location is kept so the "missing closing delimiter" diagnostic can
// point at the beginning of the literal.
90 Location startLoc(currentLocation());
91 if (flags & literalDelimInfo)
92 text.addStartDelim(currentLocation());
95 Token token = getToken(currentMode);
// An entity end at the starting input level is reported as literalLevel.
98 if (inputLevel() == startLevel) {
99 message(ParserMessages::literalLevel);
102 text.addEntityEnd(currentLocation());
// Back at the starting level: resume tokenizing in the caller-supplied mode.
104 if (inputLevel() == startLevel)
105 currentMode = litMode;
107 case tokenUnrecognized:
// reportNonSgmlCharacter() returning true means the character has already
// been diagnosed; otherwise it is reported as literalMinimumData.
108 if (reportNonSgmlCharacter())
110 message(ParserMessages::literalMinimumData,
111 StringMessageArg(currentToken()));
// Character is recorded as ignored rather than added to the text.
114 text.ignoreChar(currentChar(), currentLocation());
117 if (text.size() > reallyMaxLength && inputLevel() == startLevel) {
119 message(tooLongMessage, NumberMessageArg(maxLength));
121 // guess that the closing delimiter has been omitted
122 Messenger::setNextLocation(startLoc);
123 message(ParserMessages::literalClosingDelimiter);
// With literalSingleSpace, a space at the start of the text or directly
// after another space is ignored; otherwise a space character is added with
// a ReplacementOrigin location.
128 if ((flags & literalSingleSpace)
129 && (text.size() == 0 || text.lastChar() == syntax().space()))
130 text.ignoreChar(currentChar(), currentLocation());
132 text.addChar(syntax().space(),
133 Location(new ReplacementOrigin(currentLocation(),
// Same single-space rule, but here the current character itself is added
// when it is not ignored.
138 if ((flags & literalSingleSpace)
139 && (text.size() == 0 || text.lastChar() == syntax().space()))
140 text.ignoreChar(currentChar(), currentLocation());
142 text.addChar(currentChar(), currentLocation());
// Numeric character reference: `c` and `loc` are filled in by the callee.
148 if (!parseNumericCharRef(c, loc))
// In data tag patterns the referenced character must be an SGML character
// and must not be a function character.
150 if (flags & literalDataTag) {
151 if (!syntax().isSgmlChar(c))
152 message(ParserMessages::dataTagPatternNonSgml);
153 else if (syntax().charSet(Syntax::functionChar)->contains(c))
154 message(ParserMessages::dataTagPatternFunction);
// Single-space rule applied to a space produced by the reference.
156 if ((flags & literalSingleSpace)
157 && c == syntax().space()
158 && (text.size() == 0 || text.lastChar() == syntax().space()))
159 text.ignoreChar(c, loc);
161 text.addChar(c, loc);
164 case tokenCroNameStart:
165 if (!parseNamedCharRef())
// Different diagnostic depending on whether we are in the instance or the
// prolog.
169 message(inInstance() ? ParserMessages::eroGrpoStartTag : ParserMessages::eroGrpoProlog);
// Closing delimiter: record it, noting whether it was tokenLita.
173 if (flags & literalDelimInfo)
174 text.addEndDelim(currentLocation(), token == tokenLita);
177 case tokenEroNameStart:
178 case tokenPeroNameStart:
180 ConstPtr<Entity> entity;
181 Ptr<EntityOrigin> origin;
// literalNoProcess selects ignore level 2 for the entity reference.
182 if (!parseEntityReference(token == tokenPeroNameStart,
183 (flags & literalNoProcess) ? 2 : 0,
186 if (!entity.isNull())
187 entity->litReference(text, *this, origin,
188 (flags & literalSingleSpace) != 0);
// The reference pushed new input: tokenize in liteMode until we return to
// the starting level.
189 if (inputLevel() > startLevel)
190 currentMode = liteMode;
194 message(ParserMessages::peroGrpoProlog);
// Same "closing delimiter omitted" heuristic as above, triggered here when
// the current character is the record-end function character.
197 if (text.size() > reallyMaxLength && inputLevel() == startLevel
198 && currentChar() == syntax().standardFunction(Syntax::fRE)) {
200 message(tooLongMessage, NumberMessageArg(maxLength));
202 // guess that the closing delimiter has been omitted
203 Messenger::setNextLocation(startLoc);
204 message(ParserMessages::literalClosingDelimiter);
207 text.addChar(currentChar(), currentLocation());
// With literalSingleSpace a trailing space is dropped from the text.
212 if ((flags & literalSingleSpace)
214 && text.lastChar() == syntax().space())
215 text.ignoreLastChar();
// Final length check against the caller's limit.
216 if (text.size() > maxLength) {
// NOTE(review): handleAsUnterminated appears to offer an alternative
// diagnosis for an over-long attribute value -- semantics not visible here.
222 if (AttributeValue::handleAsUnterminated(text, *this))
227 message(tooLongMessage, NumberMessageArg(maxLength));
// Parses a named character reference: reads the function name, looks up the
// corresponding character, consumes the reference end, and pushes the
// character back onto the current input as a NamedCharRef.
// NOTE(review): this listing is missing lines (declarations of `name` and
// `c`, the case labels of the switch, return statements) -- confirm against
// the complete file.
232 Boolean Parser::parseNamedCharRef()
234 InputSource *in = currentInput();
// Index of the reference start, stored in the NamedCharRef below.
235 Index startIndex = currentLocation().index();
236 in->discardInitial();
237 extendNameToken(syntax().namelen(), ParserMessages::nameLength);
// First fetch the name through the general substitution table for lookup.
241 getCurrentToken(syntax().generalSubstTable(), name);
242 if (!syntax().lookupFunctionChar(name, &c)) {
243 message(ParserMessages::functionName, StringMessageArg(name));
249 getCurrentToken(name); // the original name
// Classify how the reference was terminated; the three outcomes set below
// are endRefc, endRE and endOmitted (the matching case labels are not
// visible in this listing).
251 NamedCharRef::RefEndType refEndType;
252 switch (getToken(refMode)) {
254 refEndType = NamedCharRef::endRefc;
257 refEndType = NamedCharRef::endRE;
260 refEndType = NamedCharRef::endOmitted;
265 in->pushCharRef(c, NamedCharRef(startIndex, refEndType, name));
// Parses a numeric character reference. The digits of the current token are
// converted to a character number, validated, and the location of the
// reference is returned through `loc` as a NumericCharRefOrigin.
// NOTE(review): this listing is missing lines (declarations of `c` and
// `valid`, the assignment to `ch`, case labels, return statements) --
// confirm against the complete file.
269 Boolean Parser::parseNumericCharRef(Char &ch, Location &loc)
271 InputSource *in = currentInput();
272 Location startLocation = currentLocation();
273 in->discardInitial();
// The number token is limited by the namelen quantity.
274 extendNumber(syntax().namelen(), ParserMessages::numberLength);
277 const Char *lim = in->currentTokenEnd();
278 for (const Char *p = in->currentTokenStart(); p < lim; p++) {
279 int val = sd().digitWeight(*p);
// Range-checked accumulation: the multiply by 10 only happens when
// c <= charMax/10, and the digit is only acceptable when the scaled value
// is <= charMax - val. Otherwise the number is reported as invalid.
280 if (c <= charMax/10 && (c *= 10) <= charMax - val)
283 message(ParserMessages::characterNumber, StringMessageArg(currentToken()));
// A syntactically valid number must also be declared in the document
// character set declaration.
288 if (valid && !sd().docCharsetDecl().charDeclared(c)) {
290 message(ParserMessages::characterNumber, StringMessageArg(currentToken()));
// Markup for the reference is built here (the condition guarding its
// creation is not visible in this listing).
292 Owner<Markup> markupPtr;
294 markupPtr = new Markup;
295 markupPtr->addDelim(Syntax::dCRO);
296 markupPtr->addNumber(in);
297 switch (getToken(refMode)) {
299 markupPtr->addDelim(Syntax::dREFC);
302 markupPtr->addRefEndRe();
// Consume the reference end without recording markup.
309 (void)getToken(refMode);
// The length passed to the origin is the distance from the start of the
// reference to the end of the current token.
312 loc = Location(new NumericCharRefOrigin(startLocation,
313 currentLocation().index()
314 + currentInput()->currentTokenLength()
315 - startLocation.index(),
322 // ignoreLevel: 0 means don't ignore;
323 // 1 means parse name group and ignore if inactive
// Parses a general or parameter entity reference (selected by
// `isParameter`). On return `entity` holds the referenced entity (possibly
// null) and `origin` an EntityOrigin describing the reference, or null when
// there is no entity.
// NOTE(review): this listing is missing lines (the `ignoreLevel` parameter,
// several conditions, case labels, return statements) -- confirm against the
// complete file.
326 Boolean Parser::parseEntityReference(Boolean isParameter,
328 ConstPtr<Entity> &entity,
329 Ptr<EntityOrigin> &origin)
331 InputSource *in = currentInput();
332 Location startLocation(in->currentLocation());
// Markup for the reference, opened with the PERO or ERO delimiter (the
// condition guarding its creation is not visible in this listing).
333 Owner<Markup> markupPtr;
335 markupPtr = new Markup;
336 markupPtr->addDelim(isParameter ? Syntax::dPERO : Syntax::dERO);
// ignoreLevel 1: a name group is parsed. The parser's current markup is
// set aside in savedMarkup while the name group is parsed into this
// reference's markup, then restored afterwards.
338 if (ignoreLevel == 1) {
340 Markup *savedCurrentMarkup = currentMarkup();
341 if (savedCurrentMarkup)
342 savedCurrentMarkup->swap(savedMarkup);
343 Location savedMarkupLocation(markupLocation());
344 startMarkup(markupPtr != 0, startLocation);
346 markupPtr->addDelim(Syntax::dGRPO);
347 markupPtr->swap(*currentMarkup());
350 if (!parseEntityReferenceNameGroup(ignore))
// Move the collected markup back into markupPtr and restore the parser's
// previous markup state.
353 currentMarkup()->swap(*markupPtr);
354 startMarkup(savedCurrentMarkup != 0, savedMarkupLocation);
355 if (savedCurrentMarkup)
356 savedMarkup.swap(*currentMarkup());
// After the name group, an entity name must follow.
360 Xchar c = in->tokenChar(messenger());
361 if (!syntax().isNameStartCharacter(c)) {
362 message(ParserMessages::entityReferenceMissingName);
366 in->discardInitial();
// The name is extended with the limit matching the reference kind: penamelen
// with parameterEntityNameLength, or namelen with nameLength (the selecting
// condition is not visible; presumably isParameter).
368 extendNameToken(syntax().penamelen(), ParserMessages::parameterEntityNameLength);
370 extendNameToken(syntax().namelen(), ParserMessages::nameLength);
371 StringC &name = nameBuffer();
372 getCurrentToken(syntax().entitySubstTable(), name);
// Either an IgnoredEntity placeholder is created or the entity is looked up
// (the selecting condition is not visible in this listing).
374 entity = new IgnoredEntity(name,
376 ? Entity::parameterEntity
377 : Entity::generalEntity);
379 entity = lookupEntity(isParameter, name, startLocation, 1);
// Undefined entity: the diagnostic depends on whether an applicable DTD
// exists.
380 if (entity.isNull()) {
381 if (haveApplicableDtd())
383 ? ParserMessages::parameterEntityUndefined
384 : ParserMessages::entityUndefined,
385 StringMessageArg(name));
387 message(ParserMessages::entityApplicableDtd);
// Optional warning when the reference resolved to the default entity.
389 else if (entity->defaulted() && options().warnDefaultEntityReference)
390 message(ParserMessages::defaultEntityReference, StringMessageArg(name));
393 markupPtr->addName(in);
394 switch (getToken(refMode)) {
396 markupPtr->addDelim(Syntax::dREFC);
399 markupPtr->addRefEndRe();
// Consume the reference end without recording markup.
406 (void)getToken(refMode);
// The origin's length is the distance from the start of the reference to
// the end of the current token.
407 if (!entity.isNull())
408 origin = new (internalAllocator())
409 EntityOrigin(entity, startLocation,
410 currentLocation().index()
411 + currentInput()->currentTokenLength()
412 - startLocation.index(),
415 origin = (EntityOrigin *)0;
// Parses a comment, reading tokens in `mode` until tokenCom is returned.
// Comment characters are appended to the current markup.
// NOTE(review): this listing is missing lines (the guard on `markup`, the
// switch header, case labels, return statements) -- confirm against the
// complete file.
419 Boolean Parser::parseComment(Mode mode)
// Start location is used for the entity-end diagnostic below.
421 Location startLoc(currentLocation());
422 Markup *markup = currentMarkup();
424 markup->addCommentStart();
426 while ((token = getToken(mode)) != tokenCom)
428 case tokenUnrecognized:
// If the character was not already reported as non-SGML, report it as
// sdCommentSignificant.
429 if (!reportNonSgmlCharacter())
430 message(ParserMessages::sdCommentSignificant,
431 StringMessageArg(currentToken()));
// Entity ended inside the comment; reported with the comment's start
// location.
434 message(ParserMessages::commentEntityEnd, startLoc);
438 markup->addCommentChar(currentChar());
// Extends the current token over following name characters, then ends the
// token at the resulting length. If that length exceeds `maxLength`,
// `tooLongMessage` is issued with `maxLength` as its argument; the token is
// still ended at the full length.
// NOTE(review): the loop body is not visible in this listing (presumably it
// increments `length`) -- confirm against the complete file.
444 void Parser::extendNameToken(size_t maxLength,
445 const MessageType1 &tooLongMessage)
447 InputSource *in = currentInput();
448 size_t length = in->currentTokenLength();
449 const Syntax &syn = syntax();
450 while (syn.isNameCharacter(in->tokenChar(messenger())))
452 if (length > maxLength)
453 message(tooLongMessage, NumberMessageArg(maxLength));
454 in->endToken(length);
// Extends the current token over following digit characters, then ends the
// token at the resulting length. If that length exceeds `maxLength`,
// `tooLongMessage` is issued with `maxLength` as its argument; the token is
// still ended at the full length.
// NOTE(review): the loop body is not visible in this listing (presumably it
// increments `length`) -- confirm against the complete file.
458 void Parser::extendNumber(size_t maxLength, const MessageType1 &tooLongMessage)
460 InputSource *in = currentInput();
461 size_t length = in->currentTokenLength();
462 while (syntax().isDigit(in->tokenChar(messenger())))
464 if (length > maxLength)
465 message(tooLongMessage, NumberMessageArg(maxLength));
466 in->endToken(length);
// Issues the nonSgmlCharacter diagnostic, with the character number as
// argument, when character `c` is not an SGML character in the current
// syntax. Callers in this file test the Boolean result to decide whether a
// further diagnostic is needed.
// NOTE(review): the declaration of `c` and the return statements are not
// visible in this listing -- confirm against the complete file.
469 Boolean Parser::reportNonSgmlCharacter()
472 if (!syntax().isSgmlChar(c)) {
473 message(ParserMessages::nonSgmlCharacter, NumberMessageArg(c));
// Extends the current token over following characters for which
// syntax().isS() is true, then ends the token at the resulting length.
// Unlike extendNameToken/extendNumber, no length limit is checked here.
// NOTE(review): the loop body is not visible in this listing (presumably it
// increments `length`) -- confirm against the complete file.
479 void Parser::extendS()
481 InputSource *in = currentInput();
482 size_t length = in->currentTokenLength();
483 while (syntax().isS(in->tokenChar(messenger())))
485 in->endToken(length);