2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these libraries and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
23 /* $XConsortium: parseMode.C /main/1 1996/07/29 17:09:27 cde-hp $ */
24 // Copyright (c) 1994 James Clark
25 // See the file COPYING for copying permission.
29 #include "ParserMessages.h"
30 #include "MessageArg.h"
31 #include "TokenMessageArg.h"
33 #include "Partition.h"
38 #include "TrieBuilder.h"
42 namespace SP_NAMESPACE {
// Mode usage flags (octal bit values) followed by the entries of modeTable.
// Each table entry pairs a lexical mode with a mask of these flags; the
// compile*Modes() functions scan modeTable and choose which modes to compile
// by testing the bits modeUsedInSd, modeUsedInProlog, modeUsedInInstance and
// modeUsesSr.
// NOTE(review): the definitions of modeUsedInSd and modeUsesSr are not visible
// in this excerpt; modeUsesSr presumably marks modes that recognize short
// references -- confirm against the full enum.
47 modeUsedInProlog = 02,
48 modeUsedInInstance = 04,
// Table entries, in the form { mode, flags }.
56 { grpMode, modeUsedInProlog|modeUsedInInstance },
57 { alitMode, modeUsedInProlog|modeUsedInInstance },
58 { alitaMode, modeUsedInProlog|modeUsedInInstance },
59 { aliteMode, modeUsedInProlog|modeUsedInInstance },
60 { talitMode, modeUsedInProlog|modeUsedInInstance },
61 { talitaMode, modeUsedInProlog|modeUsedInInstance },
62 { taliteMode, modeUsedInProlog|modeUsedInInstance },
63 { mdMode, modeUsedInProlog|modeUsedInInstance|modeUsedInSd },
64 { mdMinusMode, modeUsedInProlog },
65 { mdPeroMode, modeUsedInProlog },
66 { comMode, modeUsedInProlog|modeUsedInInstance },
67 { sdcomMode, modeUsedInSd },
68 { piMode, modeUsedInProlog|modeUsedInInstance },
69 { refMode, modeUsedInProlog|modeUsedInInstance|modeUsedInSd },
70 { imsMode, modeUsedInProlog|modeUsedInInstance },
71 { cmsMode, modeUsedInProlog|modeUsedInInstance },
72 { rcmsMode, modeUsedInProlog|modeUsedInInstance },
73 { proMode, modeUsedInProlog },
74 { dsMode, modeUsedInProlog },
75 { dsiMode, modeUsedInProlog },
76 { plitMode, modeUsedInProlog },
77 { plitaMode, modeUsedInProlog },
78 { pliteMode, modeUsedInProlog },
79 { sdplitMode, modeUsedInSd },
80 { sdplitaMode, modeUsedInSd },
81 { grpsufMode, modeUsedInProlog },
82 { mlitMode, modeUsedInProlog|modeUsedInSd },
83 { mlitaMode, modeUsedInProlog|modeUsedInSd },
84 { asMode, modeUsedInProlog },
85 { slitMode, modeUsedInProlog },
86 { slitaMode, modeUsedInProlog },
// The remaining entries are flagged for the instance only.
87 { cconMode, modeUsedInInstance },
88 { rcconMode, modeUsedInInstance },
89 { cconnetMode, modeUsedInInstance },
90 { rcconnetMode, modeUsedInInstance },
91 { rcconeMode, modeUsedInInstance },
92 { tagMode, modeUsedInInstance },
// These four are the only entries that also carry modeUsesSr.
93 { econMode, modeUsedInInstance|modeUsesSr },
94 { mconMode, modeUsedInInstance|modeUsesSr },
95 { econnetMode, modeUsedInInstance|modeUsesSr },
96 { mconnetMode, modeUsedInInstance|modeUsesSr },
// Compiles recognizers for the modes whose modeTable entry has the
// modeUsedInSd flag set.  The selected modes are handed to compileModes()
// with 0 as the third (DTD) argument.
99 void Parser::compileSdModes()
103 for (size_t i = 0; i < SIZEOF(modeTable); i++)
// Keep only the entries flagged modeUsedInSd.
104 if (modeTable[i].flags & modeUsedInSd)
105 modes[n++] = modeTable[i].mode;
// Third argument 0: no DTD is supplied for these modes.
106 compileModes(modes, n, 0);
// Selects modes from modeTable and compiles them via compileModes() with 0
// as the third (DTD) argument.
// Three alternative selection rules are visible in the loop below.  The
// conditions that choose between them are not visible in this excerpt;
// presumably they depend on scopeInstance and haveSr -- TODO confirm.
109 void Parser::compilePrologModes()
// Both values come from the SGML declaration / concrete syntax.
111 Boolean scopeInstance = sd().scopeInstance();
112 Boolean haveSr = syntax().hasShortrefs();
115 for (size_t i = 0; i < SIZEOF(modeTable); i++) {
// Rule 1: take the modes flagged modeUsedInProlog.
117 if (modeTable[i].flags & modeUsedInProlog)
118 modes[n++] = modeTable[i].mode;
// Rule 2: take the modes flagged for the prolog or the instance, except
// those flagged modeUsesSr.
121 if ((modeTable[i].flags & (modeUsedInInstance|modeUsedInProlog))
122 && !(modeTable[i].flags & modeUsesSr))
123 modes[n++] = modeTable[i].mode;
// Rule 3: take every mode flagged for the prolog or the instance.
126 if (modeTable[i].flags & (modeUsedInInstance|modeUsedInProlog))
127 modes[n++] = modeTable[i].mode;
// Third argument 0: no DTD is supplied here.
130 compileModes(modes, n, 0);
// Selects modes from modeTable and compiles them via compileModes(),
// passing the address of the current DTD as the third argument.
// Two alternative selection rules are visible in the loop below; the
// condition choosing between them is not visible in this excerpt
// (presumably scopeInstance -- TODO confirm).
133 void Parser::compileInstanceModes()
135 Boolean scopeInstance = sd().scopeInstance();
// Tests for "no instance scoping and no short references in the syntax".
// The statement guarded by this test is not visible in this excerpt.
137 if (!scopeInstance && !syntax().hasShortrefs())
141 for (size_t i = 0; i < SIZEOF(modeTable); i++) {
// Rule 1: take the modes flagged modeUsedInInstance.
143 if (modeTable[i].flags & modeUsedInInstance)
144 modes[n++] = modeTable[i].mode;
// Rule 2: take the modes flagged modeUsesSr.
147 if (modeTable[i].flags & modeUsesSr)
148 modes[n++] = modeTable[i].mode;
// Hand the current DTD to compileModes() by address; compileModes()
// reads the DTD's short reference delimiters through this pointer.
151 compileModes(modes, n, &currentDtd());
// Builds and installs a Recognizer for each of the n modes in `modes`.
//
// Phases visible in this excerpt:
//   1. Walk the tokens of every requested mode, flagging which general
//      delimiters and standard functions they reference (flag arrays exist
//      for character sets as well) and noting whether any mode includes
//      short references.
//   2. Build a Partition from the collected characters and character sets,
//      then translate sets, delimiters and functions into equivalence codes.
//   3. When short references are included and a DTD was given, split each
//      short reference delimiter of the DTD around its 'B' sequence into an
//      SrInfo record.
//   4. For each mode, feed its tokens (and short references, if the mode
//      includes them) to a TrieBuilder, install the resulting Recognizer
//      with setRecognizer(), and report any lexical ambiguities.
//
// `dtd` may be 0 (some callers pass 0); the visible uses of it are preceded
// by a test of dtd.
// NOTE(review): several lines of this function (switch headers, breaks,
// guards, some declarations) are not visible in this excerpt, so comments
// below describe only the statements that are shown.
154 void Parser::compileModes(const Mode *modes,
// Usage flags, indexed by set / general delimiter / standard function.
158 PackedBoolean sets[Syntax::nSet];
159 PackedBoolean delims[Syntax::nDelimGeneral];
160 PackedBoolean functions[3];
162 Boolean includesShortref = 0;
// Initialization loops over the three flag arrays (loop bodies not visible).
163 for (i = 0; i < Syntax::nSet; i++)
165 for (i = 0; i < Syntax::nDelimGeneral; i++)
167 for (i = 0; i < 3; i++)
// Phase 1: record what each mode's tokens reference.
170 for (i = 0; i < n; i++) {
171 ModeInfo iter(modes[i], sd());
173 while (iter.nextToken(&ti)) {
175 case TokenInfo::delimType:
176 delims[ti.delim1] = 1;
// A delimiter followed by a second delimiter: both are used.
178 case TokenInfo::delimDelimType:
179 delims[ti.delim1] = 1;
180 delims[ti.delim2] = 1;
182 case TokenInfo::delimSetType:
183 delims[ti.delim1] = 1;
185 case TokenInfo::setType:
188 case TokenInfo::functionType:
189 functions[ti.function] = 1;
// Remember if at least one requested mode includes short references.
193 if (!includesShortref && iter.includesShortref())
194 includesShortref = 1;
// Collect characters into `chars`: standard function characters and the
// characters of general delimiters (the guards on these loops are not visible).
199 for (i = 0; i < 3; i++)
201 chars.add(syntax().standardFunction(i));
202 for (i = 0; i < Syntax::nDelimGeneral; i++)
204 const StringC &str = syntax().delimGeneral(i);
205 for (size_t j = 0; j < str.size(); j++)
// If any short reference delimiter in the DTD contains the document
// character for 'B', the blank set is marked as used.
208 if (includesShortref && dtd) {
209 size_t n = dtd->nShortref();
210 for (size_t i = 0; i < n; i++) {
211 const StringC &delim = dtd->shortref(i);
212 size_t len = delim.size();
213 for (size_t j = 0; j < len; j++)
214 if (delim[j] == sd().execToDoc('B'))
215 sets[Syntax::blank] = 1;
// Phase 2: gather character sets into csets (usedSets counts them) and
// build the partition from chars, csets and the general substitution table.
221 const ISet<Char> *csets[Syntax::nSet];
223 for (i = 0; i < Syntax::nSet; i++)
225 csets[usedSets++] = syntax().charSet(i);
227 Partition partition(chars, csets, usedSets, *syntax().generalSubstTable());
// Equivalence codes for each set, taken from the partition in order.
229 String<EquivCode> setCodes[Syntax::nSet];
232 for (i = 0; i < Syntax::nSet; i++)
234 setCodes[i] = partition.setCodes(nCodes++);
// Each general delimiter rewritten as a string of equivalence codes.
236 String<EquivCode> delimCodes[Syntax::nDelimGeneral];
237 for (i = 0; i < Syntax::nDelimGeneral; i++)
239 StringC str = syntax().delimGeneral(i);
240 for (size_t j = 0; j < str.size(); j++)
241 delimCodes[i] += partition.charCode(str[j]);
// Equivalence code of each standard function character.
244 String<EquivCode> functionCode[3];
245 for (i = 0; i < 3; i++)
247 functionCode[i] += partition.charCode(syntax().standardFunction(i));
// Phase 3: per-short-reference information.
249 Vector<SrInfo> srInfo;
// Tests for "no short references needed or no DTD given".
252 if (!includesShortref || !dtd)
255 nShortref = dtd->nShortref();
256 srInfo.resize(nShortref);
258 for (i = 0; i < nShortref; i++) {
259 const StringC delim = dtd->shortref(i);
260 SrInfo *p = &srInfo[i];
// p->chars receives the codes of the delimiter's characters; the loop
// tests for the document character 'B' (presumably stopping there).
262 for (j = 0; j < delim.size(); j++) {
263 if (delim[j] == sd().execToDoc('B'))
265 p->chars += partition.charCode(delim[j]);
// j < delim.size() here means the scan did not reach the end of the delimiter.
267 if (j < delim.size()) {
// Count the run of consecutive 'B' characters.
268 p->bSequenceLength = 1;
269 for (++j; j < delim.size(); j++) {
270 if (delim[j] != sd().execToDoc('B'))
272 p->bSequenceLength += 1;
// Whatever follows the run goes into p->chars2.
274 for (; j < delim.size(); j++)
275 p->chars2 += partition.charCode(delim[j]);
// No B sequence in this delimiter.
278 p->bSequenceLength = 0;
// Phase 4: build one recognizer per mode.
282 const String<EquivCode> emptyString;
283 Boolean multicode = syntax().multicode();
284 for (i = 0; i < n; i++) {
285 TrieBuilder tb(partition.maxCode() + 1);
// Filled by the tb.recognize*() calls below and reported at the end.
286 TrieBuilder::TokenVector ambiguities;
// Per-equivalence-code token table (the guard on its setup is not visible;
// presumably only used when multicode is set -- TODO confirm).
287 Vector<Token> suppressTokens;
289 suppressTokens.assign(partition.maxCode() + 1, 0);
290 suppressTokens[partition.eECode()] = tokenEe;
292 tb.recognizeEE(partition.eECode(), tokenEe);
293 ModeInfo iter(modes[i], sd());
295 // We try to handle the possibility that some delimiters may be empty;
296 // this might happen when compiling recognizers for the SGML declaration.
297 while (iter.nextToken(&ti)) {
299 case TokenInfo::delimType:
300 if (delimCodes[ti.delim1].size() > 0)
301 tb.recognize(delimCodes[ti.delim1], ti.token,
302 ti.priority, ambiguities);
304 case TokenInfo::delimDelimType:
// Concatenate the two delimiters; skipped if either one is empty.
306 String<EquivCode> str(delimCodes[ti.delim1]);
307 if (str.size() > 0 && delimCodes[ti.delim2].size() > 0) {
308 str += delimCodes[ti.delim2];
309 tb.recognize(str, ti.token, ti.priority, ambiguities);
313 case TokenInfo::delimSetType:
// A delimiter followed by a character from a set.
314 if (delimCodes[ti.delim1].size() > 0)
315 tb.recognize(delimCodes[ti.delim1], setCodes[ti.set],
316 ti.token, ti.priority, ambiguities);
318 case TokenInfo::setType:
// A single character from a set, with no delimiter prefix.
319 tb.recognize(emptyString, setCodes[ti.set], ti.token,
320 ti.priority, ambiguities);
// Record the token against every equivalence code of the set.
322 const String<EquivCode> &equivCodes = setCodes[ti.set];
323 for (size_t j = 0; j < equivCodes.size(); j++)
324 suppressTokens[equivCodes[j]] = ti.token;
327 case TokenInfo::functionType:
328 tb.recognize(functionCode[ti.function], ti.token,
329 ti.priority, ambiguities);
// Record the token against the function character's equivalence code.
331 suppressTokens[functionCode[ti.function][0]] = ti.token;
// Short references: token numbers are tokenFirstShortref + index.
335 if (iter.includesShortref()) {
336 for (int j = 0; j < nShortref; j++) {
337 const SrInfo *p = &srInfo[j];
// Delimiters with a B sequence go through recognizeB(), bounded by the
// BSEQLEN quantity; the others are recognized as plain code strings.
338 if (p->bSequenceLength > 0)
339 tb.recognizeB(p->chars, p->bSequenceLength,
340 syntax().quantity(Syntax::qBSEQLEN),
341 setCodes[Syntax::blank],
342 p->chars2, tokenFirstShortref + j,
345 tb.recognize(p->chars, tokenFirstShortref + j,
346 Priority::delim, ambiguities);
// Install the recognizer for this mode; one of two Recognizer constructors
// is chosen (the selecting condition is not visible in this excerpt).
349 setRecognizer(modes[i],
351 ? new Recognizer(tb.extractTrie(), partition.map(),
353 : new Recognizer(tb.extractTrie(), partition.map())));
354 // FIXME give more information
// Ambiguities are stored as consecutive pairs of tokens; report each pair.
355 for (size_t j = 0; j < ambiguities.size(); j += 2)
356 message(ParserMessages::lexicalAmbiguity,
357 TokenMessageArg(ambiguities[j], modes[i], syntaxPointer(),
359 TokenMessageArg(ambiguities[j + 1], modes[i], syntaxPointer(),
// Builds a per-character boolean map.  Every character in the sgmlChar set
// starts out as 1; the map is then cleared (set to 0) for:
//   - characters that the general substitution table maps to the first
//     character of a delimiter used by a token in mconnetMode,
//   - characters of sets and standard functions whose token in mconnetMode
//     is not tokenChar,
//   - the first character of each short reference delimiter of the current
//     DTD (the whole blank set when that first character is 'B').
// NOTE(review): what is done with the finished map is not visible in this
// excerpt.
364 void Parser::compileNormalMap()
// Default value 0 for every character.
366 XcharMap<PackedBoolean> map(0);
367 ISetIter<Char> sgmlCharIter(*syntax().charSet(Syntax::sgmlChar));
// Mark every range of the sgmlChar set.
369 while (sgmlCharIter.next(min, max))
370 map.setRange(min, max, 1);
371 ModeInfo iter(mconnetMode, sd());
373 while (iter.nextToken(&ti)) {
// All three delimiter-based token types are handled the same way: only the
// first character of the first delimiter matters.
375 case TokenInfo::delimType:
376 case TokenInfo::delimDelimType:
377 case TokenInfo::delimSetType:
379 Char c = syntax().delimGeneral(ti.delim1)[0];
// inverse(c) yields the characters that the substitution table maps to c;
// each of them is cleared.
381 StringC str(syntax().generalSubstTable()->inverse(c));
382 for (size_t i = 0; i < str.size(); i++)
383 map.setChar(str[i], 0);
386 case TokenInfo::setType:
// Sets recognized as something other than tokenChar are cleared entirely.
387 if (ti.token != tokenChar) {
388 ISetIter<Char> setIter(*syntax().charSet(ti.set));
390 while (setIter.next(min, max))
391 map.setRange(min, max, 0);
394 case TokenInfo::functionType:
395 if (ti.token != tokenChar)
396 map.setChar(syntax().standardFunction(ti.function), 0);
// Short references of the current DTD: look at each delimiter's first character.
400 int nShortref = currentDtd().nShortref();
401 for (int i = 0; i < nShortref; i++) {
402 Char c = currentDtd().shortref(i)[0];
// A leading 'B' stands for a blank sequence, so clear the whole blank set.
403 if (c == sd().execToDoc('B')) {
404 ISetIter<Char> setIter(*syntax().charSet(Syntax::blank));
406 while (setIter.next(min, max))
407 map.setRange(min, max, 0);
// Otherwise (presumably the else branch) clear every character that the
// substitution table maps to c.
411 StringC str(syntax().generalSubstTable()->inverse(c));
412 for (size_t j = 0; j < str.size(); j++)
413 map.setChar(str[j], 0);
// Registers with `dtd` (via dtd.addNeededShortref) short reference
// delimiters of `syntax` that must be recognized.  Visible steps:
//   - test whether the syntax has short references at all,
//   - flag the general delimiters that start a token in mconnetMode,
//   - add the PIO and NET delimiters when they are valid short references,
//   - for each complex short reference delimiter of the syntax, add it when
//     shortrefCanPreemptDelim() reports that it can preempt a general
//     delimiter or a short reference already in the DTD.
// NOTE(review): parts of the conditions in the final loops are not visible
// in this excerpt.
419 void Parser::addNeededShortrefs(Dtd &dtd, const Syntax &syntax)
// Tests for a syntax without short references (guarded statement not visible).
421 if (!syntax.hasShortrefs())
423 PackedBoolean delimRelevant[Syntax::nDelimGeneral];
425 for (i = 0; i < Syntax::nDelimGeneral; i++)
426 delimRelevant[i] = 0;
// A general delimiter is relevant if it is the first delimiter of some
// token in mconnetMode.
427 ModeInfo iter(mconnetMode, sd());
429 while (iter.nextToken(&ti)) {
431 case TokenInfo::delimType:
432 case TokenInfo::delimDelimType:
433 case TokenInfo::delimSetType:
434 delimRelevant[ti.delim1] = 1;
441 // PIO and NET are the only delimiters that are recognized in con
442 // mode without context. If a short reference delimiter is
443 // identical to one of these delimiters, then we'll have an
444 // ambiguity. We make such a short reference delimiter needed
445 // to ensure that this ambiguity is reported.
446 if (syntax.isValidShortref(syntax.delimGeneral(Syntax::dPIO)))
447 dtd.addNeededShortref(syntax.delimGeneral(Syntax::dPIO));
448 if (syntax.isValidShortref(syntax.delimGeneral(Syntax::dNET)))
449 dtd.addNeededShortref(syntax.delimGeneral(Syntax::dNET));
451 size_t nShortrefComplex = syntax.nDelimShortrefComplex();
453 // A short reference delimiter is needed if it is used or if it can
454 // contain some other shorter delimiter that is either a relevant general
455 // delimiter or a shortref delimiter that is used.
457 for (i = 0; i < nShortrefComplex; i++) {
// Check complex short reference i against the general delimiters.
459 for (j = 0; j < Syntax::nDelimGeneral; j++)
461 && shortrefCanPreemptDelim(syntax.delimShortrefComplex(i),
462 syntax.delimGeneral(j),
465 dtd.addNeededShortref(syntax.delimShortrefComplex(i));
// Check complex short reference i against the short references in the DTD.
468 for (j = 0; j < dtd.nShortref(); j++)
469 if (shortrefCanPreemptDelim(syntax.delimShortrefComplex(i),
473 dtd.addNeededShortref(syntax.delimShortrefComplex(i));
480 Boolean Parser::shortrefCanPreemptDelim(const StringC &sr,
483 const Syntax &syntax)
485 Char letterB = sd().execToDoc('B');
486 for (size_t i = 0; i < sr.size(); i++) {
494 if (sr[k] == letterB) {
495 if (dIsSr && d[j] == letterB) {
499 else if (syntax.isB(d[j])) {
502 if (k == sr.size() || sr[k] != letterB) {
503 // it was the last B in the sequence
504 while (j < d.size() && syntax.isB(d[j]))
511 else if (dIsSr && d[j] == letterB) {
512 if (syntax.isB(sr[k])) {
515 if (j < d.size() && d[j] != letterB) {
516 while (k < sr.size() && syntax.isB(sr[k]))
523 else if (d[j] == sr[k]) {