2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these libraries and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
23 /* Copyright (c) 1995 FUJITSU LIMITED */
24 /* All Rights Reserved */
26 /* $TOG: DocParser.C /main/16 1998/04/17 11:48:07 mgreess $ */
31 #include "StyleSheetExceptions.h"
32 #include "DocParser.h"
35 #include "AttributeList.h"
37 #define DATA_BUF_SIZ 4096
// Null-terminates the character buffer held by `ost` so that the pointer
// returned by str() can be read as a C string: a '\0' is stored at offset
// pcount(), i.e. directly after the characters written so far.
// str() on a strstream freezes its buffer, which is why the callers in this
// file follow up with rdbuf()->freeze(0).
// Callers chain .str() on the returned reference.
// NOTE(review): nothing here checks that offset pcount() is still inside the
// buffer -- confirm callers always leave room for the terminator.
40 static ostrstream& terminate(ostrstream& ost)
42 char* pstring = ost.str();
43 *(pstring + ost.pcount()) = '\0';
// Binds the parser to resolver `r` and starts with element-ignoring off.
// NOTE(review): two buffer setups appear below (a fixed DATA_BUF_SIZ char
// array wrapped by f_output, and a heap-allocated stringbuf); presumably only
// one of them is compiled per build configuration -- confirm.
49 DocParser::DocParser(Resolver &r)
50 : f_resolver(r), f_ignoring_element(0),
// raw storage handed to f_output; released again in the destructor
52 f_buffer(new char[DATA_BUF_SIZ]),
53 f_output(f_buffer, DATA_BUF_SIZ)
55 f_streambuf(new stringbuf()),
// Frees the character buffer that the constructor allocated with new[].
61 DocParser::~DocParser()
// the null test is not required: delete[] on a null pointer does nothing
64 if (f_buffer) delete[] f_buffer;
66 // this causes a free memory read when f_output is deleted as part of this
67 // object...nothing we can do about it
// Entry point for parsing the document on `input`; the work is delegated to
// rawParse(), whose result is kept in `ok`.
73 DocParser::parse(istream &input)
76 unsigned int ok = rawParse(input);
// Reads the first tag from `input` and, for a start tag, hands the element to
// process() as sibling 1 / same-name sibling 1.
// Throws docParserUnexpectedTag or docParserUnexpectedData when the stream
// does not open with a start tag.
82 DocParser::rawParse(istream &input)
// whitespace is significant: keep operator>> from skipping it
86 input.unsetf(ios::skipws);
// start every parse with the ignore flag cleared
88 f_ignoring_element = 0 ;
// read_tag() leaves the tag name in f_output
90 switch(read_tag(input, f_output))
// strstream variant: terminate the buffer, intern the name, then unfreeze
95 Symbol name(gElemSymTab->intern(terminate(f_output).str()));
96 f_output.rdbuf()->freeze(0);
// string-stream variant: copy the tag name out of the stream
98 data = f_output.str().c_str();
101 MESSAGE(cerr, "StartTag case:");
102 debug(cerr, f_output.str().size());
103 debug(cerr, data.c_str());
// element names are interned in the element symbol table
106 Symbol name(gElemSymTab->intern(data.c_str()));
// parse the element's content, including all nested elements
108 process(input, f_output, name, 1, 1);
112 case AttributeSection:
114 throw(CASTDPUTEXCEPT docParserUnexpectedTag());
117 throw(CASTDPUDEXCEPT docParserUnexpectedData());
// Maintains the count of consecutive child elements that share a name.
//   last_seen_child_name          - heap-allocated copy of the previous child's
//                                   name, or 0 before the first child; replaced
//                                   here, finally deleted by the caller
//   child_relative_sibling_number - running count for the current name
//   new_child_name                - name of the child that was just read
// When there is no previous child or the name differs, the stored name is
// replaced and the count restarts at 1; the increment below presumably
// covers the remaining (same name) case.
125 update_last_seen_child_name(Symbol*& last_seen_child_name, unsigned int& child_relative_sibling_number, const Symbol& new_child_name)
127 if ( last_seen_child_name == 0 ||
128 !(*last_seen_child_name == Symbol(new_child_name))
// deleting a null pointer is harmless, so the first call needs no special case
131 delete last_seen_child_name ;
132 last_seen_child_name = new Symbol(new_child_name);
133 child_relative_sibling_number= 1;
135 child_relative_sibling_number++;
// Handles one element whose start tag has already been consumed, recursing
// into child elements and reporting everything to f_resolver
// (beginElement / data / endElement).
//   input               - stream positioned right after the element's start tag
//   output              - stream passed on to read_tag(), read_data() and
//                         process_attributes()
//   name                - symbol of the element being processed
//   sibling_number      - this element's position among its parent's child
//                         elements (recursive calls pass child++)
//   this_sibling_number - position among consecutive same-named siblings, as
//                         counted by update_last_seen_child_name(); stored in
//                         the Element handed to the resolver
// Throws docParserUnexpectedEof and docParserUnexpectedTag.
// NOTE(review): tag and data text is read back from the member f_output, not
// from the `output` parameter; rawParse() passes f_output as `output`, so the
// two coincide for that caller -- confirm for any others.
// NOTE(review): last_seen_child_name is a raw owning pointer deleted only at
// the end of the function; the throw inside the child loop would skip that
// delete unless cleanup exists on a path not shown here.
141 DocParser::process(istream &input, ostringstream &output,
143 unsigned int sibling_number, unsigned int this_sibling_number)
145 ON_DEBUG(cerr << "process(" << name << ") -> " << sibling_number << endl);
// name of the most recent child element (0 until a child has been seen)
147 Symbol* last_seen_child_name = 0;
// how many consecutive children have carried that name
149 unsigned int child_relative_sibling_number = 0;
151 unsigned int child = 1 ; // sibling numbers for child elements
// skip the newlines that follow the start tag
159 while ((input >> c) && (c == '\n'));
163 throw(CASTDPUEEXCEPT docParserUnexpectedEof());
169 // process whatever comes right after start tag
170 TagType tt = read_tag(input, output);
175 ON_DEBUG(cerr << "beginElement" << endl);
176 // have to begin this element before processing child elements
177 if (!f_ignoring_element)
179 ignore = f_resolver.beginElement(new Element(name,
182 this_sibling_number));
// remember the resolver's answer so nested content is skipped as well
183 f_ignoring_element = ignore ;
187 /////////////////////////////
188 // first child of this node
189 /////////////////////////////
// strstream variant: terminate, intern the child's name, unfreeze, recurse
191 Symbol name(gElemSymTab->intern(terminate(f_output).str()));
193 update_last_seen_child_name(last_seen_child_name,
194 child_relative_sibling_number, name);
196 f_output.rdbuf()->freeze(0);
198 process(input, output, name, child++, child_relative_sibling_number);
// string-stream variant of the same steps
200 data = f_output.str().c_str();
202 // data[f_output.str().size()] = '\0';
204 Symbol name(gElemSymTab->intern(data.c_str()));
205 update_last_seen_child_name(last_seen_child_name,
206 child_relative_sibling_number, name);
208 process(input, output, name,
209 child++, child_relative_sibling_number);
214 // hit an end tag right after start tag
218 data = terminate(f_output).str();
219 f_output.rdbuf()->freeze(0);
221 data = f_output.str().c_str();
224 // data[f_output.str().size()] = '\0';
227 cerr << "EndTag: " << data.c_str() << endl;
// the closing tag has to name the element that was opened
228 assert(gElemSymTab->intern(data.c_str()) == name);
// element without content: report begin and end back to back
233 if (!f_ignoring_element)
235 int ignore = f_resolver.beginElement(new Element(name,
237 0, 0, this_sibling_number));
239 f_resolver.endElement(name);
241 return ; // EXIT FUNCTION
243 case AttributeSection:
245 #if !defined(SC3) && !defined(_IBMR2) && !defined(linux) && \
246 !defined(CSRG_BASED) && !defined(sun)
// filled in by process_attributes() below
249 AttributeList *attrs = 0;
250 #if !defined(SC3) && !defined(_IBMR2) && !defined(linux) && \
251 !defined(CSRG_BASED) && !defined(sun)
254 AttributeList *olias_attrs = 0;
258 process_attributes(input, output, attrs, olias_attrs);
260 if (!f_ignoring_element)
262 //////////////////////////////
263 // this node with attributes
264 //////////////////////////////
265 ignore = f_resolver.beginElement(new Element(name,
271 f_ignoring_element = ignore ;
287 throw(CASTDPUTEXCEPT docParserUnexpectedTag());
// element without attributes: the two 0 arguments stand in for the lists
292 if (!f_ignoring_element)
295 ignore = f_resolver.beginElement(new Element(name,
297 0, 0, this_sibling_number));
298 f_ignoring_element = ignore ;
// character data directly after the start tag
301 read_data(input, output);
303 if (!f_ignoring_element)
305 // the str() call seems to add the null byte to the stream
306 // and increment the pcount, so we must make sure it gets
// strstream variant: the length comes from pcount()
309 char *pstring = terminate(f_output).str();
310 int size = f_output.pcount();
311 f_resolver.data(pstring, size);
312 f_output.rdbuf()->freeze(0);
// string-stream variant: the length passed on counts one extra byte
314 pstring = f_output.str().c_str();
315 int size = pstring.size() + 1;
316 f_resolver.data(pstring.c_str(), size);
// consume the rest of the content until this element's end tag
323 while ((tt = read_tag(input, output)) != EndTag)
328 /////////////////////////////
329 // second child and beyond.
330 /////////////////////////////
331 data = f_output.str().c_str();
333 f_output.rdbuf()->freeze(0);
337 MESSAGE(cerr, "StartTag case2");
339 debug(cerr, f_output.str().size());
342 Symbol name(gElemSymTab->intern(data.c_str()));
343 update_last_seen_child_name(last_seen_child_name,
344 child_relative_sibling_number, name);
346 process(input, output, name, child++, child_relative_sibling_number);
349 case EndTag: // should never get this
351 // we have already processed these for this tag
352 case AttributeSection:
354 throw(CASTDPUTEXCEPT docParserUnexpectedTag());
// character data between child elements
358 read_data(input, output);
360 if (!f_ignoring_element)
362 // the str() call seems to add the null byte to the stream
363 // and increment the pcount, so we must make sure it gets
// strstream variant: terminated inline here instead of through terminate()
366 char *pstring = f_output.str();
367 int size = f_output.pcount();
368 *(pstring + size) = 0;
369 f_resolver.data(pstring, size);
370 f_output.rdbuf()->freeze(0);
372 pstring = f_output.str().c_str();
373 int size = pstring.size() + 1;
374 f_resolver.data(pstring.c_str(), size);
// the loop ended on an end tag; its name is now in f_output
382 data = terminate(f_output).str();
383 f_output.rdbuf()->freeze(0);
385 data = f_output.str().c_str();
387 cerr << "EndTag: " << data.c_str() << endl;
388 assert(gElemSymTab->intern(data.c_str()) == name);
391 // hit end tag, end processing
392 if (!f_ignoring_element)
393 f_resolver.endElement(name);
395 // if we set ignore flag, unset it
397 f_ignoring_element = 0;
404 ON_DEBUG(cerr << "exit process: " << name << endl);
// release the name copy created by update_last_seen_child_name()
405 delete last_seen_child_name;
// Reads the tags of an attribute section until read_tag() reports NoTag and
// collects the resulting Attribute objects.
//   input       - stream positioned inside the attribute section
//   output      - stream passed on to read_tag() and process_attribute()
//   attrs       - in/out: list receiving the attributes added via attrs->add()
//   olias_attrs - in/out: list receiving the attributes added via
//                 olias_attrs->add()
// Both lists are created with new inside this function; the values the
// pointers had on entry are saved so they can be compared against 0 at the end.
// Throws docParserUnexpectedTag for a nested AttributeSection and
// docParserUnexpectedData for data.
410 DocParser::process_attributes(istream &input, ostringstream &output,
411 AttributeList *&attrs,
412 AttributeList *&olias_attrs)
419 Attribute* newAttribute = 0;
// remember what the caller passed in
421 AttributeList* orig_attrs = attrs;
422 AttributeList* orig_olias_attrs = olias_attrs;
425 while ((tt = read_tag(input,output)) != NoTag)
// the attribute name that read_tag() just stored in f_output
432 theData = f_output.str().c_str();
435 attrs = new AttributeList ;
// attribute names are interned in gSymTab; several spellings of the name
// argument appear below, presumably one per build configuration
438 process_attribute(input, output,
440 gSymTab->intern(terminate(f_output).str()),
441 gSymTab->intern(f_streambuf->str()),
443 gSymTab->intern(theData.c_str()),
447 attrs->add(newAttribute);
451 return ; // EXIT FUNCTION
453 case AttributeSection:
454 throw(CASTDPUTEXCEPT docParserUnexpectedTag());
// same steps again, this time filling olias_attrs
458 theData = f_output.str().c_str();
462 olias_attrs = new AttributeList ;
465 process_attribute(input, output,
467 gSymTab->intern(terminate(f_output).str()),
468 gSymTab->intern(f_streambuf->str()),
470 gSymTab->intern(theData.c_str()),
475 olias_attrs->add(newAttribute);
478 throw(CASTDPUDEXCEPT docParserUnexpectedData());
// checks keyed on whether the caller supplied no list on entry
487 if ( orig_attrs == 0 ) {
492 if ( orig_olias_attrs == 0 ) {
// Reads the value of one attribute and wraps it in a newly allocated
// Attribute, then reads the tag that follows the value.
//   input  - stream positioned at the attribute's value
//   output - stream passed on to read_data() and read_tag()
//   name   - interned attribute name
//   tt     - tag type of the attribute; OliasAttribute selects the base-class
//            read_data()
// The value handed to Attribute is a strdup() copy.
// NOTE(review): presumably Attribute takes ownership of that copy -- confirm.
// Throws docParserUnexpectedTag or docParserUnexpectedData depending on what
// follows the value.
503 DocParser::process_attribute(istream &input, ostringstream &output,
504 const Symbol &name, TagType tt)
508 //ON_DEBUG(cerr << "process_attribute: " << name << endl);
510 // If the attribute is OLIAS internal, we use DocParser's
511 // read_data(). This is to prevent the attribute value
512 // from being changed in a call to a specific renderer engine's
515 // Example: LoutDocparser::read_data() quotes any '.' char
516 // which changes the graphic locator value if the element
517 // is OLIAS internal attribute #GRAPHIC.
519 if ( tt == OliasAttribute ) {
// explicitly qualified so that DocParser's own version is the one called
520 DocParser::read_data(input, output);
// unqualified call for every other attribute
522 (void)read_data(input, output);
// strstream variant: terminate in place, unfreeze, then copy the value
524 char *data = f_output.str();
525 *(data + f_output.pcount()) = 0;
526 f_output.rdbuf()->freeze(0);
527 Attribute *attr = new Attribute(name, strdup(data));
// string-stream variant
529 data = f_output.str().c_str();
530 Attribute *attr = new Attribute(name, strdup(data.c_str()));
// inspect the tag that comes after the value
533 switch (read_tag(input, output))
536 case AttributeSection:
539 throw(CASTDPUTEXCEPT docParserUnexpectedTag());
543 throw(CASTDPUDEXCEPT docParserUnexpectedData());
// Reads the next tag from `input`, writes its name into `output`, and returns
// the kind of tag that was found.
//   input  - stream to read from
//   output - receives the tag name, written from position 0
// Returns a TagType; StartTag is the initial assumption, and AttributeSection
// is returned early without collecting a name.
// Throws docParserUnexpectedEof when the input ends and unknownTagException
// for an unrecognized tag.
555 DocParser::read_tag(istream &input, ostringstream &output)
// rewind the put position so the new name is written from the beginning
557 output.seekp(streampos(0));
559 TagType tt = StartTag;
563 // strip newlines before/after tags
564 while ((input >> c) && (c == '\n'));
566 throw(CASTDPUEEXCEPT docParserUnexpectedEof());
585 return AttributeSection ; // EXIT
588 tt = OliasAttribute ;
589 output << c; // keep char we just read
593 throw(CASTUTEXCEPT unknownTagException());
597 output << c ; // keep char we just read
602 // get (remainder of) tag name
603 while ((input >> c) && (c != '>'))
// Reads character data from `input` up to the next '<' and writes it into
// `output`, handling named entities along the way.
//   input  - stream positioned at the start of the data
//   output - receives the data, written from position 0
// Entity names are collected in tmpbuf up to the closing ';' and compared
// against hardreturn, lnfeed, lang, lt, amp and nbsp.
// Throws Exception when the entity buffer overflows and
// docParserUnexpectedEof when the input ends first.
612 DocParser::read_data(istream &input, ostringstream &output)
// rewind the put position so the data is written from the beginning
616 output.seekp(streampos(0));
// a '<' marks the beginning of the next tag and ends the data
618 while ((input >> c) && (c != '<'))
// number of entity-name characters stored in tmpbuf so far
624 unsigned int tmplen = 0;
625 while ((input >> c ) && (c != ';'))
627 tmpbuf[tmplen++] = c ;
630 cerr << "Temp Buf overflow (ampersand problem)" << endl;
631 throw(CASTEXCEPT Exception());
635 throw(CASTDPUEEXCEPT docParserUnexpectedEof());
640 cerr << "Entity: " << tmpbuf << endl;
// line-break entities
643 if ((!strcmp(tmpbuf, "hardreturn")) ||
644 (!strcmp(tmpbuf, "lnfeed")))
647 if ((!strcmp(tmpbuf, "lang")) ||
648 (!strcmp(tmpbuf, "lt")))
651 if (!strcmp(tmpbuf, "amp"))
654 if (!strcmp(tmpbuf, "nbsp")) // non-break space
666 // can never run out of input while reading data, tags must be balanced
668 throw(CASTDPUEEXCEPT docParserUnexpectedEof());