Add GNU LGPL headers to all .c .C and .h files
[oweals/cde.git] / cde / programs / dtinfo / DtMmdb / StyleSheet / DocParser.C
1 /*
2  * CDE - Common Desktop Environment
3  *
4  * Copyright (c) 1993-2012, The Open Group. All rights reserved.
5  *
6  * These libraries and programs are free software; you can
7  * redistribute them and/or modify them under the terms of the GNU
8  * Lesser General Public License as published by the Free Software
9  * Foundation; either version 2 of the License, or (at your option)
10  * any later version.
11  *
12  * These libraries and programs are distributed in the hope that
13  * they will be useful, but WITHOUT ANY WARRANTY; without even the
14  * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15  * PURPOSE. See the GNU Lesser General Public License for more
16  * details.
17  *
18  * You should have received a copy of the GNU Lesser General Public
19  * License along with these libraries and programs; if not, write
20  * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21  * Floor, Boston, MA 02110-1301 USA
22  */
23 /*      Copyright (c) 1995 FUJITSU LIMITED      */
24 /*      All Rights Reserved                     */
25
26 /* $TOG: DocParser.C /main/16 1998/04/17 11:48:07 mgreess $ */
27 #ifdef DEBUG
28 #include "assert.h"
29 #endif
30 #include "Debug.h"
31 #include "StyleSheetExceptions.h"
32 #include "DocParser.h"
33 #include "Resolver.h"
34 #include "Element.h"
35 #include "AttributeList.h"
36
37 #define DATA_BUF_SIZ 4096
38
39 #if defined(SC3) || defined(__osf__)
// Stores a NUL byte directly after the characters written to `ost` so far,
// so that the buffer handed out by str() can be read as a C string.
// str() freezes the stream's buffer; the callers in this file release it
// again with rdbuf()->freeze(0).  Returns `ost` so the call can be chained.
static ostrstream& terminate(ostrstream& ost)
{
    char* const buffer = ost.str();
    buffer[ost.pcount()] = 0;
    return ost;
}
47 #endif
48
49 DocParser::DocParser(Resolver &r)
50 : f_resolver(r),
51 #if defined(SC3) || defined(__osf__)
52   f_buffer(new char[DATA_BUF_SIZ]),
53   f_output(f_buffer, DATA_BUF_SIZ)
54 #else
55   f_streambuf(new strstreambuf(DATA_BUF_SIZ)),
56   f_output(f_streambuf)
57 #endif
58 {
59 }
60
61 DocParser::~DocParser()
62 {
63 #if defined(SC3) || defined(__osf__)
64   if (f_buffer) delete[] f_buffer;
65 #else
66   // this causes a free memory read when f_output is deleted as part of this
67   // object...nothing we can do about it
68   delete f_streambuf ;
69 #endif
70 }
71
72 unsigned int
73 DocParser::parse(istream &input)
74 {
75    f_resolver.Begin();
76    unsigned int ok = rawParse(input);
77    f_resolver.End();
78    return ok;
79 }
80
81 unsigned int
82 DocParser::rawParse(istream &input)
83 {
84   input.unsetf(ios::skipws);
85
86   f_ignoring_element = 0 ;
87
88   switch(read_tag(input, f_output))
89     {
90     case StartTag:
91       {
92 #if defined(SC3) || defined(__osf__)
93         Symbol name(gElemSymTab->intern(terminate(f_output).str()));
94         f_output.rdbuf()->freeze(0);
95 #else
96         char *data = f_streambuf->str();
97
98 /*
99 MESSAGE(cerr, "StartTag case:");
100 debug(cerr, f_streambuf->pcount());
101 debug(cerr, data);
102 */
103
104 #if !defined(SC3) && !defined(__osf__)
105         data[f_streambuf->pcount()] = 0;
106 #endif
107         f_streambuf->freeze(0);
108         Symbol name(gElemSymTab->intern(data));
109 #endif
110         process(input, f_output, name, 1, 1);
111       }
112       break;
113     case EndTag:
114     case AttributeSection:
115     case OliasAttribute:
116       throw(CASTDPUTEXCEPT docParserUnexpectedTag());
117       break;
118     case NoTag:
119       throw(CASTDPUDEXCEPT docParserUnexpectedData());
120       break;
121     }      
122   return 1;
123 }
124
125                 
126 void
127 update_last_seen_child_name(Symbol*& last_seen_child_name, unsigned int& child_relative_sibling_number, const Symbol& new_child_name)
128 {
129    if ( last_seen_child_name == 0 || 
130         !(*last_seen_child_name == Symbol(new_child_name)) 
131       ) 
132    {
133      delete last_seen_child_name ;
134      last_seen_child_name = new Symbol(new_child_name);
135      child_relative_sibling_number= 1;
136    } else
137      child_relative_sibling_number++;
138
139    return;
140 }
141
142 void
143 DocParser::process(istream &input, ostream &output,
144                    const Symbol &name,
145                    unsigned int sibling_number, unsigned int this_sibling_number)
146 {
147   ON_DEBUG(cerr << "process(" << name << ") -> " << sibling_number << endl);
148
149   Symbol* last_seen_child_name = 0; 
150             
151   unsigned int child_relative_sibling_number = 0;
152
153   unsigned int child = 1 ;      // sibling numbers for child elements 
154
155   char c ;
156   while ((input >> c) && (c == '\n'));
157   input.putback(c);
158
159   if (input.eof())
160     throw(CASTDPUEEXCEPT docParserUnexpectedEof());
161
162   int ignore = 0 ;
163
164   try
165     {
166       // process whatever comes right after start tag 
167       TagType tt = read_tag(input, output);
168       switch (tt)
169         {
170         case StartTag:
171           {
172             ON_DEBUG(cerr << "beginElement" << endl);
173             // have to begin this element before processing child elements 
174             if (!f_ignoring_element)
175               {
176                 ignore = f_resolver.beginElement(new Element(name,
177                                                              sibling_number, 0,
178                                                              0,
179                                                              this_sibling_number)); 
180                 f_ignoring_element = ignore ;
181               }
182
183                
184 /////////////////////////////
185 // first child of this node
186 /////////////////////////////
187 #if defined(SC3) || defined(__osf__)
188             Symbol name(gElemSymTab->intern(terminate(f_output).str()));
189
190             update_last_seen_child_name(last_seen_child_name, 
191                                  child_relative_sibling_number, name);
192
193             f_output.rdbuf()->freeze(0);
194
195             process(input, output, name, child++, child_relative_sibling_number);
196 #else
197             char *data = f_streambuf->str();
198 #if !defined(SC3)  && !defined(__osf__)
199             data[f_streambuf->pcount()] = 0;
200 #endif
201             Symbol name(gElemSymTab->intern(data));
202             update_last_seen_child_name(last_seen_child_name, 
203                                  child_relative_sibling_number, name);
204
205             f_streambuf->freeze(0);
206             process(input, output, name,
207                     child++, child_relative_sibling_number);
208 #endif
209           }
210           break;
211         case EndTag:
212           // hit an end tag right after start tag 
213 #ifdef DEBUG
214           {
215 #if defined(SC3) || defined(__osf__)
216             char *data = terminate(f_output).str();
217             f_output.rdbuf()->freeze(0);
218 #else
219             char *data = f_streambuf->str();
220 //#ifdef _IBMR2
221 #if !defined(SC3)  && !defined(__osf__)
222             data[f_streambuf->pcount()] = 0;
223 #endif
224             f_streambuf->freeze(0);
225 #endif
226             cerr << "EndTag: " << data << endl;
227             assert(gElemSymTab->intern(data) == name);
228           }
229 #endif
230
231 // this node
232           if (!f_ignoring_element)
233             {
234               int ignore = f_resolver.beginElement(new Element(name,
235                                                                sibling_number,
236                                                                0, 0, this_sibling_number)); 
237               if (!ignore)
238                 f_resolver.endElement(name);
239             }
240           return ;              // EXIT FUNCTION 
241           break;
242         case AttributeSection:
243           {
244 #if !defined(SC3) && \
245     !defined(__osf__) && \
246     !defined(_IBMR2) && \
247     !defined(__uxp__) && \
248     !defined(USL) && \
249     !defined(linux)
250             volatile
251 #endif
252             AttributeList *attrs = 0;
253 #if !defined(SC3) &&  \
254     !defined(__osf__) && \
255     !defined(_IBMR2) && \
256     !defined(__uxp__) && \
257     !defined(USL) && \
258     !defined(linux)
259             volatile
260 #endif
261             AttributeList *olias_attrs = 0;
262
263             try
264               {
265                 process_attributes(input, output, attrs, olias_attrs);
266
267                 if (!f_ignoring_element)
268                   {
269 //////////////////////////////
270 // this node with attributes
271 //////////////////////////////
272                     ignore = f_resolver.beginElement(new Element(name,
273                                                                  sibling_number,
274                                                                  attrs,
275                                                                  olias_attrs, 
276                                                                  this_sibling_number
277                                                         ));
278                     f_ignoring_element = ignore ;
279                   }
280               }
281             catch_any()
282               {
283 /*
284                 delete attrs ;
285                 delete olias_attrs ;
286 */
287                 attrs = 0 ;
288                 olias_attrs = 0 ;
289               }
290             end_try;
291           }
292           break;
293         case OliasAttribute:
294           throw(CASTDPUTEXCEPT docParserUnexpectedTag());
295           break;
296
297         case NoTag:
298           {
299             if (!f_ignoring_element)
300               {
301 // this node
302                 ignore = f_resolver.beginElement(new Element(name,
303                                                              sibling_number,
304                                                              0, 0, this_sibling_number)); 
305                 f_ignoring_element = ignore ;
306               }
307             // process data 
308             read_data(input, output);
309
310             if (!f_ignoring_element)
311               {
312                 //  the str() call seems to add the null byte to the stream
313                 //  and increment the pcount, so we must make sure it gets
314                 //  called first
315 #if defined(SC3) || defined(__osf__)
316                 char *string = terminate(f_output).str();
317                 int   size = f_output.pcount();
318                 f_resolver.data(string, size);
319                 f_output.rdbuf()->freeze(0);
320 #else
321                 char *string = f_streambuf->str();
322 //#ifdef _IBMR2
323 #if !defined(SC3) && !defined(__osf__)
324                 string[f_streambuf->pcount()] = 0;
325                 int   size = f_streambuf->pcount() ;
326 #else
327                 int   size = f_streambuf->pcount() - 1 ;
328 #endif
329                 f_resolver.data(string, size);
330                 f_streambuf->freeze(0); // unfreeze buffer frozen by str() call
331 #endif
332               }
333           }
334           break;
335         }
336       
337       while ((tt = read_tag(input, output)) != EndTag)
338         switch (tt)
339           {
340           case StartTag:
341             {
342 /////////////////////////////
343 // second child and beyond.
344 /////////////////////////////
345 #if defined(SC3) || defined(__osf__)
346               char *data = f_output.str();
347               *(data + f_output.pcount()) = 0;
348               f_output.rdbuf()->freeze(0);
349 #else
350               char *data = f_streambuf->str();
351 //#ifdef _IBMR2
352 #if !defined(SC3) && !defined(__osf__)
353               data[f_streambuf->pcount ()] = 0;
354 #endif
355               f_streambuf->freeze(0);
356 #endif
357
358 /*
359 MESSAGE(cerr, "StartTag case2");
360 debug(cerr, data);
361 debug(cerr, f_streambuf->pcount ());
362 */
363
364               Symbol name(gElemSymTab->intern(data));
365               update_last_seen_child_name(last_seen_child_name, 
366                                  child_relative_sibling_number, name);
367
368               process(input, output, name, child++, child_relative_sibling_number);
369             }
370             break;
371           case EndTag:          // should never get this 
372             break;
373             // we have already processed these for this tag
374           case AttributeSection:
375           case OliasAttribute: 
376             throw(CASTDPUTEXCEPT docParserUnexpectedTag());
377             break;
378           case NoTag:
379             {
380               read_data(input, output);
381
382               if (!f_ignoring_element)
383                 {
384                   //  the str() call seems to add the null byte to the stream
385                   //  and increment the pcount, so we must make sure it gets
386                   //  called first
387 #if defined(SC3) || defined(__osf__)
388                   char *string = f_output.str();
389                   int   size = f_output.pcount();
390                   *(string + size) = 0;
391                   f_resolver.data(string, size);
392                   f_output.rdbuf()->freeze(0);
393 #else
394                   char *string = f_streambuf->str();
395 //#ifdef _IBMR2
396 #if !defined(SC3) && !defined(__osf__)
397                   string[f_streambuf->pcount()] = 0;
398                   int   size = f_streambuf->pcount() ;
399 #else
400                   int   size = f_streambuf->pcount() - 1 ;
401 #endif
402                   f_resolver.data(string, size);
403                   f_streambuf->freeze(0); // unfreeze buffer frozen by str() call
404 #endif
405                 }
406             }
407           }
408 #ifdef DEBUG
409       {
410 #if defined(SC3) || defined(__osf__)
411         char *data = terminate(f_output).str();
412         f_output.rdbuf()->freeze(0);
413 #else
414         char *data = f_streambuf->str();
415 //#ifdef _IBMR2
416 #if !defined(SC3) && !defined(__osf__)
417         data[f_streambuf->pcount ()] = 0;
418 #endif
419         f_streambuf->freeze(0);
420 #endif
421         cerr << "EndTag: " << data << endl;
422         assert(gElemSymTab->intern(data) == name);
423       }
424 #endif
425       // hit end tag, end processing
426       if (!f_ignoring_element)
427         f_resolver.endElement(name);
428
429       // if we set ignore flag, unset it 
430       if (ignore)
431         f_ignoring_element = 0;
432     }
433   catch_any()
434     {
435       rethrow;
436     }
437   end_try;
438   ON_DEBUG(cerr << "exit process: " << name << endl);
439   delete last_seen_child_name; 
440 }
441
442
443 void
444 DocParser::process_attributes(istream &input, ostream &output,
445                               AttributeList *&attrs,
446                               AttributeList *&olias_attrs)
447 {
448   TagType tt ;
449
450   Attribute* newAttribute = 0;
451
452   AttributeList* orig_attrs = attrs;
453   AttributeList* orig_olias_attrs = olias_attrs;
454
455   char *theData = 0;
456
457   try {
458      while ((tt = read_tag(input,output)) != NoTag)
459        {
460          switch (tt)
461         {
462         case StartTag:
463           {
464 //#ifdef _IBMR2
465 #if !defined(SC3) && !defined(__osf__)
466           theData = f_streambuf->str ();
467           theData[f_streambuf->pcount()] = 0;
468 #endif
469           if (!attrs)
470             attrs = new AttributeList ;
471
472           newAttribute = 
473                 process_attribute(input, output,
474 #if defined(SC3) || defined(__osf__)
475                                   gSymTab->intern(terminate(f_output).str()),
476 #else
477 //#ifdef _IBMR2
478 #if !defined(SC3)  && !defined(__osf__)
479                                   gSymTab->intern(theData),
480 #else
481                                   gSymTab->intern(f_streambuf->str()),
482 #endif
483 #endif
484                                   StartTag
485                                  );
486           attrs->add(newAttribute);
487           break;
488           }
489         case EndTag:
490           return ;              // EXIT FUNCTION
491    
492         case AttributeSection:
493           throw(CASTDPUTEXCEPT docParserUnexpectedTag());
494           break;
495         case OliasAttribute:
496 //#ifdef _IBMR2
497 #if !defined(SC3) && !defined(__osf__)
498           theData = f_streambuf->str ();
499           theData[f_streambuf->pcount()] = 0;
500 #endif
501           // mirrors attribute 
502           if (!olias_attrs)
503             olias_attrs = new AttributeList ;
504
505           newAttribute = 
506                 process_attribute(input, output,
507 #if defined(SC3) || defined(__osf__)
508                                   gSymTab->intern(terminate(f_output).str()),
509 #else
510 //#ifdef _IBMR2
511 #if !defined(SC3)  && !defined(__osf__)
512                                   gSymTab->intern(theData),
513 #else
514                                   gSymTab->intern(f_streambuf->str()),
515 #endif
516 #endif
517                                   OliasAttribute
518                                  );
519
520           olias_attrs->add(newAttribute);
521           break;
522         case NoTag:
523           throw(CASTDPUDEXCEPT docParserUnexpectedData());
524           break;
525         }
526        }
527    }
528    catch_any()
529    {
530      delete newAttribute;
531
532      if ( orig_attrs == 0 ) {
533         delete attrs;
534         attrs = 0;
535      }
536
537      if ( orig_olias_attrs == 0 ) {
538         delete olias_attrs;
539         olias_attrs = 0;
540      }
541
542      rethrow;
543    }
544    end_try;
545 }
546
547 Attribute *
548 DocParser::process_attribute(istream &input, ostream &output,
549                              const Symbol &name, TagType tt)
550 {
551   //ON_DEBUG(cerr << "process_attribute: " << name << endl);
552
553 // If the attribute is OLIAS internal, we use DocParser's 
554 // read_data(). This is to prevent the attribte value 
555 // from change in a call to specific renderer engine's 
556 // read_data().
557 //
558 // Example: LoutDocparser::read_data() quotes any '.' char
559 // which changes the graphic locator value if the element
560 // is OLIAS internal attribute #GRAPHIC.
561
562   if ( tt == OliasAttribute ) {
563     DocParser::read_data(input, output);
564   } else 
565     (void)read_data(input, output);
566 #if defined(SC3) || defined(__osf__)
567   char *data = f_output.str();
568   *(data + f_output.pcount()) = 0;
569   f_output.rdbuf()->freeze(0);
570 #else
571   char *data = f_streambuf->str();
572 //#ifdef _IBMR2
573 #if !defined(SC3)  && !defined(__osf__)
574   data[f_streambuf->pcount ()] = 0;
575 #endif
576   f_streambuf->freeze(0);
577 #endif
578   Attribute *attr = new Attribute(name, strdup(data));
579
580   switch (read_tag(input, output))
581     {
582     case StartTag:
583     case AttributeSection:
584     case OliasAttribute:
585       delete attr ;
586       throw(CASTDPUTEXCEPT docParserUnexpectedTag());
587       break;
588     case NoTag:
589       delete attr;
590       throw(CASTDPUDEXCEPT docParserUnexpectedData());
591       break;
592     case EndTag:
593       break;
594     }
595
596   return attr ;
597   
598 }
599
600
601 DocParser::TagType
602 DocParser::read_tag(istream &input, ostream &output)
603 {
604   output.seekp(streampos(0));
605
606   TagType tt = StartTag;
607
608   char c ;
609
610   // strip newlines before/after tags
611   while ((input >> c) && (c == '\n'));
612   if (input.eof())
613     throw(CASTDPUEEXCEPT docParserUnexpectedEof());
614
615   if (c != '<')
616     {
617       input.putback(c);
618       return NoTag;
619     }
620
621
622   input >> c ;
623
624   switch (c)
625     {
626     case '/':
627       tt = EndTag ;
628       break;
629     case '#':
630       input >> c;
631       if (c == '>')
632         return AttributeSection ; // EXIT 
633       else
634         {
635           tt = OliasAttribute ;
636           output << c;          // keep char we just read 
637         }
638       break;
639     case '>':
640       throw(CASTUTEXCEPT unknownTagException());
641       // NOT REACHED 
642       break;
643     default:
644       output << c ;             // keep char we just read 
645       break;
646     }
647
648
649   // get (remainder of) tag name 
650   while ((input >> c) && (c != '>'))
651     output << c ;
652
653   return tt ;
654 }
655
656
657 void
658 DocParser::read_data(istream &input, ostream &output)
659 {
660   char c ;
661
662   output.seekp(streampos(0));
663
664   while ((input >> c) && (c != '<'))
665     {
666       // handle entities 
667       if (c == '&')
668         {
669           char tmpbuf[64];
670           unsigned int tmplen = 0;
671           while ((input >> c ) && (c != ';'))
672             {
673               tmpbuf[tmplen++] = c ;
674               if (tmplen > 63)
675                 {
676                   cerr << "Temp Buf overflow (ampersand problem)" << endl;
677                   throw(CASTEXCEPT Exception());
678                 }
679             }
680           if (input.eof())
681             throw(CASTDPUEEXCEPT docParserUnexpectedEof());
682             
683           tmpbuf[tmplen] = 0 ;
684
685 #ifdef ENTITY_DEBUG
686           cerr << "Entity: " << tmpbuf << endl;
687 #endif
688
689           if ((!strcmp(tmpbuf, "hardreturn")) ||
690               (!strcmp(tmpbuf, "lnfeed")))
691             c = '\n';
692           else
693             if ((!strcmp(tmpbuf, "lang")) ||
694                 (!strcmp(tmpbuf, "lt")))
695               c = '<' ;
696             else
697               if (!strcmp(tmpbuf, "amp"))
698                 c = '&' ;
699               else
700                 if (!strcmp(tmpbuf, "nbsp")) // non-break space 
701                   c = (char)0xA0 ;
702                 else
703                   c = ' ';
704
705         }
706
707       output << c;
708     }
709
710   // can never run out of input while reading data, tags must be balanced
711   if (input.eof())
712     throw(CASTDPUEEXCEPT docParserUnexpectedEof());
713             
714   input.putback(c);
715
716 }