cde/programs/nsgmls/Text.C

   1 /*
   2  * CDE - Common Desktop Environment
   3  *
   4  * Copyright (c) 1993-2012, The Open Group. All rights reserved.
   5  *
   6  * These libraries and programs are free software; you can
   7  * redistribute them and/or modify them under the terms of the GNU
   8  * Lesser General Public License as published by the Free Software
   9  * Foundation; either version 2 of the License, or (at your option)
  10  * any later version.
  11  *
  12  * These libraries and programs are distributed in the hope that
  13  * they will be useful, but WITHOUT ANY WARRANTY; without even the
  14  * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15  * PURPOSE. See the GNU Lesser General Public License for more
  16  * details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with these libraries and programs; if not, write
  20  * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
  21  * Floor, Boston, MA 02110-1301 USA
  22  */
  23 /* $XConsortium: Text.C /main/1 1996/07/29 17:06:09 cde-hp $ */
  24 // Copyright (c) 1994 James Clark
  25 // See the file COPYING for copying permission.
  26
  27 #ifdef __GNUG__
  28 #pragma implementation
  29 #endif
  30 #include "splib.h"
  31 #include "Text.h"
  32 #include "Entity.h"
  33 // for memcmp()
  34 #include <string.h>
  35
  36 #ifdef SP_NAMESPACE
  37 namespace SP_NAMESPACE {
  38 #endif
  39
  40 Text::Text()
  41 {
  42 }
  43
  44 void Text::addChar(Char c, const Location &loc)
  45 {
  46   if (items_.size() == 0
  47       || items_.back().type != TextItem::data
  48       || loc.origin().pointer() != items_.back().loc.origin().pointer()
  49       || loc.index() != (items_.back().loc.index()
  50                          + (chars_.size() - items_.back().index))) {
  51     items_.resize(items_.size() + 1);
  52     items_.back().loc = loc;
  53     items_.back().type = TextItem::data;
  54     items_.back().index = chars_.size();
  55   }
  56   chars_ += c;
  57 }
  58
  59 void Text::addChars(const Char *p, size_t length, const Location &loc)
  60 {
  61   if (items_.size() == 0
  62       || items_.back().type != TextItem::data
  63       || loc.origin().pointer() != items_.back().loc.origin().pointer()
  64       || loc.index() != (items_.back().loc.index()
  65                          + (chars_.size() - items_.back().index))) {
  66     items_.resize(items_.size() + 1);
  67     items_.back().loc = loc;
  68     items_.back().type = TextItem::data;
  69     items_.back().index = chars_.size();
  70   }
  71   chars_.append(p, length);
  72 }
  73
  74 void Text::addCdata(const InternalEntity *entity,
  75                     const ConstPtr<Origin> &origin)
  76 {
  77   addSimple(TextItem::cdata, Location(origin, 0));
  78   chars_.append(entity->string().data(), entity->string().size());
  79 }
  80
  81 void Text::addSdata(const InternalEntity *entity,
  82                     const ConstPtr<Origin> &origin)
  83 {
  84   addSimple(TextItem::sdata, Location(origin, 0));
  85   chars_.append(entity->string().data(), entity->string().size());
  86 }
  87
  88 void Text::addCharsTokenize(const Char *str, size_t n, const Location &loc,
  89                             Char space)
  90 {
  91   Location loci(loc);
  92   // FIXME speed this up
  93   for (size_t i = 0; i < n; loci += 1, i++) {
  94     if (str[i] == space && (size() == 0 || lastChar() == space))
  95       ignoreChar(str[i], loci);
  96     else
  97       addChar(str[i], loci);
  98   }
  99 }
 100
 101 void Text::tokenize(Char space, Text &text) const
 102 {
 103   TextIter iter(*this);
 104   TextItem::Type type;
 105   const Char *p;
 106   size_t n;
 107   const Location *loc;
 108   while (iter.next(type, p, n, loc)) {
 109     switch (type) {
 110     case TextItem::data:
 111       text.addCharsTokenize(p, n, *loc, space);
 112       break;
 113     case TextItem::sdata:
 114     case TextItem::cdata:
 115       {
 116         text.addEntityStart(*loc);
 117         text.addCharsTokenize(p, n, *loc, space);
 118         Location tem(*loc);
 119         tem += n;
 120         text.addEntityEnd(tem);
 121       }
 122       break;
 123     case TextItem::ignore:
 124       text.ignoreChar(*p, *loc);
 125       break;
 126     default:
 127       text.addSimple(type, *loc);
 128       break;
 129     }
 130   }
 131   if (text.size() > 0 && text.lastChar() == space)
 132     text.ignoreLastChar();
 133 }
 134
 135 void Text::addSimple(TextItem::Type type, const Location &loc)
 136 {
 137   items_.resize(items_.size() + 1);
 138   items_.back().loc = loc;
 139   items_.back().type = type;
 140   items_.back().index = chars_.size();
 141 }
 142
 143 void Text::ignoreChar(Char c, const Location &loc)
 144 {
 145   items_.resize(items_.size() + 1);
 146   items_.back().loc = loc;
 147   items_.back().type = TextItem::ignore;
 148   items_.back().c = c;
 149   items_.back().index = chars_.size();
 150 }
 151
 152 void Text::ignoreLastChar()
 153 {
 154   size_t lastIndex = chars_.size() - 1;
 155   size_t i;
 156   for (i = items_.size() - 1; items_[i].index > lastIndex; i--)
 157     ;
 158   // lastIndex >= items_[i].index
 159   if (items_[i].index != lastIndex) {
 160     items_.resize(items_.size() + 1);
 161     i++;
 162     for (size_t j = items_.size() - 1; j > i; j--)
 163       items_[j] = items_[j - 1];
 164     items_[i].index = lastIndex;
 165     items_[i].loc = items_[i - 1].loc;
 166     items_[i].loc += lastIndex - items_[i - 1].index;
 167   }
 168
 169   items_[i].c = chars_[chars_.size() - 1];
 170   items_[i].type = TextItem::ignore;
 171   for (size_t j = i + 1; j < items_.size(); j++)
 172     items_[j].index = lastIndex;
 173   chars_.resize(chars_.size() - 1);
 174 }
 175
 176 // All characters other than spaces are substed.
 177
 178 void Text::subst(const SubstTable<Char> &table, Char space)
 179 {
 180   for (size_t i = 0; i < items_.size(); i++)
 181     if (items_[i].type == TextItem::data) {
 182       size_t lim = (i + 1 < items_.size()
 183                     ? items_[i + 1].index
 184                     : chars_.size());
 185       size_t j;
 186       for (j = items_[i].index; j < lim; j++) {
 187         Char c = chars_[j];
 188         if (c != space && c != table[c])
 189           break;
 190       }
 191       if (j < lim) {
 192         size_t start = items_[i].index;
 193         StringC origChars(chars_.data() + start, lim - start);
 194         for (; j < lim; j++)
 195           if (chars_[j] != space)
 196             table.subst(chars_[j]);
 197         items_[i].loc = Location(new MultiReplacementOrigin(items_[i].loc,
 198                                                             origChars),
 199                                  0);
 200       }
 201     }
 202 }
 203
 204 void Text::clear()
 205 {
 206   chars_.resize(0);
 207   items_.clear();
 208 }
 209
 210 Boolean Text::startDelimLocation(Location &loc) const
 211 {
 212   if (items_.size() == 0 || items_[0].type != TextItem::startDelim)
 213     return 0;
 214   loc = items_[0].loc;
 215   return 1;
 216 }
 217
 218 Boolean Text::endDelimLocation(Location &loc) const
 219 {
 220   if (items_.size() == 0)
 221     return 0;
 222   switch (items_.back().type) {
 223   case TextItem::endDelim:
 224   case TextItem::endDelimA:
 225     break;
 226   default:
 227     return 0;
 228   }
 229   loc = items_.back().loc;
 230   return 1;
 231 }
 232
 233 Boolean Text::delimType(Boolean &lita) const
 234 {
 235   if (items_.size() == 0)
 236     return 0;
 237   switch (items_.back().type) {
 238   case TextItem::endDelim:
 239     lita = 0;
 240     return 1;
 241   case TextItem::endDelimA:
 242     lita = 1;
 243     return 1;
 244   default:
 245     break;
 246   }
 247   return 0;
 248 }
 249
 250 TextItem::TextItem()
 251 : type(data), c(0), index(0)
 252 {
 253 }
 254
 255 void Text::swap(Text &to)
 256 {
 257   items_.swap(to.items_);
 258   chars_.swap(to.chars_);
 259 }
 260
 261 TextIter::TextIter(const Text &text)
 262 : ptr_(text.items_.begin()), text_(&text)
 263 {
 264 }
 265
 266 const Char *TextIter::chars(size_t &length) const
 267 {
 268   if (ptr_->type == TextItem::ignore) {
 269     length = 1;
 270     return &ptr_->c;
 271   }
 272   else {
 273     const StringC &chars = text_->chars_;
 274     size_t charsIndex = ptr_->index;
 275     if (ptr_ + 1 != text_->items_.begin() + text_->items_.size())
 276       length = ptr_[1].index - charsIndex;
 277     else
 278       length = chars.size() - charsIndex;
 279     return chars.data() + charsIndex;
 280   }
 281 }
 282
 283 Boolean TextIter::next(TextItem::Type &type, const Char *&str, size_t &length,
 284                        const Location *&loc)
 285 {
 286   const TextItem *end = text_->items_.begin() + text_->items_.size();
 287   if (ptr_ == end)
 288     return 0;
 289   type = ptr_->type;
 290   loc = &ptr_->loc;
 291   if (type == TextItem::ignore) {
 292     str = &ptr_->c;
 293     length = 1;
 294   }
 295   else {
 296     const StringC &chars = text_->chars_;
 297     size_t charsIndex = ptr_->index;
 298     str = chars.data() + charsIndex;
 299     if (ptr_ + 1 != end)
 300       length = ptr_[1].index - charsIndex;
 301     else
 302       length = chars.size() - charsIndex;
 303   }
 304   ptr_++;
 305   return 1;
 306 }
 307
 308 void Text::insertChars(const StringC &s, const Location &loc)
 309 {
 310   chars_.insert(0, s);
 311   items_.resize(items_.size() + 1);
 312   for (size_t i = items_.size() - 1; i > 0; i--) {
 313     items_[i] = items_[i - 1];
 314     items_[i].index += s.size();
 315   }
 316   items_[0].loc = loc;
 317   items_[0].type = TextItem::data;
 318   items_[0].index = 0;
 319 }
 320
 321 size_t Text::nDataEntities() const
 322 {
 323   size_t n = 0;
 324   for (size_t i = 0; i < items_.size(); i++)
 325     switch (items_[i].type) {
 326     case TextItem::sdata:
 327     case TextItem::cdata:
 328       n++;
 329       break;
 330     default:
 331       break;
 332     }
 333   return n;
 334 }
 335
 336 // This is used to determine for a FIXED CDATA attribute
 337 // whether a specified value if equal to the default value.
 338
 339 Boolean Text::fixedEqual(const Text &text) const
 340 {
 341   if (string() != text.string())
 342     return 0;
 343   size_t j = 0;
 344   for (size_t i = 0; i < items_.size(); i++)
 345     switch (items_[i].type) {
 346     case TextItem::cdata:
 347     case TextItem::sdata:
 348       for (;;) {
 349         if (j >= text.items_.size())
 350           return 0;
 351         if (text.items_[j].type == TextItem::cdata
 352             || text.items_[j].type == TextItem::sdata)
 353           break;
 354         j++;
 355       }
 356       if (text.items_[j].index != items_[i].index
 357           || (text.items_[j].loc.origin()->asEntityOrigin()->entity()
 358               != items_[i].loc.origin()->asEntityOrigin()->entity()))
 359         return 0;
 360       break;
 361     default:
 362       break;
 363     }
 364   for (; j < text.items_.size(); j++)
 365     switch (text.items_[j].type) {
 366     case TextItem::cdata:
 367     case TextItem::sdata:
 368       return 0;
 369     default:
 370       break;
 371     }
 372   return 1;
 373 }
 374
 375 Location Text::charLocation(size_t ind) const
 376 {
 377   // Find the last item whose index <= ind.
 378   // Invariant:
 379   // indexes < i implies index <= ind
 380   // indexes >= lim implies index > ind
 381   // The first item will always have index 0.
 382   size_t i = 1;
 383   size_t lim = items_.size();
 384   while (i < lim) {
 385     size_t mid = i + (lim - i)/2;
 386     if (items_[mid].index > ind)
 387       lim = mid;
 388     else
 389       i = mid + 1;
 390   }
 391 #if 0
 392   for (size_t i = 1; i < items_.size(); i++)
 393     if (items_[i].index > ind)
 394       break;
 395 #endif
 396   i--;
 397   Location loc;
 398   // If items_.size() == 0, then i == lim.
 399   if (i < lim) {
 400     loc = items_[i].loc;
 401     loc += ind - items_[i].index;
 402   }
 403   return loc;
 404 }
 405
 406 #ifdef SP_NAMESPACE
 407 }
 408 #endif