cde/programs/nsgmls/Text.C

   1 /*
   2  * CDE - Common Desktop Environment
   3  *
   4  * Copyright (c) 1993-2012, The Open Group. All rights reserved.
   5  *
   6  * These libraries and programs are free software; you can
   7  * redistribute them and/or modify them under the terms of the GNU
   8  * Lesser General Public License as published by the Free Software
   9  * Foundation; either version 2 of the License, or (at your option)
  10  * any later version.
  11  *
  12  * These libraries and programs are distributed in the hope that
  13  * they will be useful, but WITHOUT ANY WARRANTY; without even the
  14  * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
  15  * PURPOSE. See the GNU Lesser General Public License for more
  16  * details.
  17  *
  18  * You should have received a copy of the GNU Lesser General Public
  19  * License along with these libraries and programs; if not, write
  20  * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
  21  * Floor, Boston, MA 02110-1301 USA
  22  */
  23 /* $XConsortium: Text.C /main/1 1996/07/29 17:06:09 cde-hp $ */
  24 // Copyright (c) 1994 James Clark
  25 // See the file COPYING for copying permission.
  26
  27 #ifdef __GNUG__
  28 #pragma implementation
  29 #endif
  30 #include "splib.h"
  31 #include "Text.h"
  32 #include "Entity.h"
  33 // for memcmp()
  34 #include <string.h>
  35
  36 #ifdef SP_NAMESPACE
  37 namespace SP_NAMESPACE {
  38 #endif
  39
  40 Text::Text()
  41 {
  42 }
  43
  44 void Text::addChar(Char c, const Location &loc)
  45 {
  46   if (items_.size() == 0
  47       || items_.back().type != TextItem::data
  48       || loc.origin().pointer() != items_.back().loc.origin().pointer()
  49       || loc.index() != (items_.back().loc.index()
  50                          + (chars_.size() - items_.back().index))) {
  51     items_.resize(items_.size() + 1);
  52     items_.back().loc = loc;
  53     items_.back().type = TextItem::data;
  54     items_.back().index = chars_.size();
  55   }
  56   chars_ += c;
  57 }
  58
  59 void Text::addChars(const Char *p, size_t length, const Location &loc)
  60 {
  61   if (items_.size() == 0
  62       || items_.back().type != TextItem::data
  63       || loc.origin().pointer() != items_.back().loc.origin().pointer()
  64       || loc.index() != (items_.back().loc.index()
  65                          + (chars_.size() - items_.back().index))) {
  66     items_.resize(items_.size() + 1);
  67     items_.back().loc = loc;
  68     items_.back().type = TextItem::data;
  69     items_.back().index = chars_.size();
  70   }
  71   chars_.append(p, length);
  72 }
  73
  74 void Text::addCdata(const InternalEntity *entity,
  75                     const ConstPtr<Origin> &origin)
  76 {
  77   addSimple(TextItem::cdata, Location(origin, 0));
  78   chars_.append(entity->string().data(), entity->string().size());
  79 }
  80
  81 void Text::addSdata(const InternalEntity *entity,
  82                     const ConstPtr<Origin> &origin)
  83 {
  84   addSimple(TextItem::sdata, Location(origin, 0));
  85   chars_.append(entity->string().data(), entity->string().size());
  86 }
  87
  88 void Text::addCharsTokenize(const Char *str, size_t n, const Location &loc,
  89                             Char space)
  90 {
  91   Location loci(loc);
  92   // FIXME speed this up
  93   for (size_t i = 0; i < n; loci += 1, i++) {
  94     if (str[i] == space && (size() == 0 || lastChar() == space))
  95       ignoreChar(str[i], loci);
  96     else
  97       addChar(str[i], loci);
  98   }
  99 }
 100
 101 void Text::tokenize(Char space, Text &text) const
 102 {
 103   TextIter iter(*this);
 104   TextItem::Type type;
 105   const Char *p;
 106   size_t n;
 107   const Location *loc;
 108   while (iter.next(type, p, n, loc)) {
 109     switch (type) {
 110     case TextItem::data:
 111       text.addCharsTokenize(p, n, *loc, space);
 112       break;
 113     case TextItem::sdata:
 114     case TextItem::cdata:
 115       {
 116         text.addEntityStart(*loc);
 117         text.addCharsTokenize(p, n, *loc, space);
 118         Location tem(*loc);
 119         tem += n;
 120         text.addEntityEnd(tem);
 121       }
 122       break;
 123     case TextItem::ignore:
 124       text.ignoreChar(*p, *loc);
 125       break;
 126     default:
 127       text.addSimple(type, *loc);
 128       break;
 129     }
 130   }
 131   if (text.size() > 0 && text.lastChar() == space)
 132     text.ignoreLastChar();
 133 }
 134
 135 void Text::addSimple(TextItem::Type type, const Location &loc)
 136 {
 137   items_.resize(items_.size() + 1);
 138   items_.back().loc = loc;
 139   items_.back().type = type;
 140   items_.back().index = chars_.size();
 141 }
 142
 143 void Text::ignoreChar(Char c, const Location &loc)
 144 {
 145   items_.resize(items_.size() + 1);
 146   items_.back().loc = loc;
 147   items_.back().type = TextItem::ignore;
 148   items_.back().c = c;
 149   items_.back().index = chars_.size();
 150 }
 151
 152 void Text::ignoreLastChar()
 153 {
 154   size_t lastIndex = chars_.size() - 1;
 155   size_t i;
 156   for (i = items_.size() - 1; items_[i].index > lastIndex; i--)
 157     ;
 158   // lastIndex >= items_[i].index
 159   if (items_[i].index != lastIndex) {
 160     items_.resize(items_.size() + 1);
 161     i++;
 162     for (size_t j = items_.size() - 1; j > i; j--)
 163       items_[j] = items_[j - 1];
 164     items_[i].index = lastIndex;
 165     items_[i].loc = items_[i - 1].loc;
 166     items_[i].loc += lastIndex - items_[i - 1].index;
 167   }
 168
 169   items_[i].c = chars_[chars_.size() - 1];
 170   items_[i].type = TextItem::ignore;
 171   for (size_t j = i + 1; j < items_.size(); j++)
 172     items_[j].index = lastIndex;
 173   chars_.resize(chars_.size() - 1);
 174 }
 175
 176 // All characters other than spaces are substed.
 177
 178 void Text::subst(const SubstTable<Char> &table, Char space)
 179 {
 180   for (size_t i = 0; i < items_.size(); i++)
 181     if (items_[i].type == TextItem::data) {
 182       size_t lim = (i + 1 < items_.size()
 183                     ? items_[i + 1].index
 184                     : chars_.size());
 185       size_t j;
 186       for (j = items_[i].index; j < lim; j++) {
 187         Char c = chars_[j];
 188         if (c != space && c != table[c])
 189           break;
 190       }
 191       if (j < lim) {
 192         size_t start = items_[i].index;
 193         StringC origChars(chars_.data() + start, lim - start);
 194         for (; j < lim; j++)
 195           if (chars_[j] != space)
 196             table.subst(chars_[j]);
 197         items_[i].loc = Location(new MultiReplacementOrigin(items_[i].loc,
 198                                                             origChars),
 199                                  0);
 200       }
 201     }
 202 }
 203
 204 void Text::clear()
 205 {
 206   chars_.resize(0);
 207   items_.clear();
 208 }
 209
 210 Boolean Text::startDelimLocation(Location &loc) const
 211 {
 212   if (items_.size() == 0 || items_[0].type != TextItem::startDelim)
 213     return 0;
 214   loc = items_[0].loc;
 215   return 1;
 216 }
 217
 218 Boolean Text::endDelimLocation(Location &loc) const
 219 {
 220   if (items_.size() == 0)
 221     return 0;
 222   switch (items_.back().type) {
 223   case TextItem::endDelim:
 224   case TextItem::endDelimA:
 225     break;
 226   default:
 227     return 0;
 228   }
 229   loc = items_.back().loc;
 230   return 1;
 231 }
 232
 233 Boolean Text::delimType(Boolean &lita) const
 234 {
 235   if (items_.size() == 0)
 236     return 0;
 237   switch (items_.back().type) {
 238   case TextItem::endDelim:
 239     lita = 0;
 240     return 1;
 241   case TextItem::endDelimA:
 242     lita = 1;
 243     return 1;
 244   default:
 245     break;
 246   }
 247   return 0;
 248 }
 249
 250 TextItem::TextItem()
 251 {
 252 }
 253
 254 void Text::swap(Text &to)
 255 {
 256   items_.swap(to.items_);
 257   chars_.swap(to.chars_);
 258 }
 259
 260 TextIter::TextIter(const Text &text)
 261 : ptr_(text.items_.begin()), text_(&text)
 262 {
 263 }
 264
 265 const Char *TextIter::chars(size_t &length) const
 266 {
 267   if (ptr_->type == TextItem::ignore) {
 268     length = 1;
 269     return &ptr_->c;
 270   }
 271   else {
 272     const StringC &chars = text_->chars_;
 273     size_t charsIndex = ptr_->index;
 274     if (ptr_ + 1 != text_->items_.begin() + text_->items_.size())
 275       length = ptr_[1].index - charsIndex;
 276     else
 277       length = chars.size() - charsIndex;
 278     return chars.data() + charsIndex;
 279   }
 280 }
 281
 282 Boolean TextIter::next(TextItem::Type &type, const Char *&str, size_t &length,
 283                        const Location *&loc)
 284 {
 285   const TextItem *end = text_->items_.begin() + text_->items_.size();
 286   if (ptr_ == end)
 287     return 0;
 288   type = ptr_->type;
 289   loc = &ptr_->loc;
 290   if (type == TextItem::ignore) {
 291     str = &ptr_->c;
 292     length = 1;
 293   }
 294   else {
 295     const StringC &chars = text_->chars_;
 296     size_t charsIndex = ptr_->index;
 297     str = chars.data() + charsIndex;
 298     if (ptr_ + 1 != end)
 299       length = ptr_[1].index - charsIndex;
 300     else
 301       length = chars.size() - charsIndex;
 302   }
 303   ptr_++;
 304   return 1;
 305 }
 306
 307 void Text::insertChars(const StringC &s, const Location &loc)
 308 {
 309   chars_.insert(0, s);
 310   items_.resize(items_.size() + 1);
 311   for (size_t i = items_.size() - 1; i > 0; i--) {
 312     items_[i] = items_[i - 1];
 313     items_[i].index += s.size();
 314   }
 315   items_[0].loc = loc;
 316   items_[0].type = TextItem::data;
 317   items_[0].index = 0;
 318 }
 319
 320 size_t Text::nDataEntities() const
 321 {
 322   size_t n = 0;
 323   for (size_t i = 0; i < items_.size(); i++)
 324     switch (items_[i].type) {
 325     case TextItem::sdata:
 326     case TextItem::cdata:
 327       n++;
 328       break;
 329     default:
 330       break;
 331     }
 332   return n;
 333 }
 334
 335 // This is used to determine for a FIXED CDATA attribute
 336 // whether a specified value if equal to the default value.
 337
 338 Boolean Text::fixedEqual(const Text &text) const
 339 {
 340   if (string() != text.string())
 341     return 0;
 342   size_t j = 0;
 343   for (size_t i = 0; i < items_.size(); i++)
 344     switch (items_[i].type) {
 345     case TextItem::cdata:
 346     case TextItem::sdata:
 347       for (;;) {
 348         if (j >= text.items_.size())
 349           return 0;
 350         if (text.items_[j].type == TextItem::cdata
 351             || text.items_[j].type == TextItem::sdata)
 352           break;
 353         j++;
 354       }
 355       if (text.items_[j].index != items_[i].index
 356           || (text.items_[j].loc.origin()->asEntityOrigin()->entity()
 357               != items_[i].loc.origin()->asEntityOrigin()->entity()))
 358         return 0;
 359       break;
 360     default:
 361       break;
 362     }
 363   for (; j < text.items_.size(); j++)
 364     switch (text.items_[j].type) {
 365     case TextItem::cdata:
 366     case TextItem::sdata:
 367       return 0;
 368     default:
 369       break;
 370     }
 371   return 1;
 372 }
 373
 374 Location Text::charLocation(size_t ind) const
 375 {
 376   // Find the last item whose index <= ind.
 377   // Invariant:
 378   // indexes < i implies index <= ind
 379   // indexes >= lim implies index > ind
 380   // The first item will always have index 0.
 381   size_t i = 1;
 382   size_t lim = items_.size();
 383   while (i < lim) {
 384     size_t mid = i + (lim - i)/2;
 385     if (items_[mid].index > ind)
 386       lim = mid;
 387     else
 388       i = mid + 1;
 389   }
 390 #if 0
 391   for (size_t i = 1; i < items_.size(); i++)
 392     if (items_[i].index > ind)
 393       break;
 394 #endif
 395   i--;
 396   Location loc;
 397   // If items_.size() == 0, then i == lim.
 398   if (i < lim) {
 399     loc = items_[i].loc;
 400     loc += ind - items_[i].index;
 401   }
 402   return loc;
 403 }
 404
 405 #ifdef SP_NAMESPACE
 406 }
 407 #endif