2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these libraries and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
23 /* $XConsortium: ExtendEntityManager.C /main/1 1996/07/29 16:51:42 cde-hp $ */
24 // Copyright (c) 1994, 1995, 1996 James Clark
25 // See the file COPYING for copying permission.
28 #pragma implementation
32 #include "ExtendEntityManager.h"
34 #include "MessageArg.h"
35 #include "OffsetOrderedList.h"
37 #include "StorageManager.h"
41 #include "RegisteredCodingSystem.h"
43 #include "EntityManagerMessages.h"
44 #include "StorageObjectPosition.h"
46 #include "CodingSystem.h"
47 #include "InputSource.h"
49 #include "EntityCatalog.h"
57 #ifdef DECLARE_MEMMOVE
59 void *memmove(void *, const void *, size_t);
64 namespace SP_NAMESPACE {
67 const char EOFCHAR = '\032'; // Control-Z
69 class ExternalInputSource;
71 class EntityManagerImpl : public ExtendEntityManager {
73 EntityManagerImpl(StorageManager *defaultStorageManager,
74 const InputCodingSystem *defaultCodingSystem);
75 void setCatalogManager(CatalogManager *catalogManager);
76 void registerStorageManager(StorageManager *);
77 void registerCodingSystem(const char *, const InputCodingSystem *);
78 InputSource *open(const StringC &sysid,
83 InputSource *openIfExists(const StringC &sysid,
88 ConstPtr<EntityCatalog> makeCatalog(StringC &systemId,
89 const CharsetInfo &charset,
91 Boolean expandSystemId(const StringC &,
98 Boolean mergeSystemIds(const Vector<StringC> &,
99 Boolean mapCatalogDocument,
103 StorageManager *lookupStorageType(const StringC &, const CharsetInfo &) const;
104 StorageManager *lookupStorageType(const char *) const;
105 StorageManager *guessStorageType(const StringC &, const CharsetInfo &) const;
106 const InputCodingSystem *lookupCodingSystem(const StringC &,
108 const char *&) const;
109 Boolean resolveSystemId(const StringC &str,
110 const CharsetInfo &idCharset,
112 const Location &defLocation,
114 ParsedSystemId &parsedSysid) const;
115 Boolean parseSystemId(const StringC &str,
116 const CharsetInfo &idCharset,
118 const StorageObjectSpec *defSpec,
120 ParsedSystemId &parsedSysid) const;
122 EntityManagerImpl(const EntityManagerImpl &); // undefined
123 void operator=(const EntityManagerImpl &); // undefined
124 static const StorageObjectSpec *defStorageObject(const Location &);
125 static Boolean matchKey(const StringC &type, const char *s,
126 const CharsetInfo &docCharset);
127 NCVector<Owner<StorageManager> > storageManagers_;
128 Vector<RegisteredCodingSystem> codingSystems_;
129 Owner<StorageManager> defaultStorageManager_;
130 const InputCodingSystem *defaultCodingSystem_;
131 Owner<CatalogManager> catalogManager_;
132 friend class FSIParser;
135 class ExternalInfoImpl : public ExternalInfo {
138 ExternalInfoImpl(ParsedSystemId &parsedSysid);
139 const StorageObjectSpec &spec(size_t i) const;
140 size_t nSpecs() const;
141 const ParsedSystemId &parsedSystemId() const;
143 void noteStorageObjectEnd(Offset);
144 void noteInsertedRSs();
145 void setDecoder(size_t i, Decoder *);
146 StringC &id(size_t i);
147 Boolean convertOffset(Offset, StorageObjectLocation &) const;
149 ParsedSystemId parsedSysid_;
150 NCVector<StorageObjectPosition> position_;
151 size_t currentIndex_;
152 // list of inserted RSs
153 OffsetOrderedList rsList_;
157 class ExternalInputSource : public InputSource {
159 ExternalInputSource(ParsedSystemId &parsedSysid,
160 InputSourceOrigin *origin,
162 Boolean mayNotExist = 0);
163 void pushCharRef(Char, const NamedCharRef &);
164 ~ExternalInputSource();
166 Xchar fill(Messenger &);
167 Boolean rewind(Messenger &);
168 void willNotRewind();
172 void noteRSAt(const Char *);
173 void reallocateBuffer(size_t size);
174 void insertChar(Char);
175 static const Char *findNextCr(const Char *start, const Char *end);
176 static const Char *findNextLf(const Char *start, const Char *end);
177 static const Char *findNextCrOrLf(const Char *start, const Char *end);
179 ExternalInfoImpl *info_;
182 Offset bufLimOffset_;
185 NCVector<Owner<StorageObject> > sov_;
190 const char *leftOver_;
193 Boolean mayNotExist_;
202 RecordType recordType_;
208 FSIParser(const StringC &, const CharsetInfo &idCharset,
210 const StorageObjectSpec *defSpec,
211 const EntityManagerImpl *em,
213 Boolean parse(ParsedSystemId &parsedSysid);
214 static const char *recordsName(StorageObjectSpec::Records records);
217 StorageObjectSpec::Records value;
220 Boolean handleInformal(size_t startIndex, ParsedSystemId &parsedSysid);
221 Boolean convertId(StringC &, Xchar smcrd, const StorageManager *);
224 StorageManager *lookupStorageType(const StringC &key, Boolean &neutral);
225 Boolean matchKey(const StringC &, const char *);
226 Boolean matchChar(Xchar, char);
228 Boolean convertDigit(Xchar c, int &weight);
229 void uncharref(StringC &);
230 Boolean setAttributes(StorageObjectSpec &sos, Boolean neutral,
231 Xchar &smcrd, Boolean &fold);
232 Boolean setCatalogAttributes(ParsedSystemId &parsedSysid);
233 void setDefaults(StorageObjectSpec &sos);
234 Boolean parseAttribute(StringC &token, Boolean &gotValue, StringC &value);
235 Boolean lookupRecords(const StringC &token, StorageObjectSpec::Records &);
236 void convertMinimumLiteral(const StringC &from, StringC &to);
241 const EntityManagerImpl *em_;
242 const StorageObjectSpec *defSpec_;
243 const CharsetInfo &idCharset_;
245 static RecordType recordTypeTable[];
// Character values used for record boundaries in this file:
// RS is line feed ('\n') and RE is carriage return ('\r').
248 const Char RS = '\n';
249 const Char RE = '\r';
// NOTE(review): the uses of lineEnd are not visible in this excerpt;
// presumably it is the byte-level line terminator - confirm against callers.
250 const char lineEnd = '\n';
252 ExtendEntityManager::CatalogManager::~CatalogManager()
256 ExtendEntityManager *ExtendEntityManager::make(StorageManager *sm,
257 const InputCodingSystem *cs)
259 return new EntityManagerImpl(sm, cs);
262 Boolean ExtendEntityManager::externalize(const ExternalInfo *info,
264 StorageObjectLocation &loc)
268 const ExternalInfoImpl *p = DYNAMIC_CAST_CONST_PTR(ExternalInfoImpl, info);
271 return p->convertOffset(off, loc);
274 const ParsedSystemId *
275 ExtendEntityManager::externalInfoParsedSystemId(const ExternalInfo *info)
279 const ExternalInfoImpl *p = DYNAMIC_CAST_CONST_PTR(ExternalInfoImpl, info);
282 return &p->parsedSystemId();
286 EntityManagerImpl::EntityManagerImpl(StorageManager *defaultStorageManager,
287 const InputCodingSystem *defaultCodingSystem)
288 : defaultStorageManager_(defaultStorageManager),
289 defaultCodingSystem_(defaultCodingSystem)
293 InputSource *EntityManagerImpl::open(const StringC &sysid,
294 const CharsetInfo &docCharset,
295 InputSourceOrigin *origin,
299 ParsedSystemId parsedSysid;
300 if (!parseSystemId(sysid, docCharset, 0, 0, mgr, parsedSysid)
301 || !catalogManager_->mapCatalog(parsedSysid, this, mgr))
303 return new ExternalInputSource(parsedSysid, origin, mayRewind, 0);
306 InputSource *EntityManagerImpl::openIfExists(const StringC &sysid,
307 const CharsetInfo &docCharset,
308 InputSourceOrigin *origin,
312 ParsedSystemId parsedSysid;
313 if (!parseSystemId(sysid, docCharset, 0, 0, mgr, parsedSysid)
314 || !catalogManager_->mapCatalog(parsedSysid, this, mgr))
316 return new ExternalInputSource(parsedSysid, origin, mayRewind, 1);
319 ConstPtr<EntityCatalog>
320 EntityManagerImpl::makeCatalog(StringC &systemId,
321 const CharsetInfo &charset,
324 return catalogManager_->makeCatalog(systemId, charset, this, mgr);
328 EntityManagerImpl::mergeSystemIds(const Vector<StringC> &sysids,
329 Boolean mapCatalogDocument,
330 const CharsetInfo &charset,
332 StringC &result) const
334 ParsedSystemId parsedSysid;
335 if (mapCatalogDocument) {
336 parsedSysid.maps.resize(parsedSysid.maps.size() + 1);
337 parsedSysid.maps.back().type = ParsedSystemIdMap::catalogDocument;
339 for (size_t i = 0; i < sysids.size(); i++)
340 if (!parseSystemId(sysids[i],
347 parsedSysid.unparse(charset, result);
352 EntityManagerImpl::expandSystemId(const StringC &str,
353 const Location &defLocation,
355 const CharsetInfo &charset,
356 const StringC *mapCatalogPublic,
360 ParsedSystemId parsedSysid;
361 const StorageObjectSpec *defSpec = defStorageObject(defLocation);
362 if (!parseSystemId(str, charset, isNdata, defSpec, mgr, parsedSysid))
364 if (mapCatalogPublic) {
365 ParsedSystemIdMap map;
366 map.type = ParsedSystemIdMap::catalogPublic;
367 map.publicId = *mapCatalogPublic;
368 parsedSysid.maps.insert(parsedSysid.maps.begin(), 1, map);
370 parsedSysid.unparse(charset, result);
374 Boolean EntityManagerImpl::parseSystemId(const StringC &str,
375 const CharsetInfo &idCharset,
377 const StorageObjectSpec *defSpec,
379 ParsedSystemId &parsedSysid) const
381 FSIParser fsiParser(str, idCharset, isNdata, defSpec, this, mgr);
382 return fsiParser.parse(parsedSysid);
386 EntityManagerImpl::guessStorageType(const StringC &type,
387 const CharsetInfo &docCharset) const
389 for (size_t i = 0; i < storageManagers_.size(); i++)
390 if (storageManagers_[i]->guessIsId(type, docCharset))
391 return storageManagers_[i].pointer();
392 if (defaultStorageManager_->guessIsId(type, docCharset))
393 return defaultStorageManager_.pointer();
398 EntityManagerImpl::lookupStorageType(const StringC &type,
399 const CharsetInfo &docCharset) const
401 if (type.size() == 0)
403 if (matchKey(type, defaultStorageManager_->type(), docCharset))
404 return defaultStorageManager_.pointer();
405 for (size_t i = 0; i < storageManagers_.size(); i++)
406 if (matchKey(type, storageManagers_[i]->type(), docCharset))
407 return storageManagers_[i].pointer();
412 EntityManagerImpl::lookupStorageType(const char *type) const
414 if (type == defaultStorageManager_->type())
415 return defaultStorageManager_.pointer();
416 for (size_t i = 0; i < storageManagers_.size(); i++)
417 if (type == storageManagers_[i]->type())
418 return storageManagers_[i].pointer();
422 const InputCodingSystem *
423 EntityManagerImpl::lookupCodingSystem(const StringC &type,
424 const CharsetInfo &docCharset,
425 const char *&name) const
427 for (size_t i = 0; i < codingSystems_.size(); i++)
428 if (matchKey(type, codingSystems_[i].name, docCharset)) {
429 name = codingSystems_[i].name;
430 return codingSystems_[i].ics;
436 EntityManagerImpl::matchKey(const StringC &type,
438 const CharsetInfo &docCharset)
440 if (strlen(s) != type.size())
442 for (size_t i = 0; i < type.size(); i++)
443 if (docCharset.execToDesc(toupper(s[i])) != type[i]
444 && docCharset.execToDesc(tolower(s[i])) != type[i])
449 void EntityManagerImpl::registerStorageManager(StorageManager *sm)
451 storageManagers_.resize(storageManagers_.size() + 1);
452 storageManagers_.back() = sm;
455 void EntityManagerImpl::registerCodingSystem(const char *name,
456 const InputCodingSystem *ics)
458 codingSystems_.resize(codingSystems_.size() + 1);
459 RegisteredCodingSystem &rcs = codingSystems_.back();
464 void EntityManagerImpl::setCatalogManager(CatalogManager *catalogManager)
466 catalogManager_ = catalogManager;
469 const StorageObjectSpec *
470 EntityManagerImpl::defStorageObject(const Location &defLocation)
473 const ExternalInfo *info;
474 Location loc(defLocation);
476 if (loc.origin().isNull())
478 const InputSourceOrigin *inputSourceOrigin = loc.origin()->asInputSourceOrigin();
479 if (inputSourceOrigin) {
480 off = inputSourceOrigin->startOffset(loc.index());
481 info = inputSourceOrigin->externalInfo();
484 if (!inputSourceOrigin->defLocation(off, loc))
488 loc = loc.origin()->parent();
490 StorageObjectLocation soLoc;
491 if (!ExtendEntityManager::externalize(info, off, soLoc))
493 return soLoc.storageObjectSpec;
496 ExternalInputSource::ExternalInputSource(ParsedSystemId &parsedSysid,
497 InputSourceOrigin *origin,
500 : InputSource(origin, 0, 0),
501 mayRewind_(mayRewind),
502 mayNotExist_(mayNotExist),
503 sov_(parsedSysid.size()),
506 recordType_(unknown),
510 info_ = new ExternalInfoImpl(parsedSysid);
511 origin->setExternalInfo(info_);
514 void ExternalInputSource::init()
527 ExternalInputSource::~ExternalInputSource()
533 Boolean ExternalInputSource::rewind(Messenger &mgr)
538 // reset makes a new EntityOrigin
539 ParsedSystemId parsedSysid(info_->parsedSystemId());
540 info_ = new ExternalInfoImpl(parsedSysid);
541 inputSourceOrigin()->setExternalInfo(info_);
543 for (size_t i = 0; i < soIndex_; i++) {
544 if (sov_[i] && !sov_[i]->rewind(mgr))
551 void ExternalInputSource::willNotRewind()
553 for (size_t i = 0; i < soIndex_; i++)
555 sov_[i]->willNotRewind();
// Round N up to the nearest multiple of TO.
// TO must be a power of 2: the result is computed by adding TO - 1 and
// then clearing the low bits with the mask ~(TO - 1).
size_t roundUp(size_t n, size_t to)
{
  const size_t lowBits = to - 1;
  return (n + lowBits) & ~lowBits;
}
569 void ExternalInputSource::noteRSAt(const Char *p)
571 info_->noteRS(bufLimOffset_ - (bufLim_ - p));
575 void ExternalInputSource::noteRS()
580 Xchar ExternalInputSource::fill(Messenger &mgr)
582 ASSERT(cur() == end());
583 while (end() >= bufLim_) {
586 if (soIndex_ >= sov_.size())
589 info_->noteStorageObjectEnd(bufLimOffset_ - (bufLim_ - end()));
590 const StorageObjectSpec &spec = info_->spec(soIndex_);
592 NullMessenger nullMgr;
594 = spec.storageManager->makeStorageObject(spec.specId, spec.baseId,
597 info_->id(soIndex_));
601 = spec.storageManager->makeStorageObject(spec.specId, spec.baseId,
604 info_->id(soIndex_));
605 so_ = sov_[soIndex_].pointer();
607 decoder_ = spec.codingSystem->makeDecoder();
608 info_->setDecoder(soIndex_, decoder_);
609 zapEof_ = spec.zapEof;
610 switch (spec.records) {
611 case StorageObjectSpec::asis:
615 case StorageObjectSpec::cr:
618 case StorageObjectSpec::lf:
621 case StorageObjectSpec::crlf:
624 case StorageObjectSpec::find:
625 recordType_ = unknown;
631 readSize_ = so_->getBlockSize();
640 size_t keepSize = end() - start();
641 const size_t align = sizeof(int)/sizeof(Char);
642 size_t readSizeChars = (readSize_ + (sizeof(Char) - 1))/sizeof(Char);
643 readSizeChars = roundUp(readSizeChars, align);
644 size_t neededSize; // in Chars
646 // compute neededSize and readSize
647 unsigned minBytesPerChar = decoder_->minBytesPerChar();
648 if (nLeftOver_ == 0 && minBytesPerChar >= sizeof(Char)) {
649 // In this case we want to do decoding in place.
650 // FIXME It might be a win on some systems (Irix?) to arrange that the
651 // read buffer is on a page boundary.
653 if (keepSize >= size_t(-1)/sizeof(Char) - (align - 1) - insertRS_)
654 abort(); // FIXME throw an exception
656 // Now size_t(-1)/sizeof(Char) - (align - 1) - insertRS_ - keepSize > 0
658 > size_t(-1)/sizeof(Char) - (align - 1) - insertRS_ - keepSize)
660 neededSize = roundUp(readSizeChars + keepSize + insertRS_, align);
661 startOffset = ((neededSize > bufSize_ ? neededSize : bufSize_)
662 - readSizeChars - insertRS_ - keepSize);
665 // Needs to be room for everything before decoding.
666 neededSize = (keepSize + insertRS_ + readSizeChars
667 + (nLeftOver_ + sizeof(Char) - 1)/sizeof(Char));
668 // Also must be room for everything after decoding.
670 = (keepSize + insertRS_
671 // all the converted characters
672 + (nLeftOver_ + readSize_)/minBytesPerChar
673 // enough Chars to contain left over bytes
674 + ((readSize_ % minBytesPerChar + sizeof(Char) - 1)
676 if (neededSize2 > neededSize)
677 neededSize = neededSize2;
678 neededSize = roundUp(neededSize, align);
679 if (neededSize > size_t(-1)/sizeof(Char))
683 if (bufSize_ < neededSize)
684 reallocateBuffer(neededSize);
685 Char *newStart = buf_ + startOffset;
686 if (newStart != start() && keepSize > 0)
687 memmove(newStart, start(), keepSize*sizeof(Char));
688 char *bytesStart = (char *)(buf_ + bufSize_ - readSizeChars) - nLeftOver_;
689 if (nLeftOver_ > 0 && leftOver_ != bytesStart)
690 memmove(bytesStart, leftOver_, nLeftOver_);
695 if (so_->read((char *)(buf_ + bufSize_ - readSizeChars), readSize_,
698 const char *bytesEnd = bytesStart + nLeftOver_ + nread;
699 size_t nChars = decoder_->decode((Char *)end() + insertRS_,
702 - (zapEof_ && bytesEnd[-1] == EOFCHAR),
704 nLeftOver_ = bytesEnd - leftOver_;
709 advanceEnd(end() + 1);
715 bufLimOffset_ += nChars;
723 ASSERT(end() < bufLim_);
730 switch (recordType_) {
733 const Char *e = findNextCrOrLf(end(), bufLim_);
737 info_->noteInsertedRSs();
743 if (e + 1 < bufLim_) {
747 if (e + 2 == bufLim_) {
756 info_->noteInsertedRSs();
761 recordType_ = crUnknown;
772 if (*cur() == '\n') {
774 advanceEnd(cur() + 1);
778 advanceEnd(cur() + 1);
781 info_->noteInsertedRSs();
787 Char *e = (Char *)findNextLf(end(), bufLim_);
799 const Char *e = findNextCr(end(), bufLim_);
810 const Char *e = end();
812 e = findNextLf(e, bufLim_);
817 // Need to delete final RS if not followed by anything.
818 if (e + 1 == bufLim_) {
840 const Char *ExternalInputSource::findNextCr(const Char *start,
843 for (; start < end; start++)
849 const Char *ExternalInputSource::findNextLf(const Char *start,
852 for (; start < end; start++)
858 const Char *ExternalInputSource::findNextCrOrLf(const Char *start,
861 for (; start < end; start++)
862 if (*start == '\n' || *start == '\r')
867 void ExternalInputSource::pushCharRef(Char ch, const NamedCharRef &ref)
869 ASSERT(cur() == start());
870 noteCharRef(startIndex() + (cur() - start()), ref);
874 void ExternalInputSource::insertChar(Char ch)
876 if (start() > buf_) {
878 memmove((Char *)start() - 1, start(), (cur() - start())*sizeof(Char));
883 // must have start == buf
884 if (buf_ + (bufSize_ - (nLeftOver_ + sizeof(Char) - 1)/sizeof(Char))
886 if (bufSize_ == size_t(-1))
887 abort(); // FIXME throw an exception
888 reallocateBuffer(bufSize_ + 1);
890 else if (nLeftOver_ > 0 && ((char *)(bufLim_ + 1) > leftOver_)) {
891 char *s = (char *)(buf_ + bufSize_) - nLeftOver_;
892 memmove(s, leftOver_, nLeftOver_);
896 memmove((Char *)cur() + 1, cur(), (bufLim_ - cur())*sizeof(Char));
898 advanceEnd(end() + 1);
903 void ExternalInputSource::reallocateBuffer(size_t newSize)
905 Char *newBuf = new Char[newSize];
907 memcpy(newBuf, buf_, bufSize_*sizeof(Char));
909 changeBuffer(newBuf, buf_);
910 bufLim_ = newBuf + (bufLim_ - buf_);
911 if (nLeftOver_ > 0) {
912 char *s = (char *)(newBuf + bufSize_) - nLeftOver_;
914 newBuf + (leftOver_ - (char *)buf_),
922 RTTI_DEF1(ExternalInfoImpl, ExternalInfo)
924 ExternalInfoImpl::ExternalInfoImpl(ParsedSystemId &parsedSysid)
925 : currentIndex_(0), position_(parsedSysid.size())
927 parsedSysid.swap(parsedSysid_);
928 if (parsedSysid_.size() > 0) {
929 notrack_ = parsedSysid_[0].notrack;
935 StringC &ExternalInfoImpl::id(size_t i)
937 return parsedSysid_[i].id;
940 void ExternalInfoImpl::setDecoder(size_t i, Decoder *decoder)
942 position_[i].decoder = decoder;
945 void ExternalInfoImpl::noteInsertedRSs()
947 position_[currentIndex_].insertedRSs = 1;
950 void ExternalInfoImpl::noteRS(Offset offset)
953 rsList_.append(offset);
955 == (currentIndex_ == 0 ? 0 : position_[currentIndex_- 1].endOffset))
956 position_[currentIndex_].startsWithRS = 1;
959 void ExternalInfoImpl::noteStorageObjectEnd(Offset offset)
961 ASSERT(currentIndex_ < position_.size());
962 // The last endOffset_ must be -1.
963 if (currentIndex_ < position_.size() - 1) {
964 position_[currentIndex_++].endOffset = offset;
965 position_[currentIndex_].line1RS = rsList_.size();
966 notrack_ = parsedSysid_[currentIndex_].notrack;
970 Boolean ExternalInfoImpl::convertOffset(Offset off,
971 StorageObjectLocation &ret) const
973 ret.storageObjectSpec = 0;
974 if (off == Offset(-1) || position_.size() == 0)
976 // the last endOffset_ is Offset(-1), so this will
979 for (i = 0; off >= position_[i].endOffset; i++)
981 for (; parsedSysid_[i].id.size() == 0; i--)
984 ret.storageObjectSpec = &parsedSysid_[i];
985 Offset startOffset = i == 0 ? 0 : position_[i - 1].endOffset;
986 ret.storageObjectOffset = off - startOffset;
987 ret.byteIndex = ret.storageObjectOffset;
988 if (parsedSysid_[i].notrack
989 || parsedSysid_[i].records == StorageObjectSpec::asis) {
990 ret.lineNumber = (unsigned long)-1;
991 if (parsedSysid_[i].records != StorageObjectSpec::asis) {
992 if (position_[i].insertedRSs)
993 ret.byteIndex = (unsigned long)-1;
994 else if (ret.byteIndex > 0 && position_[i].startsWithRS)
995 ret.byteIndex--; // first RS is inserted
997 ret.columnNumber = (unsigned long)-1;
1001 size_t line1RS = position_[i].line1RS;
1002 // line1RS is now the number of RSs that are before or on the current line.
1005 if (rsList_.findPreceding(off, j, colStart)) {
1006 if (position_[i].insertedRSs)
1007 ret.byteIndex -= j + 1 - line1RS;
1008 else if (ret.byteIndex > 0 && position_[i].startsWithRS)
1009 ret.byteIndex--; // first RS is inserted
1017 // j is now the number of RSs that are before or on the current line
1018 // colStart is the offset of the first column
1019 ret.lineNumber = j - line1RS + 1 - position_[i].startsWithRS;
1020 // the offset of the first column
1021 if (colStart < startOffset)
1022 colStart = startOffset;
1023 // the RS that starts a line will be in column 0;
1024 // the first real character of a line will be column 1
1025 ret.columnNumber = 1 + off - colStart;
1027 if (!position_[i].decoder
1028 || !position_[i].decoder->convertOffset(ret.byteIndex))
1029 ret.byteIndex = (unsigned long)-1;
1033 const StorageObjectSpec &ExternalInfoImpl::spec(size_t i) const
1035 return parsedSysid_[i];
1038 size_t ExternalInfoImpl::nSpecs() const
1040 return parsedSysid_.size();
1043 const ParsedSystemId &ExternalInfoImpl::parsedSystemId() const
1045 return parsedSysid_;
1048 StorageObjectSpec::StorageObjectSpec()
1049 : storageManager(0), codingSystem(0), codingSystemName(0), notrack(0),
1050 records(find), zapEof(1), search(1)
1054 StorageObjectPosition::StorageObjectPosition()
1055 : endOffset(Offset(-1)), line1RS(0), startsWithRS(0), insertedRSs(0)
1059 FSIParser::FSIParser(const StringC &str,
1060 const CharsetInfo &idCharset,
1062 const StorageObjectSpec *defSpec,
1063 const EntityManagerImpl *em,
1067 idCharset_(idCharset),
1075 Xchar FSIParser::get()
1077 if (strIndex_ < str_.size())
1078 return str_[strIndex_++];
1083 void FSIParser::unget()
1089 Boolean FSIParser::matchKey(const StringC &str, const char *s)
1091 if (strlen(s) != str.size())
1093 for (size_t i = 0; i < str.size(); i++)
1094 if (idCharset_.execToDesc(toupper(s[i])) != str[i]
1095 && idCharset_.execToDesc(tolower(s[i])) != str[i])
1100 Boolean FSIParser::matchChar(Xchar ch, char execC)
1102 return ch == idCharset_.execToDesc(execC);
1105 Boolean FSIParser::isS(Xchar c)
1107 return (matchChar(c, ' ')
1108 || matchChar(c, '\r')
1109 || matchChar(c, '\n')
1110 || matchChar(c, ' '));
1113 Boolean FSIParser::convertDigit(Xchar c, int &weight)
1115 static const char digits[] = "0123456789";
1116 for (int i = 0; digits[i] != '\0'; i++)
1117 if (matchChar(c, digits[i])) {
1124 Boolean FSIParser::parse(ParsedSystemId &parsedSysid)
1126 size_t startIndex = strIndex_;
1127 if (!matchChar(get(), '<'))
1128 return handleInformal(startIndex, parsedSysid);
1133 return handleInformal(startIndex, parsedSysid);
1134 if (isS(c) || matchChar(c, '>'))
1139 if (matchKey(key, "CATALOG")) {
1140 if (!setCatalogAttributes(parsedSysid))
1142 return parse(parsedSysid);
1145 StorageManager *sm = lookupStorageType(key, neutral);
1147 return handleInformal(startIndex, parsedSysid);
1149 parsedSysid.resize(parsedSysid.size() + 1);
1150 StorageObjectSpec &sos = parsedSysid.back();
1151 sos.storageManager = sm;
1154 if (!setAttributes(sos, neutral, smcrd, fold))
1158 Boolean hadData = 0;
1163 if (matchChar(c, '<')) {
1174 if (isS(c) || matchChar(c, '>')) {
1176 sm = lookupStorageType(key, neutral);
1188 else if (!((!hadData && matchChar(c, '\r')) // ignored RE
1189 || matchChar(c, '\n') )) { // ignored RS
1194 if (id.size() > 0 && matchChar(id[id.size() - 1], '\r'))
1195 id.resize(id.size() - 1);
1197 id.swap(sos.specId);
1198 if (!convertId(sos.specId, smcrd, sos.storageManager))
1201 if (!sos.storageManager->transformNeutral(sos.specId, fold, mgr_))
1204 if (sos.storageManager->resolveRelative(sos.baseId, sos.specId,
1206 sos.baseId.resize(0);
1213 Boolean FSIParser::handleInformal(size_t index, ParsedSystemId &parsedSysid)
1215 parsedSysid.resize(parsedSysid.size() + 1);
1216 StorageObjectSpec &sos = parsedSysid.back();
1217 sos.specId.assign(str_.data() + index,
1218 str_.size() - index);
1219 sos.storageManager = em_->guessStorageType(sos.specId, idCharset_);
1220 if (!sos.storageManager) {
1221 if (defSpec_ && defSpec_->storageManager->inheritable())
1222 sos.storageManager = defSpec_->storageManager;
1224 sos.storageManager = em_->defaultStorageManager_.pointer();
1227 if (!convertId(sos.specId, -1, sos.storageManager))
1229 if (sos.storageManager->resolveRelative(sos.baseId, sos.specId, sos.search))
1230 sos.baseId.resize(0);
1234 StorageManager *FSIParser::lookupStorageType(const StringC &key,
1237 if (matchKey(key, "NEUTRAL")) {
1239 if (defSpec_ && defSpec_->storageManager->inheritable())
1240 return defSpec_->storageManager;
1242 return em_->defaultStorageManager_.pointer();
1245 StorageManager *sm = em_->lookupStorageType(key, idCharset_);
1252 Boolean FSIParser::setCatalogAttributes(ParsedSystemId &parsedSysid)
1254 Boolean hadPublic = 0;
1255 parsedSysid.maps.resize(parsedSysid.maps.size() + 1);
1256 parsedSysid.maps.back().type = ParsedSystemIdMap::catalogDocument;
1258 StringC token, value;
1260 if (!parseAttribute(token, gotValue, value)) {
1261 mgr_.message(EntityManagerMessages::fsiSyntax, StringMessageArg(str_));
1264 if (token.size() == 0)
1266 if (matchKey(token, "PUBLIC")) {
1268 mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
1269 StringMessageArg(idCharset_.execToDesc("PUBLIC")));
1270 else if (gotValue) {
1271 convertMinimumLiteral(value, parsedSysid.maps.back().publicId);
1272 parsedSysid.maps.back().type = ParsedSystemIdMap::catalogPublic;
1275 mgr_.message(EntityManagerMessages::fsiMissingValue,
1276 StringMessageArg(token));
1280 mgr_.message(gotValue
1281 ? EntityManagerMessages::fsiUnsupportedAttribute
1282 : EntityManagerMessages::fsiUnsupportedAttributeToken,
1283 StringMessageArg(token));
1288 void FSIParser::convertMinimumLiteral(const StringC &from, StringC &to)
1290 // Do just enough to ensure it can be reparsed.
1292 for (size_t i = 0; i < from.size(); i++) {
1294 if (matchChar(c, '"') || matchChar(c, '#'))
1295 mgr_.message(EntityManagerMessages::fsiLookupChar, NumberMessageArg(c));
1296 else if (matchChar(c, ' ')) {
1297 if (to.size() && to[to.size() - 1] != c)
1303 if (to.size() && matchChar(to[to.size() - 1], ' '))
1304 to.resize(to.size() - 1);
1307 // FIXME This should be table driven.
1309 Boolean FSIParser::setAttributes(StorageObjectSpec &sos,
1314 Boolean hadBctf = 0;
1315 Boolean hadTracking = 0;
1316 Boolean hadSmcrd = 0;
1319 Boolean hadRecords = 0;
1320 Boolean hadBase = 0;
1321 Boolean hadZapeof = 0;
1322 Boolean hadSearch = 0;
1323 Boolean hadFold = 0;
1324 StorageObjectSpec::Records records;
1327 StringC token, value;
1329 if (!parseAttribute(token, gotValue, value)) {
1330 mgr_.message(EntityManagerMessages::fsiSyntax, StringMessageArg(str_));
1333 if (token.size() == 0)
1335 if (matchKey(token, "BCTF")) {
1336 if (sos.storageManager->requiredCodingSystem())
1337 mgr_.message(EntityManagerMessages::fsiBctfNotApplicable);
1339 mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
1340 StringMessageArg(token));
1341 else if (gotValue) {
1342 const char *codingSystemName;
1343 const InputCodingSystem *codingSystem
1344 = em_->lookupCodingSystem(value, idCharset_, codingSystemName);
1346 sos.codingSystem = codingSystem;
1347 sos.codingSystemName = codingSystemName;
1349 else if (matchKey(value, "SAME")) {
1352 sos.codingSystem = defSpec_->codingSystem;
1353 sos.codingSystemName = defSpec_->codingSystemName;
1356 sos.codingSystem = em_->defaultCodingSystem_;
1357 sos.codingSystemName = 0;
1362 mgr_.message(EntityManagerMessages::fsiUnknownBctf,
1363 StringMessageArg(value));
1366 mgr_.message(EntityManagerMessages::fsiMissingValue,
1367 StringMessageArg(token));
1370 else if (matchKey(token, "TRACKING")) {
1372 mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
1373 StringMessageArg(token));
1374 else if (gotValue) {
1375 if (matchKey(value, "NOTRACK"))
1377 else if (!matchKey(value, "TRACK"))
1378 mgr_.message(EntityManagerMessages::fsiBadTracking,
1379 StringMessageArg(value));
1382 mgr_.message(EntityManagerMessages::fsiMissingValue,
1383 StringMessageArg(token));
1386 else if (matchKey(token, "ZAPEOF")) {
1387 if (sos.storageManager->requiredCodingSystem())
1388 mgr_.message(EntityManagerMessages::fsiZapeofNotApplicable);
1390 mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
1391 StringMessageArg(token));
1392 else if (gotValue) {
1393 if (matchKey(value, "ZAPEOF"))
1395 else if (matchKey(value, "NOZAPEOF"))
1398 mgr_.message(EntityManagerMessages::fsiBadZapeof,
1399 StringMessageArg(value));
1405 else if (matchKey(token, "NOZAPEOF")) {
1406 if (sos.storageManager->requiredCodingSystem())
1407 mgr_.message(EntityManagerMessages::fsiZapeofNotApplicable);
1409 mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
1410 StringMessageArg(idCharset_.execToDesc("ZAPEOF")));
1412 mgr_.message(EntityManagerMessages::fsiValueAsName,
1413 StringMessageArg(token));
1418 else if (matchKey(token, "SEARCH")) {
1420 mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
1421 StringMessageArg(token));
1422 else if (gotValue) {
1423 if (matchKey(value, "SEARCH"))
1425 else if (matchKey(value, "NOSEARCH"))
1428 mgr_.message(EntityManagerMessages::fsiBadSearch,
1429 StringMessageArg(value));
1435 else if (matchKey(token, "NOSEARCH")) {
1437 mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
1438 StringMessageArg(idCharset_.execToDesc("SEARCH")));
1440 mgr_.message(EntityManagerMessages::fsiValueAsName,
1441 StringMessageArg(token));
1446 else if (matchKey(token, "FOLD")) {
1448 mgr_.message(EntityManagerMessages::fsiFoldNotNeutral);
1450 mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
1451 StringMessageArg(token));
1452 else if (gotValue) {
1453 if (matchKey(value, "FOLD"))
1455 else if (matchKey(value, "NOFOLD"))
1458 mgr_.message(EntityManagerMessages::fsiBadFold,
1459 StringMessageArg(value));
1465 else if (matchKey(token, "NOFOLD")) {
1467 mgr_.message(EntityManagerMessages::fsiFoldNotNeutral);
1469 mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
1470 StringMessageArg(idCharset_.execToDesc("FOLD")));
1472 mgr_.message(EntityManagerMessages::fsiValueAsName,
1473 StringMessageArg(token));
1478 else if (matchKey(token, "SMCRD")) {
1480 mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
1481 StringMessageArg(token));
1482 else if (gotValue) {
1483 if (value.size() == 0)
1485 else if (value.size() == 1)
1488 mgr_.message(EntityManagerMessages::fsiBadSmcrd,
1489 StringMessageArg(value));
1492 mgr_.message(EntityManagerMessages::fsiMissingValue,
1493 StringMessageArg(token));
1496 else if (matchKey(token, "RECORDS")) {
1497 if (sos.storageManager->requiresCr())
1498 mgr_.message(EntityManagerMessages::fsiRecordsNotApplicable);
1499 else if (hadRecords)
1500 mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
1501 StringMessageArg(token));
1502 else if (gotValue) {
1503 if (!lookupRecords(value, sos.records))
1504 mgr_.message(EntityManagerMessages::fsiUnsupportedRecords,
1505 StringMessageArg(value));
1508 mgr_.message(EntityManagerMessages::fsiMissingValue,
1509 StringMessageArg(token));
1512 else if (matchKey(token, "SOIBASE")) {
1514 mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
1515 StringMessageArg(token));
1517 value.swap(sos.baseId);
1519 mgr_.message(EntityManagerMessages::fsiMissingValue,
1520 StringMessageArg(token));
1521 sos.baseId.resize(0);
1525 else if (lookupRecords(token, records)) {
1526 if (sos.storageManager->requiresCr())
1527 mgr_.message(EntityManagerMessages::fsiRecordsNotApplicable);
1528 else if (hadRecords)
1529 mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
1530 StringMessageArg(idCharset_.execToDesc("RECORDS")));
1532 sos.records = records;
1534 mgr_.message(EntityManagerMessages::fsiValueAsName,
1535 StringMessageArg(token));
1538 else if (matchKey(token, "NOTRACK")) {
1540 mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
1541 StringMessageArg(idCharset_.execToDesc("TRACKING")));
1545 mgr_.message(EntityManagerMessages::fsiValueAsName,
1546 StringMessageArg(token));
1549 else if (matchKey(token, "TRACK")) {
1551 mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
1552 StringMessageArg(idCharset_.execToDesc("TRACKING")));
1554 mgr_.message(EntityManagerMessages::fsiValueAsName,
1555 StringMessageArg(token));
1559 mgr_.message(gotValue
1560 ? EntityManagerMessages::fsiUnsupportedAttribute
1561 : EntityManagerMessages::fsiUnsupportedAttributeToken,
1562 StringMessageArg(token));
1564 if (hadBase && sos.baseId.size() > 0) {
1565 convertId(sos.baseId, smcrd, sos.storageManager);
1567 if (!sos.storageManager->transformNeutral(sos.baseId, fold, mgr_))
1568 sos.baseId.resize(0);
1571 if (!hadZapeof && hadRecords && sos.records == StorageObjectSpec::asis)
// Keyword table for record types: pairs each name ("FIND", "ASIS", "CR",
// "LF", "CRLF") with the corresponding StorageObjectSpec value.
// recordsName() and lookupRecords() both scan this table linearly.
1576 FSIParser::RecordType FSIParser::recordTypeTable[] = {
1577 { "FIND", StorageObjectSpec::find },
1578 { "ASIS", StorageObjectSpec::asis },
1579 { "CR", StorageObjectSpec::cr },
1580 { "LF", StorageObjectSpec::lf },
1581 { "CRLF", StorageObjectSpec::crlf }
// Maps a StorageObjectSpec::Records value back to its keyword: scans
// recordTypeTable and returns the name of the first entry whose value
// equals records.
// NOTE(review): the result when no entry matches is not visible here -- confirm.
1584 const char *FSIParser::recordsName(StorageObjectSpec::Records records)
1586 for (size_t i = 0; i < SIZEOF(recordTypeTable); i++)
1587 if (records == recordTypeTable[i].value)
1588 return recordTypeTable[i].name;
// Looks token up in recordTypeTable, comparing with matchKey(). On a match
// the entry's value is stored in result.
// NOTE(review): the return statements are not visible here; callers treat a
// false result as "not a record type" (e.g. they report
// fsiUnsupportedRecords when !lookupRecords(...)) -- confirm.
1592 Boolean FSIParser::lookupRecords(const StringC &token,
1593 StorageObjectSpec::Records &result)
1595 for (size_t i = 0; i < SIZEOF(recordTypeTable); i++)
1596 if (matchKey(token, recordTypeTable[i].name)) {
1597 result = recordTypeTable[i].value;
// Fills in default attribute values on sos (records, zapEof, baseId,
// codingSystem, codingSystemName). The defaults are drawn from
// sos.storageManager, from the default spec defSpec_ when one is set, and
// from the entity manager em_.
1603 void FSIParser::setDefaults(StorageObjectSpec &sos)
// A storage manager that requires CR forces the record type to cr.
1605 if (sos.storageManager->requiresCr())
1606 sos.records = StorageObjectSpec::cr;
// A default spec whose record type is asis makes asis the default here too.
1608 || (defSpec_ && defSpec_->records == StorageObjectSpec::asis))
1609 sos.records = StorageObjectSpec::asis;
// Tested when isNdata_ is set or the default spec has zapEof turned off.
1610 if (isNdata_ || (defSpec_ && !defSpec_->zapEof))
// The base id is inherited from the default spec only when that spec uses
// the same storage manager as sos.
1612 if (defSpec_ && defSpec_->storageManager == sos.storageManager) {
// Prefer the default spec's id when it is non-empty.
1613 if (defSpec_->id.size())
1614 sos.baseId = defSpec_->id;
// Otherwise start from its specId and resolve it against its own baseId.
1616 sos.baseId = defSpec_->specId;
1617 sos.storageManager->resolveRelative(defSpec_->baseId,
// A coding system required by the storage manager is tried first.
1622 sos.codingSystem = sos.storageManager->requiredCodingSystem();
1623 if (sos.codingSystem)
1624 sos.zapEof = 0; // hack
// Fall back to the entity manager's default coding system.
1626 sos.codingSystem = em_->defaultCodingSystem_;
// Search the registered coding systems for one that reports isIdentity().
1628 const InputCodingSystem *id = 0;
1630 for (j = 0; j < em_->codingSystems_.size(); j++)
1631 if (em_->codingSystems_[j].ics->isIdentity()) {
1632 id = em_->codingSystems_[j].ics;
// Use the identity coding system, and record its registered name, when one
// was found and it differs from the default coding system.
1635 if (id && id != em_->defaultCodingSystem_) {
1636 sos.codingSystem = id;
1637 sos.codingSystemName = em_->codingSystems_[j].name;
// Otherwise inherit coding system and name from the default spec.
1640 else if (defSpec_) {
1641 sos.codingSystem = defSpec_->codingSystem;
1642 sos.codingSystemName = defSpec_->codingSystemName;
// Scans one attribute, reporting results through token and gotValue.
// The scan classifies each character c with matchChar() against the
// delimiters '>', '=', '"' and '\''.
// NOTE(review): presumably token receives the attribute name and gotValue
// tells whether an explicit value followed it; callers branch on gotValue
// (e.g. fsiUnsupportedAttribute vs fsiUnsupportedAttributeToken) -- confirm.
1647 Boolean FSIParser::parseAttribute(StringC &token, Boolean &gotValue,
1657 if (matchChar(c, '>'))
1659 if (matchChar(c, '"') || matchChar(c, '\'') || matchChar(c, '='))
1668 if (matchChar(c, '>') || matchChar(c, '='))
1675 if (!matchChar(c, '=')) {
1686 if (matchChar(c, '>') || matchChar(c, '='))
1688 if (matchChar(c, '"') || matchChar(c, '\'')) {
1696 if (matchChar(c, '\n'))
// A CR or TAB contributes a single space to value.
1698 else if (matchChar(c, '\r') || matchChar(c, '\t'))
1699 value += idCharset_.execToDesc(' ');
1713 if (matchChar(c, '>') || matchChar(c, '=')) {
// Processes numeric character references in str, rewriting the string in
// place with a read index i and a write index j. A reference is recognized
// as '&', '#', at least one decimal digit, and an optional trailing ';'.
// NOTE(review): the statement that stores the decoded value is not visible
// here; presumably val replaces the reference text -- confirm.
1722 void FSIParser::uncharref(StringC &str)
1726 while (i < str.size()) {
// Requires "&#" followed by a digit, with all three characters in range.
1728 if (matchChar(str[i], '&')
1729 && i + 2 < str.size()
1730 && matchChar(str[i + 1], '#')
1731 && convertDigit(str[i + 2], digit)) {
1732 unsigned long val = digit;
// Accumulate the remaining decimal digits.
// NOTE(review): no overflow check on val is visible -- confirm.
1734 while (i < str.size() && convertDigit(str[i], digit)) {
1735 val = val*10 + digit;
// A ';' directly after the digits is tested for here.
1739 if (i < str.size() && matchChar(str[i], ';'))
// Any other character is copied from the read position to the write position.
1743 str[j++] = str[i++];
// Converts id for use with storage manager sm, building the converted text
// in newId.
//   id    - identifier to convert (passed by non-const reference;
//           presumably replaced by newId on success -- confirm).
//   smcrd - character that introduces a decimal character number in id
//           (smcrd, one or more digits, optional ';').
//   sm    - storage manager supplying the target id charset and RE string.
// Returns 0 on the character-mapping failure path below; other return
// values are not visible here.
1748 Boolean FSIParser::convertId(StringC &id, Xchar smcrd,
1749 const StorageManager *sm)
1751 const CharsetInfo *smCharset = sm->idCharset();
1754 while (i < id.size()) {
1757 ISet<WideChar> wideSet;
// Numeric escape: smcrd followed by at least one digit.
1759 if (Xchar(id[i]) == smcrd
1760 && i + 1 < id.size()
1761 && convertDigit(id[i + 1], digit)) {
// Accumulate the decimal digits of the character number.
1764 while (i < id.size() && convertDigit(id[i], digit)) {
1765 val = val*10 + digit;
// A ';' directly after the digits is tested for here.
1769 if (i < id.size() && matchChar(id[i], ';'))
// The storage manager has its own id charset: map each character from
// idCharset_ to a universal code, then into smCharset.
1772 else if (smCharset) {
1773 if (!idCharset_.descToUniv(id[i++], univ))
// RS is tested for separately from the other characters.
1775 if (univ == UnivCharsetDesc::rs)
// RE becomes the storage manager's RE string when it provides one.
1777 else if (univ == UnivCharsetDesc::re && sm->reString())
1778 newId += *sm->reString();
// Fail when univToDesc() does not report exactly one mapping.
1779 else if (smCharset->univToDesc(univ, wide, wideSet) != 1
1781 return 0; // FIXME give error
1783 newId += Char(wide);
// Default constructor for ParsedSystemId.
1792 ParsedSystemId:: ParsedSystemId()
// Forward declaration: ParsedSystemId::unparse() calls unparseSoi() before
// its definition appears later in this file. The needSmcrd reference
// parameter is inspected by the caller after the call.
1797 void unparseSoi(const StringC &soi,
1798 const CharsetInfo *idCharset,
1799 const CharsetInfo &resultCharset,
1801 Boolean &needSmcrd);
// Writes a textual form of this parsed system identifier into result.
// Literal text is converted with resultCharset.execToDesc().
// Output order: one <CATALOG ...> tag per catalog entry in maps, then one
// tag per storage object spec in this object.
// NOTE(review): the conditions guarding NOTRACK, NOSEARCH, NOZAPEOF and
// SMCRD are not visible here -- confirm before relying on them.
1803 void ParsedSystemId::unparse(const CharsetInfo &resultCharset,
1804 StringC &result) const
1806 size_t len = size();
// Catalog map entries come first.
1809 for (i = 0; i < maps.size(); i++) {
1810 if (maps[i].type == ParsedSystemIdMap::catalogDocument)
1811 result += resultCharset.execToDesc("<CATALOG>");
1812 else if (maps[i].type == ParsedSystemIdMap::catalogPublic) {
// The public identifier is added between the double quotes as is.
1813 result += resultCharset.execToDesc("<CATALOG PUBLIC=\"");
1814 result += maps[i].publicId;
1815 result += resultCharset.execToDesc("\">");
// One tag per storage object spec, opened with '<' and the storage
// manager's type name.
1818 for (i = 0; i < len; i++) {
1819 const StorageObjectSpec &sos = (*this)[i];
1820 result += resultCharset.execToDesc('<');
1821 result += resultCharset.execToDesc(sos.storageManager->type());
1823 result += resultCharset.execToDesc(" NOTRACK");
1825 result += resultCharset.execToDesc(" NOSEARCH");
// The record type keyword is written only when the storage manager does
// not require CR and the record type is not find.
1826 if (!sos.storageManager->requiresCr()
1827 && sos.records != StorageObjectSpec::find) {
1828 result += resultCharset.execToDesc(' ');
1829 result += resultCharset.execToDesc(FSIParser::recordsName(sos.records));
// A named coding system is written as BCTF=<name>.
1831 if (sos.codingSystemName) {
1833 result += resultCharset.execToDesc(" NOZAPEOF");
1834 result += resultCharset.execToDesc(" BCTF=");
1835 result += resultCharset.execToDesc(sos.codingSystemName);
1837 Boolean needSmcrd = 0;
// A non-empty base id is written as SOIBASE='...', encoded by unparseSoi().
1838 if (sos.baseId.size() != 0) {
1839 result += resultCharset.execToDesc(" SOIBASE='");
1840 unparseSoi(sos.baseId,
1841 sos.storageManager->idCharset(),
1845 result += resultCharset.execToDesc('\'');
// The spec id itself is encoded by unparseSoi() as well.
1848 unparseSoi(sos.specId,
1849 sos.storageManager->idCharset(),
1854 result += resultCharset.execToDesc(" SMCRD='^'");
1855 result += resultCharset.execToDesc('>');
1860 void unparseSoi(const StringC &soi,
1861 const CharsetInfo *idCharset,
1862 const CharsetInfo &resultCharset,
1867 for (size_t i = 0; i < soi.size(); i++) {
1869 sprintf(buf, "&#%lu;", (unsigned long)soi[i]);
1870 result += resultCharset.execToDesc(buf);
1874 for (size_t i = 0; i < soi.size(); i++) {
1877 ISet<WideChar> toSet;
1878 if (!idCharset->descToUniv(soi[i], univ)
1883 #ifndef MSDOS_FILENAMES
1884 || univ == 92 // backslash
1887 || resultCharset.univToDesc(univ, to, toSet) != 1) {
1890 sprintf(buf, "^%lu;", (unsigned long)soi[i]);
1891 result += resultCharset.execToDesc(buf);
1895 case 34: // double quote
1897 case 39: // apostrophe
1901 sprintf(buf, "&#%lu;", (unsigned long)to);
1902 result += resultCharset.execToDesc(buf);