2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these libraries and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
23 /* $XConsortium: ExtendEntityManager.C /main/1 1996/07/29 16:51:42 cde-hp $ */
24 // Copyright (c) 1994, 1995, 1996 James Clark
25 // See the file COPYING for copying permission.
28 #pragma implementation
32 #include "ExtendEntityManager.h"
34 #include "MessageArg.h"
35 #include "OffsetOrderedList.h"
37 #include "StorageManager.h"
41 #include "RegisteredCodingSystem.h"
43 #include "EntityManagerMessages.h"
44 #include "StorageObjectPosition.h"
46 #include "CodingSystem.h"
47 #include "InputSource.h"
49 #include "EntityCatalog.h"
57 #ifdef DECLARE_MEMMOVE
59 void *memmove(void *, const void *, size_t);
64 namespace SP_NAMESPACE {
67 const char EOFCHAR = '\032'; // Control-Z
69 class ExternalInputSource;
71 class EntityManagerImpl : public ExtendEntityManager {
73 EntityManagerImpl(StorageManager *defaultStorageManager,
74 const InputCodingSystem *defaultCodingSystem);
75 void setCatalogManager(CatalogManager *catalogManager);
76 void registerStorageManager(StorageManager *);
77 void registerCodingSystem(const char *, const InputCodingSystem *);
78 InputSource *open(const StringC &sysid,
83 InputSource *openIfExists(const StringC &sysid,
88 ConstPtr<EntityCatalog> makeCatalog(StringC &systemId,
89 const CharsetInfo &charset,
91 Boolean expandSystemId(const StringC &,
98 Boolean mergeSystemIds(const Vector<StringC> &,
99 Boolean mapCatalogDocument,
103 StorageManager *lookupStorageType(const StringC &, const CharsetInfo &) const;
104 StorageManager *lookupStorageType(const char *) const;
105 StorageManager *guessStorageType(const StringC &, const CharsetInfo &) const;
106 const InputCodingSystem *lookupCodingSystem(const StringC &,
108 const char *&) const;
109 Boolean resolveSystemId(const StringC &str,
110 const CharsetInfo &idCharset,
112 const Location &defLocation,
114 ParsedSystemId &parsedSysid) const;
115 Boolean parseSystemId(const StringC &str,
116 const CharsetInfo &idCharset,
118 const StorageObjectSpec *defSpec,
120 ParsedSystemId &parsedSysid) const;
122 EntityManagerImpl(const EntityManagerImpl &); // undefined
123 void operator=(const EntityManagerImpl &); // undefined
124 static const StorageObjectSpec *defStorageObject(const Location &);
125 static Boolean matchKey(const StringC &type, const char *s,
126 const CharsetInfo &docCharset);
127 NCVector<Owner<StorageManager> > storageManagers_;
128 Vector<RegisteredCodingSystem> codingSystems_;
129 Owner<StorageManager> defaultStorageManager_;
130 const InputCodingSystem *defaultCodingSystem_;
131 Owner<CatalogManager> catalogManager_;
132 friend class FSIParser;
135 class ExternalInfoImpl : public ExternalInfo {
138 ExternalInfoImpl(ParsedSystemId &parsedSysid);
139 const StorageObjectSpec &spec(size_t i) const;
140 size_t nSpecs() const;
141 const ParsedSystemId &parsedSystemId() const;
143 void noteStorageObjectEnd(Offset);
144 void noteInsertedRSs();
145 void setDecoder(size_t i, Decoder *);
146 StringC &id(size_t i);
147 Boolean convertOffset(Offset, StorageObjectLocation &) const;
149 ParsedSystemId parsedSysid_;
150 NCVector<StorageObjectPosition> position_;
151 size_t currentIndex_;
152 // list of inserted RSs
153 OffsetOrderedList rsList_;
157 class ExternalInputSource : public InputSource {
159 ExternalInputSource(ParsedSystemId &parsedSysid,
160 InputSourceOrigin *origin,
162 Boolean mayNotExist = 0);
163 void pushCharRef(Char, const NamedCharRef &);
164 ~ExternalInputSource();
166 Xchar fill(Messenger &);
167 Boolean rewind(Messenger &);
168 void willNotRewind();
172 void noteRSAt(const Char *);
173 void reallocateBuffer(size_t size);
174 void insertChar(Char);
175 static const Char *findNextCr(const Char *start, const Char *end);
176 static const Char *findNextLf(const Char *start, const Char *end);
177 static const Char *findNextCrOrLf(const Char *start, const Char *end);
179 ExternalInfoImpl *info_;
182 Offset bufLimOffset_;
185 NCVector<Owner<StorageObject> > sov_;
190 const char *leftOver_;
193 Boolean mayNotExist_;
202 RecordType recordType_;
208 FSIParser(const StringC &, const CharsetInfo &idCharset,
210 const StorageObjectSpec *defSpec,
211 const EntityManagerImpl *em,
213 Boolean parse(ParsedSystemId &parsedSysid);
214 static const char *recordsName(StorageObjectSpec::Records records);
217 StorageObjectSpec::Records value;
220 Boolean handleInformal(size_t startIndex, ParsedSystemId &parsedSysid);
221 Boolean convertId(StringC &, Xchar smcrd, const StorageManager *);
224 StorageManager *lookupStorageType(const StringC &key, Boolean &neutral);
225 Boolean matchKey(const StringC &, const char *);
226 Boolean matchChar(Xchar, char);
228 Boolean convertDigit(Xchar c, int &weight);
229 void uncharref(StringC &);
230 Boolean setAttributes(StorageObjectSpec &sos, Boolean neutral,
231 Xchar &smcrd, Boolean &fold);
232 Boolean setCatalogAttributes(ParsedSystemId &parsedSysid);
233 void setDefaults(StorageObjectSpec &sos);
234 Boolean parseAttribute(StringC &token, Boolean &gotValue, StringC &value);
235 Boolean lookupRecords(const StringC &token, StorageObjectSpec::Records &);
236 void convertMinimumLiteral(const StringC &from, StringC &to);
241 const EntityManagerImpl *em_;
242 const StorageObjectSpec *defSpec_;
243 const CharsetInfo &idCharset_;
245 static RecordType recordTypeTable[];
248 const Char RS = '\n';
249 const Char RE = '\r';
250 const char lineEnd = '\n';
252 ExtendEntityManager::CatalogManager::~CatalogManager()
// Factory for the concrete entity manager: returns a newly allocated
// EntityManagerImpl constructed from the given default storage manager
// and default input coding system.
256 ExtendEntityManager *ExtendEntityManager::make(StorageManager *sm,
257 const InputCodingSystem *cs)
259 return new EntityManagerImpl(sm, cs);
// Translates the offset `off` within the entity described by `info` into a
// StorageObjectLocation stored in `loc`.
// `info` is downcast to ExternalInfoImpl and the conversion is delegated
// to ExternalInfoImpl::convertOffset, whose result is returned.
262 Boolean ExtendEntityManager::externalize(const ExternalInfo *info,
264 StorageObjectLocation &loc)
268 const ExternalInfoImpl *p = DYNAMIC_CAST_CONST_PTR(ExternalInfoImpl, info);
271 return p->convertOffset(off, loc);
// Returns a pointer to the ParsedSystemId held by `info`, obtained by
// downcasting `info` to ExternalInfoImpl. The pointee is owned by the
// ExternalInfoImpl (parsedSystemId() returns a reference to its member).
274 const ParsedSystemId *
275 ExtendEntityManager::externalInfoParsedSystemId(const ExternalInfo *info)
279 const ExternalInfoImpl *p = DYNAMIC_CAST_CONST_PTR(ExternalInfoImpl, info);
282 return &p->parsedSystemId();
// Initializes defaultStorageManager_ and defaultCodingSystem_ from the
// arguments. defaultStorageManager_ is an Owner<StorageManager>, so the
// storage manager is presumably owned by this object from here on --
// confirm against Owner's semantics.
286 EntityManagerImpl::EntityManagerImpl(StorageManager *defaultStorageManager,
287 const InputCodingSystem *defaultCodingSystem)
288 : defaultStorageManager_(defaultStorageManager),
289 defaultCodingSystem_(defaultCodingSystem)
// Opens the entity identified by `sysid`.
// The system identifier is parsed into a ParsedSystemId, passed through
// the catalog manager's mapCatalog, and then wrapped in a newly allocated
// ExternalInputSource. The last constructor argument is mayNotExist, which
// is 0 here (contrast with openIfExists).
// NOTE(review): catalogManager_ is dereferenced unconditionally; presumably
// setCatalogManager must have been called first -- confirm.
293 InputSource *EntityManagerImpl::open(const StringC &sysid,
294 const CharsetInfo &docCharset,
295 InputSourceOrigin *origin,
299 ParsedSystemId parsedSysid;
300 if (!parseSystemId(sysid, docCharset, 0, 0, mgr, parsedSysid)
301 || !catalogManager_->mapCatalog(parsedSysid, this, mgr))
303 return new ExternalInputSource(parsedSysid, origin, mayRewind, 0);
// Same sequence as open(): parse the system identifier, apply the catalog
// manager's mapCatalog, and create an ExternalInputSource. The only visible
// difference is that mayNotExist (the last constructor argument) is 1.
// NOTE(review): catalogManager_ is dereferenced unconditionally; presumably
// setCatalogManager must have been called first -- confirm.
306 InputSource *EntityManagerImpl::openIfExists(const StringC &sysid,
307 const CharsetInfo &docCharset,
308 InputSourceOrigin *origin,
312 ParsedSystemId parsedSysid;
313 if (!parseSystemId(sysid, docCharset, 0, 0, mgr, parsedSysid)
314 || !catalogManager_->mapCatalog(parsedSysid, this, mgr))
316 return new ExternalInputSource(parsedSysid, origin, mayRewind, 1);
// Builds an entity catalog by forwarding to the installed catalog manager,
// passing this entity manager along with the caller's arguments.
319 ConstPtr<EntityCatalog>
320 EntityManagerImpl::makeCatalog(StringC &systemId,
321 const CharsetInfo &charset,
324 return catalogManager_->makeCatalog(systemId, charset, this, mgr);
// Combines several system identifiers into one string written to `result`.
// When mapCatalogDocument is set, a map entry of type catalogDocument is
// appended to parsedSysid.maps first. parseSystemId is then called for each
// element of `sysids`, and finally parsedSysid is unparsed into `result`
// using `charset`.
328 EntityManagerImpl::mergeSystemIds(const Vector<StringC> &sysids,
329 Boolean mapCatalogDocument,
330 const CharsetInfo &charset,
332 StringC &result) const
334 ParsedSystemId parsedSysid;
335 if (mapCatalogDocument) {
336 parsedSysid.maps.resize(parsedSysid.maps.size() + 1);
337 parsedSysid.maps.back().type = ParsedSystemIdMap::catalogDocument;
339 for (size_t i = 0; i < sysids.size(); i++)
340 if (!parseSystemId(sysids[i],
347 parsedSysid.unparse(charset, result);
// Expands the system identifier `str` into its fully specified form,
// written to `result`.
// The default storage object spec is derived from defLocation and passed
// to parseSystemId. If mapCatalogPublic is non-null, a catalogPublic map
// carrying that public identifier is inserted at the front of
// parsedSysid.maps before the result is unparsed.
352 EntityManagerImpl::expandSystemId(const StringC &str,
353 const Location &defLocation,
355 const CharsetInfo &charset,
356 const StringC *mapCatalogPublic,
360 ParsedSystemId parsedSysid;
361 const StorageObjectSpec *defSpec = defStorageObject(defLocation);
362 if (!parseSystemId(str, charset, isNdata, defSpec, mgr, parsedSysid))
364 if (mapCatalogPublic) {
365 ParsedSystemIdMap map;
366 map.type = ParsedSystemIdMap::catalogPublic;
367 map.publicId = *mapCatalogPublic;
368 parsedSysid.maps.insert(parsedSysid.maps.begin(), 1, map);
370 parsedSysid.unparse(charset, result);
// Parses `str` into `parsedSysid` by constructing an FSIParser over it
// (with this entity manager as the parser's context) and returning the
// result of FSIParser::parse.
374 Boolean EntityManagerImpl::parseSystemId(const StringC &str,
375 const CharsetInfo &idCharset,
377 const StorageObjectSpec *defSpec,
379 ParsedSystemId &parsedSysid) const
381 FSIParser fsiParser(str, idCharset, isNdata, defSpec, this, mgr);
382 return fsiParser.parse(parsedSysid);
// Picks a storage manager for an identifier by asking each manager's
// guessIsId. The registered managers are consulted first, in registration
// order; the default storage manager is consulted last.
386 EntityManagerImpl::guessStorageType(const StringC &type,
387 const CharsetInfo &docCharset) const
389 for (size_t i = 0; i < storageManagers_.size(); i++)
390 if (storageManagers_[i]->guessIsId(type, docCharset))
391 return storageManagers_[i].pointer();
392 if (defaultStorageManager_->guessIsId(type, docCharset))
393 return defaultStorageManager_.pointer();
// Looks up a storage manager by its type name, given as a StringC in the
// document character set. Names are compared with matchKey (both upper-
// and lower-case forms accepted). Unlike guessStorageType, the default
// storage manager is checked before the registered ones. An empty `type`
// is handled as a special case before any comparison.
398 EntityManagerImpl::lookupStorageType(const StringC &type,
399 const CharsetInfo &docCharset) const
401 if (type.size() == 0)
403 if (matchKey(type, defaultStorageManager_->type(), docCharset))
404 return defaultStorageManager_.pointer();
405 for (size_t i = 0; i < storageManagers_.size(); i++)
406 if (matchKey(type, storageManagers_[i]->type(), docCharset))
407 return storageManagers_[i].pointer();
// Looks up a storage manager by its type name given as a C string; the
// default storage manager is checked first, then the registered ones.
// NOTE(review): `type` is compared to each manager's type() with ==.
// If type() returns a const char *, this is a pointer-identity test rather
// than a string comparison, so callers would have to pass the very pointer
// a manager returns -- confirm this is intended.
412 EntityManagerImpl::lookupStorageType(const char *type) const
414 if (type == defaultStorageManager_->type())
415 return defaultStorageManager_.pointer();
416 for (size_t i = 0; i < storageManagers_.size(); i++)
417 if (type == storageManagers_[i]->type())
418 return storageManagers_[i].pointer();
// Searches the registered coding systems for one whose name matches `type`
// (compared with matchKey). On a match, `name` is set to the registered
// name and the associated InputCodingSystem is returned.
422 const InputCodingSystem *
423 EntityManagerImpl::lookupCodingSystem(const StringC &type,
424 const CharsetInfo &docCharset,
425 const char *&name) const
427 for (size_t i = 0; i < codingSystems_.size(); i++)
428 if (matchKey(type, codingSystems_[i].name, docCharset)) {
429 name = codingSystems_[i].name;
430 return codingSystems_[i].ics;
// Compares the document-charset string `type` with the C string `s`.
// The lengths must agree; each character of `type` must then equal either
// the upper-case or the lower-case form of the corresponding character of
// `s`, after mapping through docCharset.execToDesc.
// NOTE(review): s[i] is passed to toupper/tolower as a plain char; a
// negative value would be outside those functions' defined domain --
// presumably keys are always ASCII, confirm.
436 EntityManagerImpl::matchKey(const StringC &type,
438 const CharsetInfo &docCharset)
440 if (strlen(s) != type.size())
442 for (size_t i = 0; i < type.size(); i++)
443 if (docCharset.execToDesc(toupper(s[i])) != type[i]
444 && docCharset.execToDesc(tolower(s[i])) != type[i])
// Appends `sm` to the list of registered storage managers: the vector is
// grown by one slot and the new last element is assigned `sm`.
// The elements are Owner<StorageManager>, so `sm` is presumably owned by
// this entity manager afterwards -- confirm against Owner's semantics.
449 void EntityManagerImpl::registerStorageManager(StorageManager *sm)
451 storageManagers_.resize(storageManagers_.size() + 1);
452 storageManagers_.back() = sm;
// Registers a coding system: codingSystems_ is grown by one entry and a
// reference to the new last entry is taken for the registration.
455 void EntityManagerImpl::registerCodingSystem(const char *name,
456 const InputCodingSystem *ics)
458 codingSystems_.resize(codingSystems_.size() + 1);
459 RegisteredCodingSystem &rcs = codingSystems_.back();
// Installs the catalog manager used by open, openIfExists and makeCatalog.
// catalogManager_ is an Owner<CatalogManager>.
464 void EntityManagerImpl::setCatalogManager(CatalogManager *catalogManager)
466 catalogManager_ = catalogManager;
// Determines the StorageObjectSpec that `defLocation` came from.
// Working on a copy of defLocation, the location's origin is inspected:
// when it is an InputSourceOrigin, its start offset for the location index
// and its ExternalInfo are fetched; otherwise the search moves to the
// origin's parent location. The (info, off) pair is then passed to
// ExtendEntityManager::externalize and the resulting storageObjectSpec is
// returned.
469 const StorageObjectSpec *
470 EntityManagerImpl::defStorageObject(const Location &defLocation)
473 const ExternalInfo *info;
474 Location loc(defLocation);
476 if (loc.origin().isNull())
478 const InputSourceOrigin *inputSourceOrigin = loc.origin()->asInputSourceOrigin();
479 if (inputSourceOrigin) {
480 off = inputSourceOrigin->startOffset(loc.index());
481 info = inputSourceOrigin->externalInfo();
484 if (!inputSourceOrigin->defLocation(off, loc))
488 loc = loc.origin()->parent();
490 StorageObjectLocation soLoc;
491 if (!ExtendEntityManager::externalize(info, off, soLoc))
493 return soLoc.storageObjectSpec;
// Creates an input source over the storage objects listed in parsedSysid.
// sov_ is sized in the initializer list to one slot per storage object
// spec; this happens before parsedSysid is handed to the ExternalInfoImpl
// constructor, which swaps the caller's contents into its own member.
// The new ExternalInfoImpl is also installed on `origin`.
496 ExternalInputSource::ExternalInputSource(ParsedSystemId &parsedSysid,
497 InputSourceOrigin *origin,
500 : InputSource(origin, 0, 0),
501 mayRewind_(mayRewind),
502 mayNotExist_(mayNotExist),
503 sov_(parsedSysid.size())
506 info_ = new ExternalInfoImpl(parsedSysid);
507 origin->setExternalInfo(info_);
510 void ExternalInputSource::init()
523 ExternalInputSource::~ExternalInputSource()
// Restarts reading from the beginning.
// A fresh ExternalInfoImpl is built from a copy of the current parsed
// system id (a copy is required because the ExternalInfoImpl constructor
// swaps its argument's contents away) and installed on the origin. Each
// storage object at an index below soIndex_ that exists is then asked to
// rewind.
529 Boolean ExternalInputSource::rewind(Messenger &mgr)
534 // reset makes a new EntityOrigin
535 ParsedSystemId parsedSysid(info_->parsedSystemId());
536 info_ = new ExternalInfoImpl(parsedSysid);
537 inputSourceOrigin()->setExternalInfo(info_);
539 for (size_t i = 0; i < soIndex_; i++) {
540 if (sov_[i] && !sov_[i]->rewind(mgr))
// Forwards the willNotRewind notification to the storage objects at
// indices below soIndex_.
547 void ExternalInputSource::willNotRewind()
549 for (size_t i = 0; i < soIndex_; i++)
551 sov_[i]->willNotRewind();
555 // Round N up to the nearest multiple of TO.
556 // TO must be a power of 2 (the bit mask ~(to - 1) relies on this).
559 size_t roundUp(size_t n, size_t to)
561 return (n + (to - 1)) & ~(to - 1);
// Reports an RS located at buffer position `p` to info_.
// The offset passed on is bufLimOffset_ minus the distance from p to
// bufLim_; presumably bufLimOffset_ is the offset corresponding to
// bufLim_ -- confirm.
565 void ExternalInputSource::noteRSAt(const Char *p)
567 info_->noteRS(bufLimOffset_ - (bufLim_ - p));
571 void ExternalInputSource::noteRS()
576 Xchar ExternalInputSource::fill(Messenger &mgr)
578 ASSERT(cur() == end());
579 while (end() >= bufLim_) {
582 if (soIndex_ >= sov_.size())
585 info_->noteStorageObjectEnd(bufLimOffset_ - (bufLim_ - end()));
586 const StorageObjectSpec &spec = info_->spec(soIndex_);
588 NullMessenger nullMgr;
590 = spec.storageManager->makeStorageObject(spec.specId, spec.baseId,
593 info_->id(soIndex_));
597 = spec.storageManager->makeStorageObject(spec.specId, spec.baseId,
600 info_->id(soIndex_));
601 so_ = sov_[soIndex_].pointer();
603 decoder_ = spec.codingSystem->makeDecoder();
604 info_->setDecoder(soIndex_, decoder_);
605 zapEof_ = spec.zapEof;
606 switch (spec.records) {
607 case StorageObjectSpec::asis:
611 case StorageObjectSpec::cr:
614 case StorageObjectSpec::lf:
617 case StorageObjectSpec::crlf:
620 case StorageObjectSpec::find:
621 recordType_ = unknown;
627 readSize_ = so_->getBlockSize();
636 size_t keepSize = end() - start();
637 const size_t align = sizeof(int)/sizeof(Char);
638 size_t readSizeChars = (readSize_ + (sizeof(Char) - 1))/sizeof(Char);
639 readSizeChars = roundUp(readSizeChars, align);
640 size_t neededSize; // in Chars
642 // compute neededSize and readSize
643 unsigned minBytesPerChar = decoder_->minBytesPerChar();
644 if (nLeftOver_ == 0 && minBytesPerChar >= sizeof(Char)) {
645 // In this case we want to do decoding in place.
646 // FIXME It might be a win on some systems (Irix?) to arrange that the
647 // read buffer is on a page boundary.
649 if (keepSize >= size_t(-1)/sizeof(Char) - (align - 1) - insertRS_)
650 abort(); // FIXME throw an exception
652 // Now size_t(-1)/sizeof(Char) - (align - 1) - insertRS_ - keepSize > 0
654 > size_t(-1)/sizeof(Char) - (align - 1) - insertRS_ - keepSize)
656 neededSize = roundUp(readSizeChars + keepSize + insertRS_, align);
657 startOffset = ((neededSize > bufSize_ ? neededSize : bufSize_)
658 - readSizeChars - insertRS_ - keepSize);
661 // Needs to be room for everything before decoding.
662 neededSize = (keepSize + insertRS_ + readSizeChars
663 + (nLeftOver_ + sizeof(Char) - 1)/sizeof(Char));
664 // Also must be room for everything after decoding.
666 = (keepSize + insertRS_
667 // all the converted characters
668 + (nLeftOver_ + readSize_)/minBytesPerChar
669 // enough Chars to contain left over bytes
670 + ((readSize_ % minBytesPerChar + sizeof(Char) - 1)
672 if (neededSize2 > neededSize)
673 neededSize = neededSize2;
674 neededSize = roundUp(neededSize, align);
675 if (neededSize > size_t(-1)/sizeof(Char))
679 if (bufSize_ < neededSize)
680 reallocateBuffer(neededSize);
681 Char *newStart = buf_ + startOffset;
682 if (newStart != start() && keepSize > 0)
683 memmove(newStart, start(), keepSize*sizeof(Char));
684 char *bytesStart = (char *)(buf_ + bufSize_ - readSizeChars) - nLeftOver_;
685 if (nLeftOver_ > 0 && leftOver_ != bytesStart)
686 memmove(bytesStart, leftOver_, nLeftOver_);
691 if (so_->read((char *)(buf_ + bufSize_ - readSizeChars), readSize_,
694 const char *bytesEnd = bytesStart + nLeftOver_ + nread;
695 size_t nChars = decoder_->decode((Char *)end() + insertRS_,
698 - (zapEof_ && bytesEnd[-1] == EOFCHAR),
700 nLeftOver_ = bytesEnd - leftOver_;
705 advanceEnd(end() + 1);
711 bufLimOffset_ += nChars;
719 ASSERT(end() < bufLim_);
726 switch (recordType_) {
729 const Char *e = findNextCrOrLf(end(), bufLim_);
733 info_->noteInsertedRSs();
739 if (e + 1 < bufLim_) {
743 if (e + 2 == bufLim_) {
752 info_->noteInsertedRSs();
757 recordType_ = crUnknown;
768 if (*cur() == '\n') {
770 advanceEnd(cur() + 1);
774 advanceEnd(cur() + 1);
777 info_->noteInsertedRSs();
783 Char *e = (Char *)findNextLf(end(), bufLim_);
795 const Char *e = findNextCr(end(), bufLim_);
806 const Char *e = end();
808 e = findNextLf(e, bufLim_);
813 // Need to delete final RS if not followed by anything.
814 if (e + 1 == bufLim_) {
836 const Char *ExternalInputSource::findNextCr(const Char *start,
839 for (; start < end; start++)
845 const Char *ExternalInputSource::findNextLf(const Char *start,
848 for (; start < end; start++)
// Scans [start, end) for the first character equal to '\n' or '\r'.
854 const Char *ExternalInputSource::findNextCrOrLf(const Char *start,
857 for (; start < end; start++)
858 if (*start == '\n' || *start == '\r')
// Handles a named character reference at the current position.
// Requires that cur() == start() (asserted), then records the reference
// via noteCharRef at index startIndex() + (cur() - start()).
863 void ExternalInputSource::pushCharRef(Char ch, const NamedCharRef &ref)
865 ASSERT(cur() == start());
866 noteCharRef(startIndex() + (cur() - start()), ref);
870 void ExternalInputSource::insertChar(Char ch)
872 if (start() > buf_) {
874 memmove((Char *)start() - 1, start(), (cur() - start())*sizeof(Char));
879 // must have start == buf
880 if (buf_ + (bufSize_ - (nLeftOver_ + sizeof(Char) - 1)/sizeof(Char))
882 if (bufSize_ == size_t(-1))
883 abort(); // FIXME throw an exception
884 reallocateBuffer(bufSize_ + 1);
886 else if (nLeftOver_ > 0 && ((char *)(bufLim_ + 1) > leftOver_)) {
887 char *s = (char *)(buf_ + bufSize_) - nLeftOver_;
888 memmove(s, leftOver_, nLeftOver_);
892 memmove((Char *)cur() + 1, cur(), (bufLim_ - cur())*sizeof(Char));
894 advanceEnd(end() + 1);
899 void ExternalInputSource::reallocateBuffer(size_t newSize)
901 Char *newBuf = new Char[newSize];
903 memcpy(newBuf, buf_, bufSize_*sizeof(Char));
905 changeBuffer(newBuf, buf_);
906 bufLim_ = newBuf + (bufLim_ - buf_);
907 if (nLeftOver_ > 0) {
908 char *s = (char *)(newBuf + bufSize_) - nLeftOver_;
910 newBuf + (leftOver_ - (char *)buf_),
918 RTTI_DEF1(ExternalInfoImpl, ExternalInfo)
// Takes over the contents of `parsedSysid` by swapping them into
// parsedSysid_; the caller's object is left holding whatever parsedSysid_
// held before (it is not otherwise initialized here).
// position_ is sized from the argument in the initializer list, i.e.
// before the swap in the body. notrack_ starts out as the notrack flag of
// the first storage object spec, when there is one.
920 ExternalInfoImpl::ExternalInfoImpl(ParsedSystemId &parsedSysid)
921 : currentIndex_(0), position_(parsedSysid.size())
923 parsedSysid.swap(parsedSysid_);
924 if (parsedSysid_.size() > 0)
925 notrack_ = parsedSysid_[0].notrack;
928 StringC &ExternalInfoImpl::id(size_t i)
930 return parsedSysid_[i].id;
933 void ExternalInfoImpl::setDecoder(size_t i, Decoder *decoder)
935 position_[i].decoder = decoder;
938 void ExternalInfoImpl::noteInsertedRSs()
940 position_[currentIndex_].insertedRSs = 1;
// Records a record start at `offset`: the offset is appended to rsList_.
// The current storage object is flagged startsWithRS when a value
// (presumably `offset` -- confirm) equals the start offset of the current
// storage object, which is 0 for the first one and the previous object's
// endOffset otherwise.
943 void ExternalInfoImpl::noteRS(Offset offset)
946 rsList_.append(offset);
948 == (currentIndex_ == 0 ? 0 : position_[currentIndex_- 1].endOffset))
949 position_[currentIndex_].startsWithRS = 1;
// Records that the current storage object ends at `offset` and advances
// to the next one, capturing the number of RSs seen so far as the next
// object's line1RS and refreshing notrack_ from its spec.
// Nothing is recorded for the final storage object, so its endOffset keeps
// the StorageObjectPosition default of Offset(-1).
952 void ExternalInfoImpl::noteStorageObjectEnd(Offset offset)
954 ASSERT(currentIndex_ < position_.size());
955 // The last endOffset_ must be -1.
956 if (currentIndex_ < position_.size() - 1) {
957 position_[currentIndex_++].endOffset = offset;
958 position_[currentIndex_].line1RS = rsList_.size();
959 notrack_ = parsedSysid_[currentIndex_].notrack;
963 Boolean ExternalInfoImpl::convertOffset(Offset off,
964 StorageObjectLocation &ret) const
966 ret.storageObjectSpec = 0;
967 if (off == Offset(-1) || position_.size() == 0)
969 // the last endOffset_ is Offset(-1), so this will
972 for (i = 0; off >= position_[i].endOffset; i++)
974 for (; parsedSysid_[i].id.size() == 0; i--)
977 ret.storageObjectSpec = &parsedSysid_[i];
978 Offset startOffset = i == 0 ? 0 : position_[i - 1].endOffset;
979 ret.storageObjectOffset = off - startOffset;
980 ret.byteIndex = ret.storageObjectOffset;
981 if (parsedSysid_[i].notrack
982 || parsedSysid_[i].records == StorageObjectSpec::asis) {
983 ret.lineNumber = (unsigned long)-1;
984 if (parsedSysid_[i].records != StorageObjectSpec::asis) {
985 if (position_[i].insertedRSs)
986 ret.byteIndex = (unsigned long)-1;
987 else if (ret.byteIndex > 0 && position_[i].startsWithRS)
988 ret.byteIndex--; // first RS is inserted
990 ret.columnNumber = (unsigned long)-1;
994 size_t line1RS = position_[i].line1RS;
995 // line1RS is now the number of RSs that are before or on the current line.
998 if (rsList_.findPreceding(off, j, colStart)) {
999 if (position_[i].insertedRSs)
1000 ret.byteIndex -= j + 1 - line1RS;
1001 else if (ret.byteIndex > 0 && position_[i].startsWithRS)
1002 ret.byteIndex--; // first RS is inserted
1010 // j is now the number of RSs that are before or on the current line
1011 // colStart is the offset of the first column
1012 ret.lineNumber = j - line1RS + 1 - position_[i].startsWithRS;
1013 // the offset of the first column
1014 if (colStart < startOffset)
1015 colStart = startOffset;
1016 // the RS that starts a line will be in column 0;
1017 // the first real character of a line will be column 1
1018 ret.columnNumber = 1 + off - colStart;
1020 if (!position_[i].decoder
1021 || !position_[i].decoder->convertOffset(ret.byteIndex))
1022 ret.byteIndex = (unsigned long)-1;
1026 const StorageObjectSpec &ExternalInfoImpl::spec(size_t i) const
1028 return parsedSysid_[i];
1031 size_t ExternalInfoImpl::nSpecs() const
1033 return parsedSysid_.size();
1036 const ParsedSystemId &ExternalInfoImpl::parsedSystemId() const
1038 return parsedSysid_;
// Default attribute values for a storage object: no storage manager, no
// coding system or coding system name, notrack off, records = find,
// zapEof on, search on.
1041 StorageObjectSpec::StorageObjectSpec()
1042 : storageManager(0), codingSystem(0), codingSystemName(0), notrack(0),
1043 records(find), zapEof(1), search(1)
// Initial position state: endOffset is Offset(-1) (the value kept by the
// last storage object, see ExternalInfoImpl::noteStorageObjectEnd); the
// RS bookkeeping fields start at zero.
1047 StorageObjectPosition::StorageObjectPosition()
1048 : endOffset(Offset(-1)), line1RS(0), startsWithRS(0), insertedRSs(0)
1052 FSIParser::FSIParser(const StringC &str,
1053 const CharsetInfo &idCharset,
1055 const StorageObjectSpec *defSpec,
1056 const EntityManagerImpl *em,
1060 idCharset_(idCharset),
// Returns the next character of str_ and advances strIndex_ while input
// remains.
1068 Xchar FSIParser::get()
1070 if (strIndex_ < str_.size())
1071 return str_[strIndex_++];
1076 void FSIParser::unget()
// Compares `str` with the C string `s`, accepting either the upper-case or
// the lower-case form of each character of `s` after mapping through
// idCharset_.execToDesc. The lengths must be equal.
// This mirrors EntityManagerImpl::matchKey, using the parser's idCharset_.
1082 Boolean FSIParser::matchKey(const StringC &str, const char *s)
1084 if (strlen(s) != str.size())
1086 for (size_t i = 0; i < str.size(); i++)
1087 if (idCharset_.execToDesc(toupper(s[i])) != str[i]
1088 && idCharset_.execToDesc(tolower(s[i])) != str[i])
// True when `ch` equals the execution-charset character `execC` after
// mapping it through idCharset_.execToDesc.
1093 Boolean FSIParser::matchChar(Xchar ch, char execC)
1095 return ch == idCharset_.execToDesc(execC);
1098 Boolean FSIParser::isS(Xchar c)
1100 return (matchChar(c, ' ')
1101 || matchChar(c, '\r')
1102 || matchChar(c, '\n')
1103 || matchChar(c, ' '));
1106 Boolean FSIParser::convertDigit(Xchar c, int &weight)
1108 static const char digits[] = "0123456789";
1109 for (int i = 0; digits[i] != '\0'; i++)
1110 if (matchChar(c, digits[i])) {
1117 Boolean FSIParser::parse(ParsedSystemId &parsedSysid)
1119 size_t startIndex = strIndex_;
1120 if (!matchChar(get(), '<'))
1121 return handleInformal(startIndex, parsedSysid);
1126 return handleInformal(startIndex, parsedSysid);
1127 if (isS(c) || matchChar(c, '>'))
1132 if (matchKey(key, "CATALOG")) {
1133 if (!setCatalogAttributes(parsedSysid))
1135 return parse(parsedSysid);
1138 StorageManager *sm = lookupStorageType(key, neutral);
1140 return handleInformal(startIndex, parsedSysid);
1142 parsedSysid.resize(parsedSysid.size() + 1);
1143 StorageObjectSpec &sos = parsedSysid.back();
1144 sos.storageManager = sm;
1147 if (!setAttributes(sos, neutral, smcrd, fold))
1151 Boolean hadData = 0;
1156 if (matchChar(c, '<')) {
1167 if (isS(c) || matchChar(c, '>')) {
1169 sm = lookupStorageType(key, neutral);
1181 else if (!((!hadData && matchChar(c, '\r')) // ignored RE
1182 || matchChar(c, '\n') )) { // ignored RS
1187 if (id.size() > 0 && matchChar(id[id.size() - 1], '\r'))
1188 id.resize(id.size() - 1);
1190 id.swap(sos.specId);
1191 if (!convertId(sos.specId, smcrd, sos.storageManager))
1194 if (!sos.storageManager->transformNeutral(sos.specId, fold, mgr_))
1197 if (sos.storageManager->resolveRelative(sos.baseId, sos.specId,
1199 sos.baseId.resize(0);
// Treats the remainder of str_, starting at `index`, as a single informal
// system identifier and appends a storage object spec for it to
// parsedSysid.
// The storage manager is chosen by em_->guessStorageType; if no manager
// claims the identifier, the default spec's storage manager is used when
// it is inheritable, otherwise the entity manager's default storage
// manager. The identifier is then passed through convertId (with -1 as the
// smcrd argument), and baseId is cleared when resolveRelative returns true.
1206 Boolean FSIParser::handleInformal(size_t index, ParsedSystemId &parsedSysid)
1208 parsedSysid.resize(parsedSysid.size() + 1);
1209 StorageObjectSpec &sos = parsedSysid.back();
1210 sos.specId.assign(str_.data() + index,
1211 str_.size() - index);
1212 sos.storageManager = em_->guessStorageType(sos.specId, idCharset_);
1213 if (!sos.storageManager) {
1214 if (defSpec_ && defSpec_->storageManager->inheritable())
1215 sos.storageManager = defSpec_->storageManager;
1217 sos.storageManager = em_->defaultStorageManager_.pointer();
1220 if (!convertId(sos.specId, -1, sos.storageManager))
1222 if (sos.storageManager->resolveRelative(sos.baseId, sos.specId, sos.search))
1223 sos.baseId.resize(0);
// Maps a storage-manager key from an FSI tag to a StorageManager.
// The key "NEUTRAL" selects the default spec's storage manager when that
// manager is inheritable, otherwise the entity manager's default storage
// manager. Any other key is looked up through
// EntityManagerImpl::lookupStorageType.
1227 StorageManager *FSIParser::lookupStorageType(const StringC &key,
1230 if (matchKey(key, "NEUTRAL")) {
1232 if (defSpec_ && defSpec_->storageManager->inheritable())
1233 return defSpec_->storageManager;
1235 return em_->defaultStorageManager_.pointer();
1238 StorageManager *sm = em_->lookupStorageType(key, idCharset_);
1245 Boolean FSIParser::setCatalogAttributes(ParsedSystemId &parsedSysid)
1247 Boolean hadPublic = 0;
1248 parsedSysid.maps.resize(parsedSysid.maps.size() + 1);
1249 parsedSysid.maps.back().type = ParsedSystemIdMap::catalogDocument;
1251 StringC token, value;
1253 if (!parseAttribute(token, gotValue, value)) {
1254 mgr_.message(EntityManagerMessages::fsiSyntax, StringMessageArg(str_));
1257 if (token.size() == 0)
1259 if (matchKey(token, "PUBLIC")) {
1261 mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
1262 StringMessageArg(idCharset_.execToDesc("PUBLIC")));
1263 else if (gotValue) {
1264 convertMinimumLiteral(value, parsedSysid.maps.back().publicId);
1265 parsedSysid.maps.back().type = ParsedSystemIdMap::catalogPublic;
1268 mgr_.message(EntityManagerMessages::fsiMissingValue,
1269 StringMessageArg(token));
1273 mgr_.message(gotValue
1274 ? EntityManagerMessages::fsiUnsupportedAttribute
1275 : EntityManagerMessages::fsiUnsupportedAttributeToken,
1276 StringMessageArg(token));
1281 void FSIParser::convertMinimumLiteral(const StringC &from, StringC &to)
1283 // Do just enough to ensure it can be reparsed.
1285 for (size_t i = 0; i < from.size(); i++) {
1287 if (matchChar(c, '"') || matchChar(c, '#'))
1288 mgr_.message(EntityManagerMessages::fsiLookupChar, NumberMessageArg(c));
1289 else if (matchChar(c, ' ')) {
1290 if (to.size() && to[to.size() - 1] != c)
1296 if (to.size() && matchChar(to[to.size() - 1], ' '))
1297 to.resize(to.size() - 1);
1300 // FIXME This should be table driven.
1302 Boolean FSIParser::setAttributes(StorageObjectSpec &sos,
1307 Boolean hadBctf = 0;
1308 Boolean hadTracking = 0;
1309 Boolean hadSmcrd = 0;
1312 Boolean hadRecords = 0;
1313 Boolean hadBase = 0;
1314 Boolean hadZapeof = 0;
1315 Boolean hadSearch = 0;
1316 Boolean hadFold = 0;
1317 StorageObjectSpec::Records records;
1320 StringC token, value;
1322 if (!parseAttribute(token, gotValue, value)) {
1323 mgr_.message(EntityManagerMessages::fsiSyntax, StringMessageArg(str_));
1326 if (token.size() == 0)
1328 if (matchKey(token, "BCTF")) {
1329 if (sos.storageManager->requiredCodingSystem())
1330 mgr_.message(EntityManagerMessages::fsiBctfNotApplicable);
1332 mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
1333 StringMessageArg(token));
1334 else if (gotValue) {
1335 const char *codingSystemName;
1336 const InputCodingSystem *codingSystem
1337 = em_->lookupCodingSystem(value, idCharset_, codingSystemName);
1339 sos.codingSystem = codingSystem;
1340 sos.codingSystemName = codingSystemName;
1342 else if (matchKey(value, "SAME")) {
1345 sos.codingSystem = defSpec_->codingSystem;
1346 sos.codingSystemName = defSpec_->codingSystemName;
1349 sos.codingSystem = em_->defaultCodingSystem_;
1350 sos.codingSystemName = 0;
1355 mgr_.message(EntityManagerMessages::fsiUnknownBctf,
1356 StringMessageArg(value));
1359 mgr_.message(EntityManagerMessages::fsiMissingValue,
1360 StringMessageArg(token));
1363 else if (matchKey(token, "TRACKING")) {
1365 mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
1366 StringMessageArg(token));
1367 else if (gotValue) {
1368 if (matchKey(value, "NOTRACK"))
1370 else if (!matchKey(value, "TRACK"))
1371 mgr_.message(EntityManagerMessages::fsiBadTracking,
1372 StringMessageArg(value));
1375 mgr_.message(EntityManagerMessages::fsiMissingValue,
1376 StringMessageArg(token));
1379 else if (matchKey(token, "ZAPEOF")) {
1380 if (sos.storageManager->requiredCodingSystem())
1381 mgr_.message(EntityManagerMessages::fsiZapeofNotApplicable);
1383 mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
1384 StringMessageArg(token));
1385 else if (gotValue) {
1386 if (matchKey(value, "ZAPEOF"))
1388 else if (matchKey(value, "NOZAPEOF"))
1391 mgr_.message(EntityManagerMessages::fsiBadZapeof,
1392 StringMessageArg(value));
1398 else if (matchKey(token, "NOZAPEOF")) {
1399 if (sos.storageManager->requiredCodingSystem())
1400 mgr_.message(EntityManagerMessages::fsiZapeofNotApplicable);
1402 mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
1403 StringMessageArg(idCharset_.execToDesc("ZAPEOF")));
1405 mgr_.message(EntityManagerMessages::fsiValueAsName,
1406 StringMessageArg(token));
1411 else if (matchKey(token, "SEARCH")) {
1413 mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
1414 StringMessageArg(token));
1415 else if (gotValue) {
1416 if (matchKey(value, "SEARCH"))
1418 else if (matchKey(value, "NOSEARCH"))
1421 mgr_.message(EntityManagerMessages::fsiBadSearch,
1422 StringMessageArg(value));
1428 else if (matchKey(token, "NOSEARCH")) {
1430 mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
1431 StringMessageArg(idCharset_.execToDesc("SEARCH")));
1433 mgr_.message(EntityManagerMessages::fsiValueAsName,
1434 StringMessageArg(token));
1439 else if (matchKey(token, "FOLD")) {
1441 mgr_.message(EntityManagerMessages::fsiFoldNotNeutral);
1443 mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
1444 StringMessageArg(token));
1445 else if (gotValue) {
1446 if (matchKey(value, "FOLD"))
1448 else if (matchKey(value, "NOFOLD"))
1451 mgr_.message(EntityManagerMessages::fsiBadFold,
1452 StringMessageArg(value));
1458 else if (matchKey(token, "NOFOLD")) {
1460 mgr_.message(EntityManagerMessages::fsiFoldNotNeutral);
1462 mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
1463 StringMessageArg(idCharset_.execToDesc("FOLD")));
1465 mgr_.message(EntityManagerMessages::fsiValueAsName,
1466 StringMessageArg(token));
1471 else if (matchKey(token, "SMCRD")) {
1473 mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
1474 StringMessageArg(token));
1475 else if (gotValue) {
1476 if (value.size() == 0)
1478 else if (value.size() == 1)
1481 mgr_.message(EntityManagerMessages::fsiBadSmcrd,
1482 StringMessageArg(value));
1485 mgr_.message(EntityManagerMessages::fsiMissingValue,
1486 StringMessageArg(token));
1489 else if (matchKey(token, "RECORDS")) {
1490 if (sos.storageManager->requiresCr())
1491 mgr_.message(EntityManagerMessages::fsiRecordsNotApplicable);
1492 else if (hadRecords)
1493 mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
1494 StringMessageArg(token));
1495 else if (gotValue) {
1496 if (!lookupRecords(value, sos.records))
1497 mgr_.message(EntityManagerMessages::fsiUnsupportedRecords,
1498 StringMessageArg(value));
1501 mgr_.message(EntityManagerMessages::fsiMissingValue,
1502 StringMessageArg(token));
1505 else if (matchKey(token, "SOIBASE")) {
1507 mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
1508 StringMessageArg(token));
1510 value.swap(sos.baseId);
1512 mgr_.message(EntityManagerMessages::fsiMissingValue,
1513 StringMessageArg(token));
1514 sos.baseId.resize(0);
1518 else if (lookupRecords(token, records)) {
1519 if (sos.storageManager->requiresCr())
1520 mgr_.message(EntityManagerMessages::fsiRecordsNotApplicable);
1521 else if (hadRecords)
1522 mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
1523 StringMessageArg(idCharset_.execToDesc("RECORDS")));
1525 sos.records = records;
1527 mgr_.message(EntityManagerMessages::fsiValueAsName,
1528 StringMessageArg(token));
1531 else if (matchKey(token, "NOTRACK")) {
1533 mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
1534 StringMessageArg(idCharset_.execToDesc("TRACKING")));
1538 mgr_.message(EntityManagerMessages::fsiValueAsName,
1539 StringMessageArg(token));
1542 else if (matchKey(token, "TRACK")) {
1544 mgr_.message(EntityManagerMessages::fsiDuplicateAttribute,
1545 StringMessageArg(idCharset_.execToDesc("TRACKING")));
1547 mgr_.message(EntityManagerMessages::fsiValueAsName,
1548 StringMessageArg(token));
1552 mgr_.message(gotValue
1553 ? EntityManagerMessages::fsiUnsupportedAttribute
1554 : EntityManagerMessages::fsiUnsupportedAttributeToken,
1555 StringMessageArg(token));
1557 if (hadBase && sos.baseId.size() > 0) {
1558 convertId(sos.baseId, smcrd, sos.storageManager);
1560 if (!sos.storageManager->transformNeutral(sos.baseId, fold, mgr_))
1561 sos.baseId.resize(0);
1564 if (!hadZapeof && hadRecords && sos.records == StorageObjectSpec::asis)
// Table pairing each record-type keyword with its
// StorageObjectSpec::Records value.  recordsName() and lookupRecords()
// both walk this table from the first entry to the last.
1569 FSIParser::RecordType FSIParser::recordTypeTable[] = {
1570 { "FIND", StorageObjectSpec::find },
1571 { "ASIS", StorageObjectSpec::asis },
1572 { "CR", StorageObjectSpec::cr },
1573 { "LF", StorageObjectSpec::lf },
1574 { "CRLF", StorageObjectSpec::crlf }
// Return the keyword name that recordTypeTable associates with
// `records`, using a linear scan that stops at the first matching entry.
// NOTE(review): the result returned when no entry matches is not shown
// in this excerpt -- confirm before relying on it.
1577 const char *FSIParser::recordsName(StorageObjectSpec::Records records)
1579 for (size_t i = 0; i < SIZEOF(recordTypeTable); i++)
1580 if (records == recordTypeTable[i].value)
1581 return recordTypeTable[i].name;
// Look `token` up in recordTypeTable, comparing with matchKey(); on the
// first match the entry's value is stored in `result`.
// Callers in this file treat a true return as "token names a record
// type" and a false return as "unsupported".
// NOTE(review): the return statements themselves are not shown in this
// excerpt -- confirm.
1585 Boolean FSIParser::lookupRecords(const StringC &token,
1586 StorageObjectSpec::Records &result)
1588 for (size_t i = 0; i < SIZEOF(recordTypeTable); i++)
1589 if (matchKey(token, recordTypeTable[i].name)) {
1590 result = recordTypeTable[i].value;
// Fill in default attribute values for `sos`, drawing on its storage
// manager, on the entity manager (em_) and, when one is present, on the
// default storage object spec (defSpec_).
// NOTE(review): several guarding conditions of this function are not
// shown in this excerpt; the comments below describe only what is visible.
1596 void FSIParser::setDefaults(StorageObjectSpec &sos)
// A storage manager that requires CR forces CR record handling.
1598 if (sos.storageManager->requiresCr())
1599 sos.records = StorageObjectSpec::cr;
1601 || (defSpec_ && defSpec_->records == StorageObjectSpec::asis))
1602 sos.records = StorageObjectSpec::asis;
1603 if (isNdata_ || (defSpec_ && !defSpec_->zapEof))
// The base id is taken from the default spec only when it uses the
// same storage manager as `sos`.
1605 if (defSpec_ && defSpec_->storageManager == sos.storageManager) {
1606 if (defSpec_->id.size())
1607 sos.baseId = defSpec_->id;
1609 sos.baseId = defSpec_->specId;
1610 sos.storageManager->resolveRelative(defSpec_->baseId,
// Ask the storage manager for a required coding system first; when it
// supplies one, zapEof is switched off.
1615 sos.codingSystem = sos.storageManager->requiredCodingSystem();
1616 if (sos.codingSystem)
1617 sos.zapEof = 0; // hack
1619 sos.codingSystem = em_->defaultCodingSystem_;
// Search the registered coding systems for one whose isIdentity() is true.
1621 const InputCodingSystem *id = 0;
1623 for (j = 0; j < em_->codingSystems_.size(); j++)
1624 if (em_->codingSystems_[j].ics->isIdentity()) {
1625 id = em_->codingSystems_[j].ics;
// Use the identity coding system (and record its registered name) only
// if it differs from the entity manager's default.
1628 if (id && id != em_->defaultCodingSystem_) {
1629 sos.codingSystem = id;
1630 sos.codingSystemName = em_->codingSystems_[j].name;
// Otherwise inherit coding system and name from the default spec.
1633 else if (defSpec_) {
1634 sos.codingSystem = defSpec_->codingSystem;
1635 sos.codingSystemName = defSpec_->codingSystemName;
// Scan one attribute specification.  The characters '>', '=', '"' and
// '\'' are tested as delimiters at various points of the scan.
// NOTE(review): presumably `token` receives the attribute name and
// `gotValue` reports whether a value was supplied (callers in this file
// branch on gotValue after the call); the remaining parameters and the
// return statements are not shown in this excerpt -- confirm.
1640 Boolean FSIParser::parseAttribute(StringC &token, Boolean &gotValue,
1650 if (matchChar(c, '>'))
1652 if (matchChar(c, '"') || matchChar(c, '\'') || matchChar(c, '='))
1661 if (matchChar(c, '>') || matchChar(c, '='))
1668 if (!matchChar(c, '=')) {
1679 if (matchChar(c, '>') || matchChar(c, '='))
1681 if (matchChar(c, '"') || matchChar(c, '\'')) {
1689 if (matchChar(c, '\n'))
// A carriage return or tab is appended to the value as a single space.
1691 else if (matchChar(c, '\r') || matchChar(c, '\t'))
1692 value += idCharset_.execToDesc(' ');
1706 if (matchChar(c, '>') || matchChar(c, '=')) {
// Process numeric character references in `str` in place.  A reference
// is recognised as '&', '#', and at least one decimal digit; further
// digits are accumulated into `val` and a following ';' is tested for.
// Characters that do not start a reference are copied down from index i
// to index j unchanged.
// NOTE(review): presumably the reference is replaced by the character
// whose code is `val` and `str` is shortened to j afterwards; those
// statements are not shown in this excerpt -- confirm.
1715 void FSIParser::uncharref(StringC &str)
1719 while (i < str.size()) {
1721 if (matchChar(str[i], '&')
1722 && i + 2 < str.size()
1723 && matchChar(str[i + 1], '#')
1724 && convertDigit(str[i + 2], digit)) {
1725 unsigned long val = digit;
// Accumulate the remaining decimal digits.
// NOTE(review): no overflow guard on val is visible here -- confirm.
1727 while (i < str.size() && convertDigit(str[i], digit)) {
1728 val = val*10 + digit;
1732 if (i < str.size() && matchChar(str[i], ';'))
1736 str[j++] = str[i++];
// Convert `id` into the form used by storage manager `sm`.
//   id    - identifier to convert
//   smcrd - character that introduces a numeric character reference
//   sm    - storage manager supplying the target id charset and the
//           record-end string
// Returns 0 when a character cannot be converted (see the FIXME below).
// NOTE(review): the success return and the final assignment back to
// `id` are not shown in this excerpt -- confirm.
1741 Boolean FSIParser::convertId(StringC &id, Xchar smcrd,
1742 const StorageManager *sm)
1744 const CharsetInfo *smCharset = sm->idCharset();
1747 while (i < id.size()) {
1750 ISet<WideChar> wideSet;
// The smcrd character followed by a decimal digit starts a numeric
// reference; the digits are accumulated and a following ';' is tested for.
1752 if (Xchar(id[i]) == smcrd
1753 && i + 1 < id.size()
1754 && convertDigit(id[i + 1], digit)) {
1757 while (i < id.size() && convertDigit(id[i], digit)) {
1758 val = val*10 + digit;
1762 if (i < id.size() && matchChar(id[i], ';'))
// Otherwise, when the storage manager has an id charset, map the
// character from idCharset_ to its universal code and from there into
// the storage manager's charset.
1765 else if (smCharset) {
1766 if (!idCharset_.descToUniv(id[i++], univ))
1768 if (univ == UnivCharsetDesc::rs)
// A record end is replaced by the storage manager's RE string when it
// provides one.
1770 else if (univ == UnivCharsetDesc::re && sm->reString())
1771 newId += *sm->reString();
// Fail if the universal code does not map to exactly one character of
// the storage manager's charset; no message is issued for this case.
1772 else if (smCharset->univToDesc(univ, wide, wideSet) != 1
1774 return 0; // FIXME give error
1776 newId += Char(wide);
// Default constructor of ParsedSystemId; it takes no arguments.
1785 ParsedSystemId:: ParsedSystemId()
// Forward declaration of unparseSoi(), which is defined later in this
// file; ParsedSystemId::unparse() calls it for sos.baseId and sos.specId.
1790 void unparseSoi(const StringC &soi,
1791 const CharsetInfo *idCharset,
1792 const CharsetInfo &resultCharset,
1794 Boolean &needSmcrd);
// Append to `result` a textual rendering of this parsed system
// identifier.  All literal markup is converted through
// resultCharset.execToDesc().  Tags for the catalog maps are written
// first, followed by one tag per storage object spec.
// NOTE(review): the guards on some attributes (NOTRACK, NOSEARCH,
// NOZAPEOF, SMCRD) are not shown in this excerpt -- confirm.
1796 void ParsedSystemId::unparse(const CharsetInfo &resultCharset,
1797 StringC &result) const
1799 size_t len = size();
// Emit <CATALOG> for a catalogDocument map and
// <CATALOG PUBLIC="publicId"> for a catalogPublic map.
1802 for (i = 0; i < maps.size(); i++) {
1803 if (maps[i].type == ParsedSystemIdMap::catalogDocument)
1804 result += resultCharset.execToDesc("<CATALOG>");
1805 else if (maps[i].type == ParsedSystemIdMap::catalogPublic) {
1806 result += resultCharset.execToDesc("<CATALOG PUBLIC=\"");
1807 result += maps[i].publicId;
1808 result += resultCharset.execToDesc("\">");
// Emit one tag per storage object spec: '<', the storage manager's type
// name, then the attributes.
1811 for (i = 0; i < len; i++) {
1812 const StorageObjectSpec &sos = (*this)[i];
1813 result += resultCharset.execToDesc('<');
1814 result += resultCharset.execToDesc(sos.storageManager->type());
1816 result += resultCharset.execToDesc(" NOTRACK");
1818 result += resultCharset.execToDesc(" NOSEARCH");
// The record-type keyword is written only when the storage manager does
// not require CR and the setting is something other than `find`.
1819 if (!sos.storageManager->requiresCr()
1820 && sos.records != StorageObjectSpec::find) {
1821 result += resultCharset.execToDesc(' ');
1822 result += resultCharset.execToDesc(FSIParser::recordsName(sos.records));
// A named coding system is written as a BCTF attribute.
1824 if (sos.codingSystemName) {
1826 result += resultCharset.execToDesc(" NOZAPEOF");
1827 result += resultCharset.execToDesc(" BCTF=");
1828 result += resultCharset.execToDesc(sos.codingSystemName);
// NOTE(review): needSmcrd is presumably passed to unparseSoi() and
// decides whether SMCRD='^' is written below -- confirm.
1830 Boolean needSmcrd = 0;
// A non-empty base id is written as a single-quoted SOIBASE attribute.
1831 if (sos.baseId.size() != 0) {
1832 result += resultCharset.execToDesc(" SOIBASE='");
1833 unparseSoi(sos.baseId,
1834 sos.storageManager->idCharset(),
1838 result += resultCharset.execToDesc('\'');
// The storage object identifier itself (sos.specId) is converted by
// unparseSoi() as well.
1841 unparseSoi(sos.specId,
1842 sos.storageManager->idCharset(),
1847 result += resultCharset.execToDesc(" SMCRD='^'");
1848 result += resultCharset.execToDesc('>');
1853 void unparseSoi(const StringC &soi,
1854 const CharsetInfo *idCharset,
1855 const CharsetInfo &resultCharset,
1860 for (size_t i = 0; i < soi.size(); i++) {
1862 sprintf(buf, "&#%lu;", (unsigned long)soi[i]);
1863 result += resultCharset.execToDesc(buf);
1867 for (size_t i = 0; i < soi.size(); i++) {
1870 ISet<WideChar> toSet;
1871 if (!idCharset->descToUniv(soi[i], univ)
1876 #ifndef MSDOS_FILENAMES
1877 || univ == 92 // backslash
1880 || resultCharset.univToDesc(univ, to, toSet) != 1) {
1883 sprintf(buf, "^%lu;", (unsigned long)soi[i]);
1884 result += resultCharset.execToDesc(buf);
1888 case 34: // double quote
1890 case 39: // apostrophe
1894 sprintf(buf, "&#%lu;", (unsigned long)to);
1895 result += resultCharset.execToDesc(buf);