2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these libraries and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
23 /* $XConsortium: UTF8CodingSystem.C /main/1 1996/07/29 17:07:15 cde-hp $ */
24 // Copyright (c) 1994 James Clark
25 // See the file COPYING for copying permission.
31 #include "UTF8CodingSystem.h"
40 namespace SP_NAMESPACE {
44 // cmaskN is mask for first byte to test for N byte sequence
51 // cvalN is value of masked first byte of N byte sequence
58 // vmaskN is mask to get value from first byte in N byte sequence
64 // minN is minimum legal resulting value for N byte sequence
73 class UTF8Decoder : public Decoder {
76 size_t decode(Char *, const char *, size_t, const char **);
78 // value for encoding error
79 enum { invalid = 0xfffd };
83 class UTF8Encoder : public Encoder {
86 void output(const Char *, size_t, streambuf *);
89 Decoder *UTF8CodingSystem::makeDecoder() const
91 return new UTF8Decoder;
94 Encoder *UTF8CodingSystem::makeEncoder() const
96 return new UTF8Encoder;
100 UTF8Decoder::UTF8Decoder()
105 size_t UTF8Decoder::decode(Char *to, const char *s,
106 size_t slen, const char **result)
109 const unsigned char *us = (const unsigned char *)s;
117 if ((c0 & cmask1) == cval1) {
122 else if ((c0 & cmask2) == cval2) {
125 unsigned c1 = us[1] ^ 0x80;
128 unsigned c = ((c0 & vmask2) << 6) | c1;
135 else if ((c0 & cmask3) == cval3) {
138 unsigned c1 = us[1] ^ 0x80;
139 unsigned c2 = us[2] ^ 0x80;
140 if ((c1 | c2) & 0xc0)
142 unsigned c = ((((c0 & vmask3) << 6) | c1) << 6) | c2;
149 else if ((c0 & cmask4) == cval4) {
152 if (charMax < min5 - 1)
154 unsigned c1 = us[1] ^ 0x80;
155 unsigned c2 = us[2] ^ 0x80;
156 unsigned c3 = us[3] ^ 0x80;
157 if ((c1 | c2 | c3) & 0xc0)
160 unsigned long c = ((((c0 & vmask4) << 6) | c1) << 6) | c2;
169 else if ((c0 & cmask5) == cval5) {
172 unsigned c1 = us[1] ^ 0x80;
173 unsigned c2 = us[2] ^ 0x80;
174 unsigned c3 = us[3] ^ 0x80;
175 unsigned c4 = us[4] ^ 0x80;
176 if ((c1 | c2 | c3 | c4) & 0xc0)
178 if (charMax < min6 - 1)
181 unsigned long c = ((((c0 & vmask5) << 6) | c1) << 6) | c2;
182 c = (((c << 6) | c3) << 6) | c4;
190 else if ((c0 & cmask6) == cval6) {
193 unsigned c1 = us[1] ^ 0x80;
194 unsigned c2 = us[2] ^ 0x80;
195 unsigned c3 = us[3] ^ 0x80;
196 unsigned c4 = us[4] ^ 0x80;
197 unsigned c5 = us[5] ^ 0x80;
198 if ((c1 | c2 | c3 | c4 | c5) & 0xc0)
203 unsigned long c = ((((c0 & vmask6) << 6) | c1) << 6) | c2;
204 c = (((((c << 6) | c3) << 6) | c4) << 6) | c5;
223 if ((*us & 0xc0) != 0x80)
231 *result = (char *)us;
235 UTF8Encoder::UTF8Encoder()
239 // FIXME handle errors from streambuf::sputc
241 void UTF8Encoder::output(const Char *s, size_t n, streambuf *sb)
243 for (; n > 0; s++, n--) {
248 sb->sputc((c >> 6) | cval2);
249 sb->sputc((c & 0x3f) | 0x80);
252 sb->sputc((c >> 12) | cval3);
253 sb->sputc(((c >> 6) & 0x3f) | 0x80);
254 sb->sputc((c & 0x3f) | 0x80);
257 sb->sputc((c >> 18) | cval4);
258 sb->sputc(((c >> 12) & 0x3f) | 0x80);
259 sb->sputc(((c >> 6) & 0x3f) | 0x80);
260 sb->sputc((c & 0x3f) | 0x80);
263 sb->sputc((c >> 24) | cval5);
264 sb->sputc(((c >> 18) & 0x3f) | 0x80);
265 sb->sputc(((c >> 12) & 0x3f) | 0x80);
266 sb->sputc(((c >> 6) & 0x3f) | 0x80);
267 sb->sputc((c & 0x3f) | 0x80);
269 else if (c <= max6) {
270 sb->sputc((c >> 30) | cval6);
271 sb->sputc(((c >> 24) & 0x3f) | 0x80);
272 sb->sputc(((c >> 18) & 0x3f) | 0x80);
273 sb->sputc(((c >> 12) & 0x3f) | 0x80);
274 sb->sputc(((c >> 6) & 0x3f) | 0x80);
275 sb->sputc((c & 0x3f) | 0x80);
283 #else /* not SP_MULTI_BYTE */
286 static char non_empty_translation_unit; // sigh
289 #endif /* not SP_MULTI_BYTE */