2 * CDE - Common Desktop Environment
4 * Copyright (c) 1993-2012, The Open Group. All rights reserved.
6 * These libraries and programs are free software; you can
7 * redistribute them and/or modify them under the terms of the GNU
8 * Lesser General Public License as published by the Free Software
9 * Foundation; either version 2 of the License, or (at your option)
12 * These libraries and programs are distributed in the hope that
13 * they will be useful, but WITHOUT ANY WARRANTY; without even the
14 * implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR
15 * PURPOSE. See the GNU Lesser General Public License for more
18 * You should have received a copy of the GNU Lesser General Public
19 * License along with these libraries and programs; if not, write
20 * to the Free Software Foundation, Inc., 51 Franklin Street, Fifth
21 * Floor, Boston, MA 02110-1301 USA
23 /* $XConsortium: UTF8CodingSystem.C /main/1 1996/07/29 17:07:15 cde-hp $ */
24 // Copyright (c) 1994 James Clark
25 // See the file COPYING for copying permission.
31 #include "UTF8CodingSystem.h"
36 namespace SP_NAMESPACE {
40 // cmaskN is mask for first byte to test for N byte sequence
47 // cvalN is value of masked first byte of N byte sequence
54 // vmaskN is mask to get value from first byte in N byte sequence
60 // minN is minimum legal resulting value for N byte sequence
69 class UTF8Decoder : public Decoder {
72 size_t decode(Char *, const char *, size_t, const char **);
74 // value for encoding error
75 enum { invalid = 0xfffd };
// Encoder subclass that declares the UTF-8 output() entry point.
79 class UTF8Encoder : public Encoder {
// Takes a pointer to Chars, a count, and the streambuf that receives the
// encoded bytes.
82 void output(const Char *, size_t, streambuf *);
// Returns a freshly heap-allocated UTF8Decoder through a Decoder pointer.
// NOTE(review): presumably the caller takes ownership of the returned
// object and must delete it -- confirm against the CodingSystem contract.
85 Decoder *UTF8CodingSystem::makeDecoder() const
87 return new UTF8Decoder;
// Returns a freshly heap-allocated UTF8Encoder through an Encoder pointer.
// NOTE(review): presumably the caller takes ownership of the returned
// object and must delete it -- confirm against the CodingSystem contract.
90 Encoder *UTF8CodingSystem::makeEncoder() const
92 return new UTF8Encoder;
96 UTF8Decoder::UTF8Decoder()
// Decodes UTF-8 byte sequences of one to six bytes read from `s`.
//   to     - NOTE(review): presumably the destination buffer for decoded
//            Chars -- confirm.
//   s      - input bytes; read through an unsigned char pointer below.
//   slen   - NOTE(review): presumably the number of bytes available at `s`
//            -- confirm.
//   result - out-parameter; receives the current input position.
// NOTE(review): the return value is presumably the number of Chars stored
// through `to` -- confirm against the Decoder base class.
//
// The length of a sequence is selected by its first byte c0:
// (c0 & cmaskN) == cvalN identifies an N-byte sequence, and vmaskN extracts
// the payload bits that the first byte contributes.
101 size_t UTF8Decoder::decode(Char *to, const char *s,
102 size_t slen, const char **result)
// Read the input as unsigned bytes so values >= 0x80 are not sign-extended.
105 const unsigned char *us = (const unsigned char *)s;
// 1-byte sequence.
113 if ((c0 & cmask1) == cval1) {
// 2-byte sequence.
118 else if ((c0 & cmask2) == cval2) {
// XOR with 0x80 turns a continuation byte 10xxxxxx into its six payload
// bits 00xxxxxx; any other byte is left with one of the top two bits set.
121 unsigned c1 = us[1] ^ 0x80;
// Payload of the first byte, followed by the six bits of the trailing byte.
124 unsigned c = ((c0 & vmask2) << 6) | c1;
// 3-byte sequence.
131 else if ((c0 & cmask3) == cval3) {
134 unsigned c1 = us[1] ^ 0x80;
135 unsigned c2 = us[2] ^ 0x80;
// Non-zero when c1 or c2 did not come from a 10xxxxxx continuation byte.
136 if ((c1 | c2) & 0xc0)
// Each shift by 6 makes room for the next trailing byte's payload.
138 unsigned c = ((((c0 & vmask3) << 6) | c1) << 6) | c2;
// 4-byte sequence.
145 else if ((c0 & cmask4) == cval4) {
// min5 - 1 is the largest value below the 5-byte range; this is true when
// charMax is smaller than that value.
148 if (charMax < min5 - 1)
150 unsigned c1 = us[1] ^ 0x80;
151 unsigned c2 = us[2] ^ 0x80;
152 unsigned c3 = us[3] ^ 0x80;
// Non-zero when any trailing byte was not a 10xxxxxx continuation byte.
153 if ((c1 | c2 | c3) & 0xc0)
// unsigned long is used for the accumulated value from four bytes upward.
// This statement combines the first byte's payload with c1 and c2 only.
156 unsigned long c = ((((c0 & vmask4) << 6) | c1) << 6) | c2;
// 5-byte sequence.
165 else if ((c0 & cmask5) == cval5) {
168 unsigned c1 = us[1] ^ 0x80;
169 unsigned c2 = us[2] ^ 0x80;
170 unsigned c3 = us[3] ^ 0x80;
171 unsigned c4 = us[4] ^ 0x80;
// Non-zero when any trailing byte was not a 10xxxxxx continuation byte.
172 if ((c1 | c2 | c3 | c4) & 0xc0)
// True when charMax is smaller than the largest value below the 6-byte range.
174 if (charMax < min6 - 1)
// Built in two steps: first byte's payload plus c1 and c2, then c3 and c4.
177 unsigned long c = ((((c0 & vmask5) << 6) | c1) << 6) | c2;
178 c = (((c << 6) | c3) << 6) | c4;
// 6-byte sequence.
186 else if ((c0 & cmask6) == cval6) {
189 unsigned c1 = us[1] ^ 0x80;
190 unsigned c2 = us[2] ^ 0x80;
191 unsigned c3 = us[3] ^ 0x80;
192 unsigned c4 = us[4] ^ 0x80;
193 unsigned c5 = us[5] ^ 0x80;
// Non-zero when any trailing byte was not a 10xxxxxx continuation byte.
194 if ((c1 | c2 | c3 | c4 | c5) & 0xc0)
// Built in two steps: first byte's payload plus c1 and c2, then c3, c4, c5.
199 unsigned long c = ((((c0 & vmask6) << 6) | c1) << 6) | c2;
200 c = (((((c << 6) | c3) << 6) | c4) << 6) | c5;
// True when the current byte is not a 10xxxxxx continuation byte.
// NOTE(review): presumably used to find the start of the next sequence
// after an error -- confirm.
219 if ((*us & 0xc0) != 0x80)
// Report the current input position to the caller. The cast changes
// signedness; the const it drops is restored by the assignment, since
// *result is a const char *.
227 *result = (char *)us;
231 UTF8Encoder::UTF8Encoder()
235 // FIXME handle errors from streambuf::sputc
// Writes the n Chars starting at `s` to `sb` as UTF-8 byte sequences.
//   s  - first Char to encode; advanced once per loop iteration.
//   n  - number of Chars to encode.
//   sb - destination stream buffer; bytes are written with sputc.
// Every multi-byte form has the same shape: a lead byte made of the top bits
// of c OR'ed with the cvalN marker, followed by trailing bytes that each
// carry six bits of c (masked with 0x3f) tagged with 0x80.
// The value returned by sputc is ignored on every call.
// NOTE(review): `c` is presumably the Char currently pointed to by `s`
// -- confirm.
237 void UTF8Encoder::output(const Char *s, size_t n, streambuf *sb)
239 for (; n > 0; s++, n--) {
// 2-byte form: lead byte holds the bits above the low 6.
244 sb->sputc((c >> 6) | cval2);
245 sb->sputc((c & 0x3f) | 0x80);
// 3-byte form: lead byte holds the bits above the low 12.
248 sb->sputc((c >> 12) | cval3);
249 sb->sputc(((c >> 6) & 0x3f) | 0x80);
250 sb->sputc((c & 0x3f) | 0x80);
// 4-byte form: lead byte holds the bits above the low 18.
253 sb->sputc((c >> 18) | cval4);
254 sb->sputc(((c >> 12) & 0x3f) | 0x80);
255 sb->sputc(((c >> 6) & 0x3f) | 0x80);
256 sb->sputc((c & 0x3f) | 0x80);
// 5-byte form: lead byte holds the bits above the low 24.
259 sb->sputc((c >> 24) | cval5);
260 sb->sputc(((c >> 18) & 0x3f) | 0x80);
261 sb->sputc(((c >> 12) & 0x3f) | 0x80);
262 sb->sputc(((c >> 6) & 0x3f) | 0x80);
263 sb->sputc((c & 0x3f) | 0x80);
// 6-byte form, taken when c does not exceed max6: lead byte holds the bits
// above the low 30.
265 else if (c <= max6) {
266 sb->sputc((c >> 30) | cval6);
267 sb->sputc(((c >> 24) & 0x3f) | 0x80);
268 sb->sputc(((c >> 18) & 0x3f) | 0x80);
269 sb->sputc(((c >> 12) & 0x3f) | 0x80);
270 sb->sputc(((c >> 6) & 0x3f) | 0x80);
271 sb->sputc((c & 0x3f) | 0x80);
279 #else /* not SP_MULTI_BYTE */
// Placeholder definition: as its name says, it keeps this translation unit
// from being empty in the branch taken when SP_MULTI_BYTE is not set.
282 static char non_empty_translation_unit; // sigh
285 #endif /* not SP_MULTI_BYTE */