Linux-libre 4.15.7-gnu
[librecmc/linux-libre.git] / fs / hfsplus / unicode.c
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  *  linux/fs/hfsplus/unicode.c
4  *
5  * Copyright (C) 2001
6  * Brad Boyer (flar@allandria.com)
7  * (C) 2003 Ardis Technologies <roman@ardistech.com>
8  *
9  * Handler routines for unicode strings
10  */
11
12 #include <linux/types.h>
13 #include <linux/nls.h>
14 #include "hfsplus_fs.h"
15 #include "hfsplus_raw.h"
16
17 /* Fold the case of a unicode char, given the 16 bit value */
18 /* Returns folded char, or 0 if ignorable */
19 static inline u16 case_fold(u16 c)
20 {
21         u16 tmp;
22
23         tmp = hfsplus_case_fold_table[c >> 8];
24         if (tmp)
25                 tmp = hfsplus_case_fold_table[tmp + (c & 0xff)];
26         else
27                 tmp = c;
28         return tmp;
29 }
30
31 /* Compare unicode strings, return values like normal strcmp */
32 int hfsplus_strcasecmp(const struct hfsplus_unistr *s1,
33                        const struct hfsplus_unistr *s2)
34 {
35         u16 len1, len2, c1, c2;
36         const hfsplus_unichr *p1, *p2;
37
38         len1 = be16_to_cpu(s1->length);
39         len2 = be16_to_cpu(s2->length);
40         p1 = s1->unicode;
41         p2 = s2->unicode;
42
43         while (1) {
44                 c1 = c2 = 0;
45
46                 while (len1 && !c1) {
47                         c1 = case_fold(be16_to_cpu(*p1));
48                         p1++;
49                         len1--;
50                 }
51                 while (len2 && !c2) {
52                         c2 = case_fold(be16_to_cpu(*p2));
53                         p2++;
54                         len2--;
55                 }
56
57                 if (c1 != c2)
58                         return (c1 < c2) ? -1 : 1;
59                 if (!c1 && !c2)
60                         return 0;
61         }
62 }
63
64 /* Compare names as a sequence of 16-bit unsigned integers */
65 int hfsplus_strcmp(const struct hfsplus_unistr *s1,
66                    const struct hfsplus_unistr *s2)
67 {
68         u16 len1, len2, c1, c2;
69         const hfsplus_unichr *p1, *p2;
70         int len;
71
72         len1 = be16_to_cpu(s1->length);
73         len2 = be16_to_cpu(s2->length);
74         p1 = s1->unicode;
75         p2 = s2->unicode;
76
77         for (len = min(len1, len2); len > 0; len--) {
78                 c1 = be16_to_cpu(*p1);
79                 c2 = be16_to_cpu(*p2);
80                 if (c1 != c2)
81                         return c1 < c2 ? -1 : 1;
82                 p1++;
83                 p2++;
84         }
85
86         return len1 < len2 ? -1 :
87                len1 > len2 ? 1 : 0;
88 }
89
90
/*
 * Constants for composing Hangul syllables in hfsplus_uni2asc():
 *
 *   syllable = Hangul_SBase +
 *              ((L - Hangul_LBase) * Hangul_VCount + (V - Hangul_VBase)) *
 *              Hangul_TCount + (T - Hangul_TBase)
 *
 * where L, V and T are the leading, vowel and (optional) trailing units.
 * A trailing index of 0 means "no trailing unit".
 */
#define Hangul_LBase	0x1100
#define Hangul_VBase	0x1161
#define Hangul_TBase	0x11a7
#define Hangul_SBase	0xac00

#define Hangul_LCount	19
#define Hangul_VCount	21
#define Hangul_TCount	28
#define Hangul_NCount	(Hangul_VCount * Hangul_TCount)
#define Hangul_SCount	(Hangul_LCount * Hangul_NCount)
100
101
102 static u16 *hfsplus_compose_lookup(u16 *p, u16 cc)
103 {
104         int i, s, e;
105
106         s = 1;
107         e = p[1];
108         if (!e || cc < p[s * 2] || cc > p[e * 2])
109                 return NULL;
110         do {
111                 i = (s + e) / 2;
112                 if (cc > p[i * 2])
113                         s = i + 1;
114                 else if (cc < p[i * 2])
115                         e = i - 1;
116                 else
117                         return hfsplus_compose_table + p[i * 2 + 1];
118         } while (s <= e);
119         return NULL;
120 }
121
122 int hfsplus_uni2asc(struct super_block *sb,
123                 const struct hfsplus_unistr *ustr,
124                 char *astr, int *len_p)
125 {
126         const hfsplus_unichr *ip;
127         struct nls_table *nls = HFSPLUS_SB(sb)->nls;
128         u8 *op;
129         u16 cc, c0, c1;
130         u16 *ce1, *ce2;
131         int i, len, ustrlen, res, compose;
132
133         op = astr;
134         ip = ustr->unicode;
135         ustrlen = be16_to_cpu(ustr->length);
136         len = *len_p;
137         ce1 = NULL;
138         compose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
139
140         while (ustrlen > 0) {
141                 c0 = be16_to_cpu(*ip++);
142                 ustrlen--;
143                 /* search for single decomposed char */
144                 if (likely(compose))
145                         ce1 = hfsplus_compose_lookup(hfsplus_compose_table, c0);
146                 if (ce1)
147                         cc = ce1[0];
148                 else
149                         cc = 0;
150                 if (cc) {
151                         /* start of a possibly decomposed Hangul char */
152                         if (cc != 0xffff)
153                                 goto done;
154                         if (!ustrlen)
155                                 goto same;
156                         c1 = be16_to_cpu(*ip) - Hangul_VBase;
157                         if (c1 < Hangul_VCount) {
158                                 /* compose the Hangul char */
159                                 cc = (c0 - Hangul_LBase) * Hangul_VCount;
160                                 cc = (cc + c1) * Hangul_TCount;
161                                 cc += Hangul_SBase;
162                                 ip++;
163                                 ustrlen--;
164                                 if (!ustrlen)
165                                         goto done;
166                                 c1 = be16_to_cpu(*ip) - Hangul_TBase;
167                                 if (c1 > 0 && c1 < Hangul_TCount) {
168                                         cc += c1;
169                                         ip++;
170                                         ustrlen--;
171                                 }
172                                 goto done;
173                         }
174                 }
175                 while (1) {
176                         /* main loop for common case of not composed chars */
177                         if (!ustrlen)
178                                 goto same;
179                         c1 = be16_to_cpu(*ip);
180                         if (likely(compose))
181                                 ce1 = hfsplus_compose_lookup(
182                                         hfsplus_compose_table, c1);
183                         if (ce1)
184                                 break;
185                         switch (c0) {
186                         case 0:
187                                 c0 = 0x2400;
188                                 break;
189                         case '/':
190                                 c0 = ':';
191                                 break;
192                         }
193                         res = nls->uni2char(c0, op, len);
194                         if (res < 0) {
195                                 if (res == -ENAMETOOLONG)
196                                         goto out;
197                                 *op = '?';
198                                 res = 1;
199                         }
200                         op += res;
201                         len -= res;
202                         c0 = c1;
203                         ip++;
204                         ustrlen--;
205                 }
206                 ce2 = hfsplus_compose_lookup(ce1, c0);
207                 if (ce2) {
208                         i = 1;
209                         while (i < ustrlen) {
210                                 ce1 = hfsplus_compose_lookup(ce2,
211                                         be16_to_cpu(ip[i]));
212                                 if (!ce1)
213                                         break;
214                                 i++;
215                                 ce2 = ce1;
216                         }
217                         cc = ce2[0];
218                         if (cc) {
219                                 ip += i;
220                                 ustrlen -= i;
221                                 goto done;
222                         }
223                 }
224 same:
225                 switch (c0) {
226                 case 0:
227                         cc = 0x2400;
228                         break;
229                 case '/':
230                         cc = ':';
231                         break;
232                 default:
233                         cc = c0;
234                 }
235 done:
236                 res = nls->uni2char(cc, op, len);
237                 if (res < 0) {
238                         if (res == -ENAMETOOLONG)
239                                 goto out;
240                         *op = '?';
241                         res = 1;
242                 }
243                 op += res;
244                 len -= res;
245         }
246         res = 0;
247 out:
248         *len_p = (char *)op - astr;
249         return res;
250 }
251
252 /*
253  * Convert one or more ASCII characters into a single unicode character.
254  * Returns the number of ASCII characters corresponding to the unicode char.
255  */
256 static inline int asc2unichar(struct super_block *sb, const char *astr, int len,
257                               wchar_t *uc)
258 {
259         int size = HFSPLUS_SB(sb)->nls->char2uni(astr, len, uc);
260         if (size <= 0) {
261                 *uc = '?';
262                 size = 1;
263         }
264         switch (*uc) {
265         case 0x2400:
266                 *uc = 0;
267                 break;
268         case ':':
269                 *uc = '/';
270                 break;
271         }
272         return size;
273 }
274
275 /* Decomposes a single unicode character. */
276 static inline u16 *decompose_unichar(wchar_t uc, int *size)
277 {
278         int off;
279
280         off = hfsplus_decompose_table[(uc >> 12) & 0xf];
281         if (off == 0 || off == 0xffff)
282                 return NULL;
283
284         off = hfsplus_decompose_table[off + ((uc >> 8) & 0xf)];
285         if (!off)
286                 return NULL;
287
288         off = hfsplus_decompose_table[off + ((uc >> 4) & 0xf)];
289         if (!off)
290                 return NULL;
291
292         off = hfsplus_decompose_table[off + (uc & 0xf)];
293         *size = off & 3;
294         if (*size == 0)
295                 return NULL;
296         return hfsplus_decompose_table + (off / 4);
297 }
298
299 int hfsplus_asc2uni(struct super_block *sb,
300                     struct hfsplus_unistr *ustr, int max_unistr_len,
301                     const char *astr, int len)
302 {
303         int size, dsize, decompose;
304         u16 *dstr, outlen = 0;
305         wchar_t c;
306
307         decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
308         while (outlen < max_unistr_len && len > 0) {
309                 size = asc2unichar(sb, astr, len, &c);
310
311                 if (decompose)
312                         dstr = decompose_unichar(c, &dsize);
313                 else
314                         dstr = NULL;
315                 if (dstr) {
316                         if (outlen + dsize > max_unistr_len)
317                                 break;
318                         do {
319                                 ustr->unicode[outlen++] = cpu_to_be16(*dstr++);
320                         } while (--dsize > 0);
321                 } else
322                         ustr->unicode[outlen++] = cpu_to_be16(c);
323
324                 astr += size;
325                 len -= size;
326         }
327         ustr->length = cpu_to_be16(outlen);
328         if (len > 0)
329                 return -ENAMETOOLONG;
330         return 0;
331 }
332
333 /*
334  * Hash a string to an integer as appropriate for the HFS+ filesystem.
335  * Composed unicode characters are decomposed and case-folding is performed
336  * if the appropriate bits are (un)set on the superblock.
337  */
338 int hfsplus_hash_dentry(const struct dentry *dentry, struct qstr *str)
339 {
340         struct super_block *sb = dentry->d_sb;
341         const char *astr;
342         const u16 *dstr;
343         int casefold, decompose, size, len;
344         unsigned long hash;
345         wchar_t c;
346         u16 c2;
347
348         casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
349         decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
350         hash = init_name_hash(dentry);
351         astr = str->name;
352         len = str->len;
353         while (len > 0) {
354                 int uninitialized_var(dsize);
355                 size = asc2unichar(sb, astr, len, &c);
356                 astr += size;
357                 len -= size;
358
359                 if (decompose)
360                         dstr = decompose_unichar(c, &dsize);
361                 else
362                         dstr = NULL;
363                 if (dstr) {
364                         do {
365                                 c2 = *dstr++;
366                                 if (casefold)
367                                         c2 = case_fold(c2);
368                                 if (!casefold || c2)
369                                         hash = partial_name_hash(c2, hash);
370                         } while (--dsize > 0);
371                 } else {
372                         c2 = c;
373                         if (casefold)
374                                 c2 = case_fold(c2);
375                         if (!casefold || c2)
376                                 hash = partial_name_hash(c2, hash);
377                 }
378         }
379         str->hash = end_name_hash(hash);
380
381         return 0;
382 }
383
384 /*
385  * Compare strings with HFS+ filename ordering.
386  * Composed unicode characters are decomposed and case-folding is performed
387  * if the appropriate bits are (un)set on the superblock.
388  */
389 int hfsplus_compare_dentry(const struct dentry *dentry,
390                 unsigned int len, const char *str, const struct qstr *name)
391 {
392         struct super_block *sb = dentry->d_sb;
393         int casefold, decompose, size;
394         int dsize1, dsize2, len1, len2;
395         const u16 *dstr1, *dstr2;
396         const char *astr1, *astr2;
397         u16 c1, c2;
398         wchar_t c;
399
400         casefold = test_bit(HFSPLUS_SB_CASEFOLD, &HFSPLUS_SB(sb)->flags);
401         decompose = !test_bit(HFSPLUS_SB_NODECOMPOSE, &HFSPLUS_SB(sb)->flags);
402         astr1 = str;
403         len1 = len;
404         astr2 = name->name;
405         len2 = name->len;
406         dsize1 = dsize2 = 0;
407         dstr1 = dstr2 = NULL;
408
409         while (len1 > 0 && len2 > 0) {
410                 if (!dsize1) {
411                         size = asc2unichar(sb, astr1, len1, &c);
412                         astr1 += size;
413                         len1 -= size;
414
415                         if (decompose)
416                                 dstr1 = decompose_unichar(c, &dsize1);
417                         if (!decompose || !dstr1) {
418                                 c1 = c;
419                                 dstr1 = &c1;
420                                 dsize1 = 1;
421                         }
422                 }
423
424                 if (!dsize2) {
425                         size = asc2unichar(sb, astr2, len2, &c);
426                         astr2 += size;
427                         len2 -= size;
428
429                         if (decompose)
430                                 dstr2 = decompose_unichar(c, &dsize2);
431                         if (!decompose || !dstr2) {
432                                 c2 = c;
433                                 dstr2 = &c2;
434                                 dsize2 = 1;
435                         }
436                 }
437
438                 c1 = *dstr1;
439                 c2 = *dstr2;
440                 if (casefold) {
441                         c1 = case_fold(c1);
442                         if (!c1) {
443                                 dstr1++;
444                                 dsize1--;
445                                 continue;
446                         }
447                         c2 = case_fold(c2);
448                         if (!c2) {
449                                 dstr2++;
450                                 dsize2--;
451                                 continue;
452                         }
453                 }
454                 if (c1 < c2)
455                         return -1;
456                 else if (c1 > c2)
457                         return 1;
458
459                 dstr1++;
460                 dsize1--;
461                 dstr2++;
462                 dsize2--;
463         }
464
465         if (len1 < len2)
466                 return -1;
467         if (len1 > len2)
468                 return 1;
469         return 0;
470 }