luci-base: handle more po format features in po2lmo
[oweals/luci.git] / modules / luci-base / src / po2lmo.c
1 /*
2  * lmo - Lua Machine Objects - PO to LMO conversion tool
3  *
4  *   Copyright (C) 2009-2012 Jo-Philipp Wich <jow@openwrt.org>
5  *
6  *  Licensed under the Apache License, Version 2.0 (the "License");
7  *  you may not use this file except in compliance with the License.
8  *  You may obtain a copy of the License at
9  *
10  *      http://www.apache.org/licenses/LICENSE-2.0
11  *
12  *  Unless required by applicable law or agreed to in writing, software
13  *  distributed under the License is distributed on an "AS IS" BASIS,
14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  *  See the License for the specific language governing permissions and
16  *  limitations under the License.
17  */
18
19 #include "template_lmo.h"
20
/*
 * Report a fatal error and terminate the program.
 *
 * Writes "Error: <msg>" followed by a newline to stderr, then exits
 * with status 1. This function does not return.
 */
static void die(const char *msg)
{
        (void)fprintf(stderr, "Error: %s\n", msg);

        exit(1);
}
26
/*
 * Show the command line synopsis and terminate the program.
 *
 * name is the program name to display (main() passes argv[0]).
 * The synopsis goes to stderr and the process exits with status 1.
 */
static void usage(const char *name)
{
        (void)fprintf(stderr, "Usage: %s input.po output.lmo\n", name);

        exit(1);
}
32
/*
 * Write nmemb items of size bytes each from ptr to stream, aborting the
 * program via die() when the data cannot be written completely.
 *
 * A request for zero bytes (size == 0 or nmemb == 0) is a no-op: fwrite()
 * returns 0 in that case, which must not be mistaken for a failure.
 * Short writes (fewer than nmemb items written) are treated as errors.
 */
static void print(const void *ptr, size_t size, size_t nmemb, FILE *stream)
{
        if( (size == 0) || (nmemb == 0) )
                return;

        if( fwrite(ptr, size, nmemb, stream) != nmemb )
                die("Failed to write stdout");
}
38
/*
 * Copy the contents of the first double-quoted string in src into dest.
 *
 * Scanning starts at the first '"' and stops at the next unescaped '"',
 * at the end of src, or after len source characters, whichever comes
 * first. The escape sequences \" and \\ are reduced to the single
 * character they stand for; any other backslash sequence (for example \n)
 * is copied through unchanged as two characters.
 *
 * src  - NUL-terminated input line
 * dest - output buffer with room for at least len bytes
 * len  - maximum number of source characters to examine
 *
 * Returns the length of the string stored in dest, or -1 when src holds
 * no opening quote within the first len characters (dest is left
 * untouched in that case). Whenever a value >= 0 is returned, dest is
 * NUL-terminated, even if the closing quote is missing or lies beyond len.
 */
static int extract_string(const char *src, char *dest, int len)
{
        const size_t src_len = strlen(src);  /* computed once, not per iteration */
        int pos;
        int n = 0;        /* number of characters stored in dest so far */
        int esc = 0;      /* previous character was an unescaped backslash */
        int started = 0;  /* opening quote has been seen */

        for( pos = 0; (pos < len) && ((size_t)pos < src_len); pos++ )
        {
                const char c = src[pos];

                if( !started )
                {
                        if( c == '"' )
                                started = 1;

                        continue;
                }

                if( esc )
                {
                        /* \" and \\ replace the backslash stored just before */
                        if( (c == '"') || (c == '\\') )
                                n--;

                        dest[n++] = c;
                        esc = 0;
                }
                else if( c == '\\' )
                {
                        dest[n++] = c;
                        esc = 1;
                }
                else if( c != '"' )
                {
                        dest[n++] = c;
                }
                else
                {
                        /* closing quote */
                        break;
                }
        }

        if( !started )
                return -1;

        /*
         * At most len - 1 characters were stored (the opening quote consumed
         * one source position), so the terminator always fits in dest.
         */
        dest[n] = '\0';

        return n;
}
84
85 static int cmp_index(const void *a, const void *b)
86 {
87         uint32_t x = ((const lmo_entry_t *)a)->key_id;
88         uint32_t y = ((const lmo_entry_t *)b)->key_id;
89
90         if (x < y)
91                 return -1;
92         else if (x > y)
93                 return 1;
94
95         return 0;
96 }
97
/*
 * Write the 32 bit value x to out in network byte order, as produced
 * by htonl().
 */
static void print_uint32(uint32_t x, FILE *out)
{
        const uint32_t be_value = htonl(x);

        print(&be_value, sizeof(be_value), 1, out);
}
103
104 static void print_index(void *array, int n, FILE *out)
105 {
106         lmo_entry_t *e;
107
108         qsort(array, n, sizeof(*e), cmp_index);
109
110         for (e = array; n > 0; n--, e++)
111         {
112                 print_uint32(e->key_id, out);
113                 print_uint32(e->val_id, out);
114                 print_uint32(e->offset, out);
115                 print_uint32(e->length, out);
116         }
117 }
118
/*
 * Kind of PO field the parser in main() is currently collecting.
 * The enumerators are numbered consecutively starting at 0.
 */
enum fieldtype {
        UNSPEC = 0,     /* not inside any recognised field */
        MSG_CTXT,       /* "msgctxt" line */
        MSG_ID,         /* "msgid" line */
        MSG_ID_PLURAL,  /* "msgid_plural" line */
        MSG_STR         /* "msgstr" or "msgstr[N]" line */
};
126
127 int main(int argc, char *argv[])
128 {
129         char line[4096];
130         char key[4096];
131         char val[4096];
132         char tmp[4096];
133         int offset = 0;
134         int length = 0;
135         int n_entries = 0;
136         void *array = NULL;
137         lmo_entry_t *entry = NULL;
138         uint32_t key_id, val_id;
139         enum fieldtype type = UNSPEC, prev_type = UNSPEC;
140         int plural_num = -1, prev_plural_num = -1;
141         char *ctxt = NULL, *id = NULL, *p;
142         int eof, esc;
143
144         FILE *in;
145         FILE *out;
146
147         if( (argc != 3) || ((in = fopen(argv[1], "r")) == NULL) || ((out = fopen(argv[2], "w")) == NULL) )
148                 usage(argv[0]);
149
150         while (1) {
151                 line[0] = 0;
152                 eof = !fgets(line, sizeof(line), in);
153
154                 if (!strncmp(line, "msgctxt \"", 9)) {
155                         free(ctxt);
156                         type = MSG_CTXT;
157                         ctxt = NULL;
158                 }
159                 else if (!strncmp(line, "msgid \"", 7)) {
160                         if (prev_type != MSG_CTXT) {
161                                 free(ctxt);
162                                 ctxt = NULL;
163                         }
164
165                         free(id);
166                         type = MSG_ID;
167                         id = NULL;
168                 }
169                 else if (!strncmp(line, "msgid_plural \"", 14)) {
170                         type = MSG_ID_PLURAL;
171                 }
172                 else if (!strncmp(line, "msgstr \"", 8) || !strncmp(line, "msgstr[", 7)) {
173                         type = MSG_STR;
174
175                         if (line[6] == '[')
176                                 plural_num = strtoul(line + 7, NULL, 10);
177                         else
178                                 plural_num = -1;
179                 }
180
181                 if (type != prev_type || plural_num != prev_plural_num || eof) {
182                         switch (prev_type) {
183                         case MSG_CTXT:
184                                 ctxt = strdup(val);
185                                 break;
186
187                         case MSG_ID:
188                                 id = strdup(val);
189                                 break;
190
191                         case MSG_STR:
192                                 if (id && id[0] && val[0]) {
193                                         if (ctxt && ctxt[0] && prev_plural_num > -1)
194                                                 snprintf(key, sizeof(key), "%s\1%s\2%d", ctxt, id, prev_plural_num);
195                                         else if (ctxt && ctxt[0])
196                                                 snprintf(key, sizeof(key), "%s\1%s", ctxt, id);
197                                         else if (prev_plural_num > -1)
198                                                 snprintf(key, sizeof(key), "%s\2%d", id, prev_plural_num);
199                                         else
200                                                 snprintf(key, sizeof(key), "%s", id);
201
202                                         key_id = sfh_hash(key, strlen(key));
203                                         val_id = sfh_hash(val, strlen(val));
204
205                                         if (key_id != val_id) {
206                                                 n_entries++;
207                                                 array = realloc(array, n_entries * sizeof(lmo_entry_t));
208                                                 entry = (lmo_entry_t *)array + n_entries - 1;
209
210                                                 if (!array)
211                                                         die("Out of memory");
212
213                                                 entry->key_id = key_id;
214                                                 entry->val_id = prev_plural_num + 1;
215                                                 entry->offset = offset;
216                                                 entry->length = strlen(val);
217
218                                                 length = strlen(val) + ((4 - (strlen(val) % 4)) % 4);
219
220                                                 print(val, length, 1, out);
221                                                 offset += length;
222                                         }
223                                 }
224                                 else if (id && id[0] == 0) {
225                                         for (id = val, p = val; *p; p++) {
226                                                 if (esc) {
227                                                         if (*p == 'n') {
228                                                                 p[-1] = 0;
229
230                                                                 if (!strncasecmp(id, "Plural-Forms: ", 14)) {
231                                                                         id += 14;
232
233                                                                         n_entries++;
234                                                                         array = realloc(array, n_entries * sizeof(lmo_entry_t));
235                                                                         entry = (lmo_entry_t *)array + n_entries - 1;
236
237                                                                         if (!array)
238                                                                                 die("Out of memory");
239
240                                                                         entry->key_id = 0;
241                                                                         entry->val_id = 0;
242                                                                         entry->offset = offset;
243                                                                         entry->length = strlen(id);
244
245                                                                         length = strlen(id) + ((4 - (strlen(id) % 4)) % 4);
246
247                                                                         print(id, length, 1, out);
248                                                                         offset += length;
249                                                                 }
250                                                         }
251
252                                                         id = p + 1;
253                                                         esc = 0;
254                                                 }
255                                                 else if (*p == '\\') {
256                                                         esc = 1;
257                                                 }
258                                         }
259
260                                         id = NULL;
261                                 }
262
263                                 break;
264
265                         default:
266                                 break;
267                         }
268
269                         val[0] = 0;
270                         prev_type = type;
271                         prev_plural_num = plural_num;
272                 }
273
274                 if (eof)
275                         break;
276
277                 if (prev_type != UNSPEC) {
278                         switch (extract_string(line, tmp, sizeof(tmp))) {
279                         case -1:
280                                 type = UNSPEC;
281                                 plural_num = -1;
282                                 break;
283
284                         default:
285                                 strcat(val, tmp);
286                         }
287                 }
288         }
289
290         print_index(array, n_entries, out);
291
292         if (offset > 0) {
293                 print_uint32(offset, out);
294                 fsync(fileno(out));
295                 fclose(out);
296         }
297         else {
298                 fclose(out);
299                 unlink(argv[2]);
300         }
301
302         fclose(in);
303         return(0);
304 }