luci-base: po2lmo: refactor code
author Jo-Philipp Wich <jo@mein.io>
Sat, 1 Feb 2020 13:29:23 +0000 (14:29 +0100)
committer Jo-Philipp Wich <jo@mein.io>
Thu, 7 May 2020 17:40:49 +0000 (19:40 +0200)
Refactor the po2lmo conversion code to be more robust and to properly
handle input files without location comments.

Ref: https://github.com/openwrt/luci/commit/9c1bac4168d151c2a3a5a1f957adbd68b6ae0746#commitcomment-37036663
Signed-off-by: Jo-Philipp Wich <jo@mein.io>
(cherry picked from commit 2a1245a4970c2655fda10c51a6be4973f3e2ce84)

modules/luci-base/src/po2lmo.c

index 930ea12c7d3b026602b20d5c999339a93c74da5c..5f398c266e5c012e19294c93df8129182bc6da87 100644 (file)
@@ -32,8 +32,14 @@ static void usage(const char *name)
 
 static void print(const void *ptr, size_t size, size_t nmemb, FILE *stream)
 {
-       if( fwrite(ptr, size, nmemb, stream) == 0 )
-               die("Failed to write stdout");
+       int i;
+
+       if (fwrite(ptr, size, nmemb, stream) == 0)
+               die("Failed to write");
+
+       for (i = 0; i < ((4 - (size % 4)) % 4); i++)
+               if (fputc(0, stream))
+                       die("Failed to write");
 }
 
 static int extract_string(const char *src, char *dest, int len)
@@ -42,6 +48,9 @@ static int extract_string(const char *src, char *dest, int len)
        int esc = 0;
        int off = -1;
 
+       if (*src == '#')
+               return -1;
+
        for( pos = 0; (pos < strlen(src)) && (pos < len); pos++ )
        {
                if( (off == -1) && (src[pos] == '"') )
@@ -124,165 +133,184 @@ enum fieldtype {
        MSG_STR       = 4
 };
 
-int main(int argc, char *argv[])
+struct msg {
+       int plural_num;
+       char *ctxt;
+       char *id;
+       char *id_plural;
+       char *val[10];
+       size_t len;
+       char **cur;
+};
+
+static void *array = NULL;
+static int n_entries = 0;
+static size_t offset = 0;
+
+static void print_msg(struct msg *msg, FILE *out)
 {
-       char line[4096];
-       char key[4096];
-       char val[4096];
-       char tmp[4096];
-       int offset = 0;
-       int length = 0;
-       int n_entries = 0;
-       void *array = NULL;
-       lmo_entry_t *entry = NULL;
+       char key[4096], *field, *p;
        uint32_t key_id, val_id;
-       enum fieldtype type = UNSPEC, prev_type = UNSPEC;
-       int plural_num = -1, prev_plural_num = -1;
-       char *ctxt = NULL, *id = NULL, *p;
-       int eof, esc;
+       lmo_entry_t *entry;
+       size_t len;
+       int esc, i;
+
+       if (msg->id && msg->val[0]) {
+               for (i = 0; i <= msg->plural_num; i++) {
+                       if (!msg->val[i])
+                               continue;
+
+                       if (msg->ctxt && msg->id_plural)
+                               snprintf(key, sizeof(key), "%s\1%s\2%d", msg->ctxt, msg->id, i);
+                       else if (msg->ctxt)
+                               snprintf(key, sizeof(key), "%s\1%s", msg->ctxt, msg->id);
+                       else if (msg->id_plural)
+                               snprintf(key, sizeof(key), "%s\2%d", msg->id, i);
+                       else
+                               snprintf(key, sizeof(key), "%s", msg->id);
 
-       FILE *in;
-       FILE *out;
+                       key_id = sfh_hash(key, strlen(key));
+                       val_id = sfh_hash(msg->val[i], strlen(msg->val[i]));
 
-       if( (argc != 3) || ((in = fopen(argv[1], "r")) == NULL) || ((out = fopen(argv[2], "w")) == NULL) )
-               usage(argv[0]);
+                       if (key_id != val_id) {
+                               n_entries++;
+                               array = realloc(array, n_entries * sizeof(lmo_entry_t));
 
-       while (1) {
-               line[0] = 0;
-               eof = !fgets(line, sizeof(line), in);
+                               if (!array)
+                                       die("Out of memory");
 
-               if (!strncmp(line, "msgctxt \"", 9)) {
-                       free(ctxt);
-                       type = MSG_CTXT;
-                       ctxt = NULL;
-               }
-               else if (!strncmp(line, "msgid \"", 7)) {
-                       if (prev_type != MSG_CTXT) {
-                               free(ctxt);
-                               ctxt = NULL;
-                       }
+                               entry = (lmo_entry_t *)array + n_entries - 1;
+                               entry->key_id = key_id;
+                               entry->val_id = msg->plural_num + 1;
+                               entry->offset = offset;
+                               entry->length = strlen(msg->val[i]);
 
-                       free(id);
-                       type = MSG_ID;
-                       id = NULL;
-               }
-               else if (!strncmp(line, "msgid_plural \"", 14)) {
-                       type = MSG_ID_PLURAL;
-               }
-               else if (!strncmp(line, "msgstr \"", 8) || !strncmp(line, "msgstr[", 7)) {
-                       type = MSG_STR;
+                               len = entry->length + ((4 - (entry->length % 4)) % 4);
 
-                       if (line[6] == '[')
-                               plural_num = strtoul(line + 7, NULL, 10);
-                       else
-                               plural_num = -1;
+                               print(msg->val[i], entry->length, 1, out);
+                               offset += len;
+                       }
                }
+       }
+       else if (msg->val[0]) {
+               for (field = msg->val[0], p = field, esc = 0; *p; p++) {
+                       if (esc) {
+                               if (*p == 'n') {
+                                       p[-1] = 0;
 
-               if (type != prev_type || plural_num != prev_plural_num || eof) {
-                       switch (prev_type) {
-                       case MSG_CTXT:
-                               ctxt = strdup(val);
-                               break;
-
-                       case MSG_ID:
-                               id = strdup(val);
-                               break;
+                                       if (!strncasecmp(field, "Plural-Forms: ", 14)) {
+                                               field += 14;
 
-                       case MSG_STR:
-                               if (id && id[0] && val[0]) {
-                                       if (ctxt && ctxt[0] && prev_plural_num > -1)
-                                               snprintf(key, sizeof(key), "%s\1%s\2%d", ctxt, id, prev_plural_num);
-                                       else if (ctxt && ctxt[0])
-                                               snprintf(key, sizeof(key), "%s\1%s", ctxt, id);
-                                       else if (prev_plural_num > -1)
-                                               snprintf(key, sizeof(key), "%s\2%d", id, prev_plural_num);
-                                       else
-                                               snprintf(key, sizeof(key), "%s", id);
-
-                                       key_id = sfh_hash(key, strlen(key));
-                                       val_id = sfh_hash(val, strlen(val));
-
-                                       if (key_id != val_id) {
                                                n_entries++;
                                                array = realloc(array, n_entries * sizeof(lmo_entry_t));
-                                               entry = (lmo_entry_t *)array + n_entries - 1;
 
                                                if (!array)
                                                        die("Out of memory");
 
-                                               entry->key_id = key_id;
-                                               entry->val_id = prev_plural_num + 1;
+                                               entry = (lmo_entry_t *)array + n_entries - 1;
+                                               entry->key_id = 0;
+                                               entry->val_id = 0;
                                                entry->offset = offset;
-                                               entry->length = strlen(val);
+                                               entry->length = strlen(field);
 
-                                               length = strlen(val) + ((4 - (strlen(val) % 4)) % 4);
+                                               len = entry->length + ((4 - (entry->length % 4)) % 4);
 
-                                               print(val, length, 1, out);
-                                               offset += length;
-                                       }
-                               }
-                               else if (id && id[0] == 0) {
-                                       for (id = val, p = val; *p; p++) {
-                                               if (esc) {
-                                                       if (*p == 'n') {
-                                                               p[-1] = 0;
-
-                                                               if (!strncasecmp(id, "Plural-Forms: ", 14)) {
-                                                                       id += 14;
-
-                                                                       n_entries++;
-                                                                       array = realloc(array, n_entries * sizeof(lmo_entry_t));
-                                                                       entry = (lmo_entry_t *)array + n_entries - 1;
-
-                                                                       if (!array)
-                                                                               die("Out of memory");
-
-                                                                       entry->key_id = 0;
-                                                                       entry->val_id = 0;
-                                                                       entry->offset = offset;
-                                                                       entry->length = strlen(id);
-
-                                                                       length = strlen(id) + ((4 - (strlen(id) % 4)) % 4);
-
-                                                                       print(id, length, 1, out);
-                                                                       offset += length;
-                                                               }
-                                                       }
-
-                                                       id = p + 1;
-                                                       esc = 0;
-                                               }
-                                               else if (*p == '\\') {
-                                                       esc = 1;
-                                               }
+                                               print(field, entry->length, 1, out);
+                                               offset += len;
+                                               break;
                                        }
 
-                                       id = NULL;
+                                       field = p + 1;
                                }
 
-                               break;
-
-                       default:
-                               break;
+                               esc = 0;
                        }
+                       else if (*p == '\\') {
+                               esc = 1;
+                       }
+               }
+       }
+
+       free(msg->ctxt);
+       free(msg->id);
+       free(msg->id_plural);
+
+       for (i = 0; i < sizeof(msg->val) / sizeof(msg->val[0]); i++)
+               free(msg->val[i]);
+
+       memset(msg, 0, sizeof(*msg));
+}
+
+int main(int argc, char *argv[])
+{
+       struct msg msg = { .plural_num = -1 };
+       char line[4096], tmp[4096];
+       FILE *in, *out;
+       ssize_t len;
+       int eof;
+
+       if ((argc != 3) || ((in = fopen(argv[1], "r")) == NULL) || ((out = fopen(argv[2], "w")) == NULL))
+               usage(argv[0]);
+
+       while (1) {
+               line[0] = 0;
+               eof = !fgets(line, sizeof(line), in);
 
-                       val[0] = 0;
-                       prev_type = type;
-                       prev_plural_num = plural_num;
+               if (!strncmp(line, "msgctxt \"", 9)) {
+                       if (msg.id || msg.val[0])
+                               print_msg(&msg, out);
+                       else
+                               free(msg.ctxt);
+
+                       msg.ctxt = NULL;
+                       msg.cur = &msg.ctxt;
+                       msg.len = 0;
+               }
+               else if (eof || !strncmp(line, "msgid \"", 7)) {
+                       if (msg.id || msg.val[0])
+                               print_msg(&msg, out);
+                       else
+                               free(msg.id);
+
+                       msg.id = NULL;
+                       msg.cur = &msg.id;
+                       msg.len = 0;
+               }
+               else if (!strncmp(line, "msgid_plural \"", 14)) {
+                       free(msg.id_plural);
+                       msg.id_plural = NULL;
+                       msg.cur = &msg.id_plural;
+                       msg.len = 0;
+               }
+               else if (!strncmp(line, "msgstr \"", 8) || !strncmp(line, "msgstr[", 7)) {
+                       if (line[6] == '[')
+                               msg.plural_num = strtoul(line + 7, NULL, 10);
+                       else
+                               msg.plural_num = 0;
+
+                       if (msg.plural_num >= 10)
+                               die("Too many plural forms");
+
+                       free(msg.val[msg.plural_num]);
+                       msg.val[msg.plural_num] = NULL;
+                       msg.cur = &msg.val[msg.plural_num];
+                       msg.len = 0;
                }
 
                if (eof)
                        break;
 
-               if (prev_type != UNSPEC) {
-                       switch (extract_string(line, tmp, sizeof(tmp))) {
-                       case -1:
-                               type = UNSPEC;
-                               plural_num = -1;
-                               break;
+               if (msg.cur) {
+                       len = extract_string(line, tmp, sizeof(tmp));
+
+                       if (len > 0) {
+                               *msg.cur = realloc(*msg.cur, msg.len + len + 1);
+
+                               if (!*msg.cur)
+                                       die("Out of memory");
 
-                       default:
-                               strcat(val, tmp);
+                               memcpy(*msg.cur + msg.len, tmp, len + 1);
+                               msg.len += len;
                        }
                }
        }