Cleanup parsing of packages.
author graham.gower <graham.gower@e8e0d7a0-c8d9-11dd-a880-a1081c7ac358>
Wed, 11 Nov 2009 03:18:23 +0000 (03:18 +0000)
committer graham.gower <graham.gower@e8e0d7a0-c8d9-11dd-a880-a1081c7ac358>
Wed, 11 Nov 2009 03:18:23 +0000 (03:18 +0000)
This diff is loosely based on a patch by Jo-Philipp Wich <google@wwsnet.net>
posted in the bugtracker (Issue #24). Peak memory consumption while parsing
package lists is dramatically reduced.

Thanks to Camille Moncelier for fixing problems after reallocing for long lines.

git-svn-id: http://opkg.googlecode.com/svn/trunk@283 e8e0d7a0-c8d9-11dd-a880-a1081c7ac358

libopkg/opkg_utils.c
libopkg/opkg_utils.h
libopkg/pkg.c
libopkg/pkg_hash.c
libopkg/pkg_parse.c
libopkg/pkg_parse.h

index e293197..d40999f 100644 (file)
@@ -47,58 +47,6 @@ long unsigned int get_available_blocks(char * filesystem)
     return 0;
 }
 
-char **read_raw_pkgs_from_file(const char *file_name)
-{
-     FILE *fp; 
-     char **ret;
-    
-     if(!(fp = fopen(file_name, "r"))){
-         fprintf(stderr, "can't get %s open for read\n", file_name);
-         return NULL;
-     }
-
-     ret = read_raw_pkgs_from_stream(fp);
-
-     fclose(fp);
-
-     return ret;
-}
-
-char **read_raw_pkgs_from_stream(FILE *fp)
-{    
-     char **raw = NULL, *buf, *scout;
-     int count = 0;
-     size_t size = 512;
-     
-     buf = xcalloc(1, size);
-
-     while (fgets(buf, size, fp)) {
-         while (strlen (buf) == (size - 1)
-                && buf[size-2] != '\n') {
-              size_t o = size - 1;
-              size *= 2;
-              buf = xrealloc (buf, size);
-              if (fgets (buf + o, size - o, fp) == NULL)
-                   break;
-         }
-         
-         if(!(count % 50))
-              raw = xrealloc(raw, (count + 50) * sizeof(char *));
-       
-         if((scout = strchr(buf, '\n')))
-              *scout = '\0';
-
-         raw[count++] = xstrdup(buf);
-     }
-    
-     raw = xrealloc(raw, (count + 1) * sizeof(char *));
-     raw[count] = NULL;
-
-     free (buf);
-    
-     return raw;
-}
-
 /* something to remove whitespace, a hash pooper */
 char *trim_alloc(const char *src)
 {
index fdba37d..faa504b 100644 (file)
@@ -26,8 +26,6 @@ void free_error_list(void);
 void print_error_list(void);
 
 long unsigned int get_available_blocks(char * filesystem);
-char **read_raw_pkgs_from_file(const char *file_name);
-char **read_raw_pkgs_from_stream(FILE *fp);
 char *trim_alloc(const char *line);
 int line_is_blank(const char *line);
 
index 13415c7..b1f45dd 100644 (file)
@@ -318,7 +318,6 @@ void pkg_deinit(pkg_t *pkg)
 int pkg_init_from_file(pkg_t *pkg, const char *filename)
 {
      int err;
-     char **raw, **raw_start;
      FILE *control_file;
 
      err = pkg_init(pkg);
@@ -331,17 +330,10 @@ int pkg_init_from_file(pkg_t *pkg, const char *filename)
      if (err) { return err; }
 
      rewind(control_file);
-     raw = raw_start = read_raw_pkgs_from_stream(control_file);
-     pkg_parse_raw(pkg, &raw, NULL, NULL);
+     pkg_parse_from_stream(pkg, control_file, PFM_ALL);
 
      fclose(control_file);
 
-     raw = raw_start;
-     while (*raw) {
-         free(*raw++);
-     }
-     free(raw_start);
-
      return 0;
 }
 
@@ -508,41 +500,27 @@ int abstract_pkg_init(abstract_pkg_t *ab_pkg)
 }
 
 void set_flags_from_control(opkg_conf_t *conf, pkg_t *pkg){
-     char * temp_str;
-     char **raw =NULL;
-     char **raw_start=NULL; 
+     char *file_name;
+     FILE *fp;
 
-     size_t str_size = strlen(pkg->dest->info_dir)+strlen(pkg->name)+12;
-     temp_str = (char *) alloca (str_size);
-     memset(temp_str, 0 , str_size);
-     
-     if (temp_str == NULL ){
-        opkg_message(conf, OPKG_INFO, "Out of memory in  %s\n", __FUNCTION__);
-        return;
-     }
-     sprintf( temp_str,"%s/%s.control",pkg->dest->info_dir,pkg->name);
-   
-     raw = raw_start = read_raw_pkgs_from_file(temp_str);
-     if (raw == NULL ){
-        opkg_message(conf, OPKG_ERROR, "Unable to open the control file in  %s\n", __FUNCTION__);
-        return;
-     }
+     sprintf_alloc(&file_name,"%s/%s.control", pkg->dest->info_dir, pkg->name);
 
-     while(*raw){
-        if (!pkg_valorize_other_field(pkg, &raw ) == 0) {
-            opkg_message(conf, OPKG_DEBUG, "unable to read control file for %s. May be empty\n", pkg->name);
-        }
-     }
-     raw = raw_start;
-     while (*raw) {
-        if (raw!=NULL)
-          free(*raw++);
+     fp = fopen(file_name, "r");
+     if (fp == NULL) {
+            opkg_message(conf, OPKG_ERROR, "fopen(%s): %s\n", file_name, strerror(errno));
+            free(file_name); /* the early return must not leak the path buffer */
+            return;
      }
 
-     free(raw_start); 
+     free(file_name);
+
+     if (pkg_parse_from_stream(pkg, fp, PFM_ESSENTIAL)) {
+        opkg_message(conf, OPKG_DEBUG, "unable to read control file for %s. May be empty\n", pkg->name);
+     }
 
-     return ;
+     fclose(fp);
 
+     return;
 }
 
 void pkg_formatted_field(FILE *fp, pkg_t *pkg, const char *field)
index cba3599..a445839 100644 (file)
@@ -113,41 +113,53 @@ static char *pkg_get_default_arch(opkg_conf_t *conf)
 int pkg_hash_add_from_file(opkg_conf_t *conf, const char *file_name,
                           pkg_src_t *src, pkg_dest_t *dest, int is_status_file)
 {
-     hash_table_t *hash = &conf->pkg_hash;
-     char **raw;
-     char **raw_start;
-     pkg_t *pkg;
-    
-     raw = raw_start = read_raw_pkgs_from_file(file_name);
-     if (!raw)
-        return -ENOMEM;
-
-     while(*raw){         /* don't worry, we'll increment raw in the parsing function */
-         pkg = pkg_new();
-
-         if (pkg_parse_raw(pkg, &raw, src, dest) == 0) {
-              if (!pkg->architecture) {
-                   char *version_str = pkg_version_str_alloc(pkg);
-                   pkg->architecture = pkg_get_default_arch(conf);
-                   opkg_message(conf, OPKG_ERROR, "Package %s version %s has no architecture specified, defaulting to %s.\n",
-                                pkg->name, version_str, pkg->architecture);
-                   free(version_str);
-              }
-              hash_insert_pkg(hash, pkg, is_status_file,conf);
-         } else {
-              pkg_deinit (pkg);
-              free(pkg);
-         }
-     }
+       hash_table_t *hash = &conf->pkg_hash;
+       pkg_t *pkg;
+       FILE *fp;
+       char *buf;
+       const size_t len = 4096;
+       int ret = 0;
+
+       fp = fopen(file_name, "r");
+       if (fp == NULL) {
+               fprintf(stderr, "%s: fopen(%s): %s\n",
+                            __FUNCTION__, file_name, strerror(errno));
+               return -1;
+       }
 
-     /* XXX: CLEANUP: I'd like a cleaner interface for cleaning up
-       memory after read_raw_pkgs_from_file */
-     raw = raw_start;
-     while (*raw) {
-         free(*raw++);
-     }
-     free(raw_start);
-     return 0;
+       buf = xmalloc(len);
+
+       do {
+               pkg = pkg_new();
+               pkg->src = src;
+               pkg->dest = dest;
+
+               ret = pkg_parse_from_stream_nomalloc(pkg, fp, PFM_ALL,
+                               &buf, len);
+               if (ret) {
+                       pkg_deinit (pkg);
+                       free(pkg);
+                       if (ret == -1)
+                               break;
+                       continue;
+               }
+
+               if (!pkg->architecture) {
+                       char *version_str = pkg_version_str_alloc(pkg);
+                       pkg->architecture = pkg_get_default_arch(conf);
+                       opkg_message(conf, OPKG_ERROR, "Package %s version %s has no architecture specified, defaulting to %s.\n",
+                       pkg->name, version_str, pkg->architecture);
+                       free(version_str);
+               }
+
+               hash_insert_pkg(hash, pkg, is_status_file, conf);
+
+       } while (!feof(fp));
+
+       free(buf);
+       fclose(fp);
+
+       return (ret == -1) ? -1 : 0; /* EINVAL only marks a blank/nameless stanza */
 }
 
 abstract_pkg_t * abstract_pkg_fetch_by_name(hash_table_t * hash, const char * pkg_name)
index 12aabbe..47ff3bb 100644 (file)
 #include "pkg_parse.h"
 #include "libbb/libbb.h"
 
-int isGenericFieldType(char * type, char * line)
+static int isGenericFieldType(char * type, const char * line)
 {
     if(!strncmp(line, type, strlen(type)))
        return 1;
     return 0;
 }
 
-char * parseGenericFieldType(char * type, char * raw)
+static char * parseGenericFieldType(char * type, const char * raw)
 {
-    char * field_value = raw + (strlen(type) + 1);
+    const char * field_value = raw + (strlen(type) + 1);
     return trim_alloc(field_value);
 }
 
-void parseStatus(pkg_t *pkg, char * raw)
+static void parseStatus(pkg_t *pkg, const char * raw)
 {
     char sw_str[64], sf_str[64], ss_str[64];
 
@@ -47,7 +47,7 @@ void parseStatus(pkg_t *pkg, char * raw)
     pkg->state_status = pkg_state_status_from_str(ss_str);
 }
 
-char ** parseDependsString(char * raw, int * depends_count)
+static char ** parseDependsString(const char * raw, int * depends_count)
 {
     char ** depends = NULL;
     int line_count = 0;
@@ -82,7 +82,7 @@ char ** parseDependsString(char * raw, int * depends_count)
     return depends;
 }
 
-void parseConffiles(pkg_t * pkg, char * raw)
+static void parseConffiles(pkg_t * pkg, const char * raw)
 {
     char file_name[1048], md5sum[1048];  /* please tell me there aren't any longer that 1k */
 
@@ -103,7 +103,7 @@ void parseConffiles(pkg_t * pkg, char * raw)
     }
 }    
 
-int parseVersion(pkg_t *pkg, char *raw)
+int parseVersion(pkg_t *pkg, const char *raw)
 {
   char *colon, *eepochcolon;
   char *hyphen;
@@ -156,54 +156,19 @@ int parseVersion(pkg_t *pkg, char *raw)
   return 0;
 }
 
-/* Some random thoughts from Carl:
-
-   This function could be considerably simplified if we just kept
-   an array of all the generic string-valued field names, and looped
-   through those looking for a match. Also, these fields could perhaps
-   be stored in the package as an array as well, (or, probably better,
-   as an nv_pair_list_t).
-
-   Fields which require special parsing or storage, (such as Depends:
-   and Status:) could be handled as they are now. 
-*/
-/* XXX: FEATURE: The Suggests: field needs to be changed from a string
-   to a dependency list. And, since we already have
-   Depends/Pre-Depends and need to add Conflicts, Recommends, and
-   Enhances, perhaps we could generalize all of these and save some
-   code duplication.
-*/
-int pkg_parse_raw(pkg_t *pkg, char ***raw, pkg_src_t *src, pkg_dest_t *dest)
+static int
+pkg_parse_line(pkg_t *pkg, const char *line, uint mask)
 {
-    int reading_conffiles, reading_description;
-    char ** lines;
-
-    pkg->src = src;
-    pkg->dest = dest;
-
-    reading_conffiles = reading_description = 0;
-
-    for (lines = *raw; *lines; lines++) {
-       /*      fprintf(stderr, "PARSING %s\n", *lines);*/
-       switch (**lines) {
-       case 'P':
-           if(isGenericFieldType("Package:", *lines)) 
-               pkg->name = parseGenericFieldType("Package", *lines);
-           else if(isGenericFieldType("Priority:", *lines))
-               pkg->priority = parseGenericFieldType("Priority", *lines);
-           else if(isGenericFieldType("Provides", *lines)){
-               pkg->provides_str = parseDependsString(*lines, &pkg->provides_count);
-           } 
-           else if(isGenericFieldType("Pre-Depends", *lines))
-               pkg->pre_depends_str = parseDependsString(*lines, &pkg->pre_depends_count);
-           break;
+       /* these flags are a bit hackish... */
+       static int reading_conffiles = 0, reading_description = 0;
 
+       switch (*line) {
        case 'A':
-           if(isGenericFieldType("Architecture:", *lines))
-               pkg->architecture = parseGenericFieldType("Architecture", *lines);
-           else if(isGenericFieldType("Auto-Installed:", *lines)) {
+           if((mask & PFM_ARCHITECTURE ) && isGenericFieldType("Architecture:", line))
+               pkg->architecture = parseGenericFieldType("Architecture", line);
+           else if((mask & PFM_AUTO_INSTALLED) && isGenericFieldType("Auto-Installed:", line)) {
                char *auto_installed_value;
-               auto_installed_value = parseGenericFieldType("Auto-Installed:", *lines);
+               auto_installed_value = parseGenericFieldType("Auto-Installed:", line);
                if (strcmp(auto_installed_value, "yes") == 0) {
                    pkg->auto_installed = 1;
                }
@@ -211,144 +176,244 @@ int pkg_parse_raw(pkg_t *pkg, char ***raw, pkg_src_t *src, pkg_dest_t *dest)
            }
            break;
 
-       case 'F':
-           if(isGenericFieldType("Filename:", *lines))
-               pkg->filename = parseGenericFieldType("Filename", *lines);
+       case 'C':
+           if((mask & PFM_CONFFILES) && isGenericFieldType("Conffiles", line)){
+               parseConffiles(pkg, line);
+               reading_conffiles = 1;
+               reading_description = 0;
+               goto dont_reset_flags;
+           }
+           else if((mask & PFM_CONFLICTS) && isGenericFieldType("Conflicts", line))
+               pkg->conflicts_str = parseDependsString(line, &pkg->conflicts_count);
            break;
 
-       case 'S':
-           if(isGenericFieldType("Section:", *lines))
-               pkg->section = parseGenericFieldType("Section", *lines);
-#ifdef HAVE_SHA256
-           else if(isGenericFieldType("SHA256sum:", *lines))
-               pkg->sha256sum = parseGenericFieldType("SHA256sum", *lines);
-#endif
-           else if(isGenericFieldType("Size:", *lines))
-               pkg->size = parseGenericFieldType("Size", *lines);
-           else if(isGenericFieldType("Source:", *lines))
-               pkg->source = parseGenericFieldType("Source", *lines);
-           else if(isGenericFieldType("Status", *lines))
-               parseStatus(pkg, *lines);
-           else if(isGenericFieldType("Suggests", *lines))
-               pkg->suggests_str = parseDependsString(*lines, &pkg->suggests_count);
+       case 'D':
+           if((mask & PFM_DESCRIPTION) && isGenericFieldType("Description", line)) {
+               pkg->description = parseGenericFieldType("Description", line);
+               reading_conffiles = 0;
+               reading_description = 1;
+               goto dont_reset_flags;
+           }
+           else if((mask & PFM_DEPENDS) && isGenericFieldType("Depends", line))
+               pkg->depends_str = parseDependsString(line, &pkg->depends_count);
            break;
 
-       case 'T':
-           if(isGenericFieldType("Tags:", *lines))
-               pkg->tags = parseGenericFieldType("Tags", *lines);
+       case 'E':
+           if((mask & PFM_ESSENTIAL) && isGenericFieldType("Essential:", line)) {
+               char *essential_value;
+               essential_value = parseGenericFieldType("Essential", line);
+               if (strcmp(essential_value, "yes") == 0) {
+                   pkg->essential = 1;
+               }
+               free(essential_value);
+           }
            break;
 
-       case 'M':
-           if(isGenericFieldType("MD5sum:", *lines))
-               pkg->md5sum = parseGenericFieldType("MD5sum", *lines);
-           /* The old opkg wrote out status files with the wrong case for MD5sum,
-               let's parse it either way */
-           else if(isGenericFieldType("MD5Sum:", *lines))
-               pkg->md5sum = parseGenericFieldType("MD5Sum", *lines);
-           else if(isGenericFieldType("Maintainer", *lines))
-               pkg->maintainer = parseGenericFieldType("Maintainer", *lines);
+       case 'F':
+           if((mask & PFM_FILENAME) && isGenericFieldType("Filename:", line))
+               pkg->filename = parseGenericFieldType("Filename", line);
            break;
 
        case 'I':
-           if(isGenericFieldType("Installed-Size:", *lines))
-               pkg->installed_size = parseGenericFieldType("Installed-Size", *lines);
-           else if(isGenericFieldType("Installed-Time:", *lines)) {
-               char *time_str = parseGenericFieldType("Installed-Time", *lines);
+           if((mask & PFM_INSTALLED_SIZE) && isGenericFieldType("Installed-Size:", line))
+               pkg->installed_size = parseGenericFieldType("Installed-Size", line);
+           else if((mask & PFM_INSTALLED_TIME) && isGenericFieldType("Installed-Time:", line)) {
+               char *time_str = parseGenericFieldType("Installed-Time", line);
                pkg->installed_time = strtoul(time_str, NULL, 0);
                free (time_str);
            }       
            break;
 
-       case 'E':
-           if(isGenericFieldType("Essential:", *lines)) {
-               char *essential_value;
-               essential_value = parseGenericFieldType("Essential", *lines);
-               if (strcmp(essential_value, "yes") == 0) {
-                   pkg->essential = 1;
-               }
-               free(essential_value);
-           }
+       case 'M':
+           /* Each field tests its own mask bit so that requesting MD5sum
+            * does not hide Maintainer. The old opkg wrote out status files
+            * with the wrong case for MD5sum, let's parse it either way */
+           if((mask & PFM_MD5SUM) && isGenericFieldType("MD5sum:", line))
+               pkg->md5sum = parseGenericFieldType("MD5sum", line);
+           else if((mask & PFM_MD5SUM) && isGenericFieldType("MD5Sum:", line))
+               pkg->md5sum = parseGenericFieldType("MD5Sum", line);
+           else if((mask & PFM_MAINTAINER) && isGenericFieldType("Maintainer", line))
+               pkg->maintainer = parseGenericFieldType("Maintainer", line);
            break;
 
-       case 'V':
-           if(isGenericFieldType("Version", *lines))
-               parseVersion(pkg, *lines);
+       case 'P':
+           if((mask & PFM_PACKAGE) && isGenericFieldType("Package:", line)) 
+               pkg->name = parseGenericFieldType("Package", line);
+           else if((mask & PFM_PRIORITY) && isGenericFieldType("Priority:", line))
+               pkg->priority = parseGenericFieldType("Priority", line);
+           else if((mask & PFM_PROVIDES) && isGenericFieldType("Provides", line)){
+               pkg->provides_str = parseDependsString(line, &pkg->provides_count);
+           } 
+           else if((mask & PFM_PRE_DEPENDS) && isGenericFieldType("Pre-Depends", line))
+               pkg->pre_depends_str = parseDependsString(line, &pkg->pre_depends_count);
            break;
 
-       case 'C':
-           if(isGenericFieldType("Conffiles", *lines)){
-               parseConffiles(pkg, *lines);
-               reading_conffiles = 1;
-           }
-           else if(isGenericFieldType("Conflicts", *lines))
-               pkg->conflicts_str = parseDependsString(*lines, &pkg->conflicts_count);
+       case 'R':
+           if((mask & PFM_RECOMMENDS) && isGenericFieldType("Recommends", line))
+               pkg->recommends_str = parseDependsString(line, &pkg->recommends_count);
+           else if((mask & PFM_REPLACES) && isGenericFieldType("Replaces", line))
+               pkg->replaces_str = parseDependsString(line, &pkg->replaces_count);
+           
            break;
 
-       case 'D':
-           if(isGenericFieldType("Description", *lines)) {
-               pkg->description = parseGenericFieldType("Description", *lines);
-               reading_conffiles = 0;
-               reading_description = 1;
-           }
-           else if(isGenericFieldType("Depends", *lines))
-               pkg->depends_str = parseDependsString(*lines, &pkg->depends_count);
+       case 'S':
+           if((mask & PFM_SECTION) && isGenericFieldType("Section:", line))
+               pkg->section = parseGenericFieldType("Section", line);
+#ifdef HAVE_SHA256
+           else if((mask & PFM_SHA256SUM) && isGenericFieldType("SHA256sum:", line))
+               pkg->sha256sum = parseGenericFieldType("SHA256sum", line);
+#endif
+           else if((mask & PFM_SIZE) && isGenericFieldType("Size:", line))
+               pkg->size = parseGenericFieldType("Size", line);
+           else if((mask & PFM_SOURCE) && isGenericFieldType("Source:", line))
+               pkg->source = parseGenericFieldType("Source", line);
+           else if((mask & PFM_STATUS) && isGenericFieldType("Status", line))
+               parseStatus(pkg, line);
+           else if((mask & PFM_SUGGESTS) && isGenericFieldType("Suggests", line))
+               pkg->suggests_str = parseDependsString(line, &pkg->suggests_count);
            break;
 
-       case 'R':
-           if(isGenericFieldType("Recommends", *lines))
-               pkg->recommends_str = parseDependsString(*lines, &pkg->recommends_count);
-           else if(isGenericFieldType("Replaces", *lines))
-               pkg->replaces_str = parseDependsString(*lines, &pkg->replaces_count);
-           
+       case 'T':
+           if((mask & PFM_TAGS) && isGenericFieldType("Tags:", line))
+               pkg->tags = parseGenericFieldType("Tags", line);
+           break;
+
+       case 'V':
+           if((mask & PFM_VERSION) && isGenericFieldType("Version", line))
+               parseVersion(pkg, line);
            break;
 
        case ' ':
-           if(reading_description) {
+           if((mask & PFM_DESCRIPTION) && reading_description) {
                /* we already know it's not blank, so the rest of description */      
                pkg->description = xrealloc(pkg->description,
                                           strlen(pkg->description)
-                                          + 1 + strlen(*lines) + 1);
+                                          + 1 + strlen(line) + 1);
                strcat(pkg->description, "\n");
-               strcat(pkg->description, (*lines));
+               strcat(pkg->description, (line));
+               goto dont_reset_flags;
+           }
+           else if((mask & PFM_CONFFILES) && reading_conffiles) {
+               parseConffiles(pkg, line);
+               goto dont_reset_flags;
            }
-           else if(reading_conffiles)
-               parseConffiles(pkg, *lines);
-               
            break;
 
        default:
-           if(line_is_blank(*lines)) {
-               lines++;
-               goto out;
+           /* For package lists, signifies end of package. */
+           if(line_is_blank(line)) {
+               return 1;
            }
        }
-    }
-out:;
-    
-    *raw = lines;
 
-    if (pkg->name) {
+       reading_description = 0;
+       reading_conffiles = 0;
+
+dont_reset_flags:
+
        return 0;
-    } else {
-       return EINVAL;
-    }
 }
 
-int pkg_valorize_other_field(pkg_t *pkg, char ***raw)
+int
+pkg_parse_from_stream_nomalloc(pkg_t *pkg, FILE *fp, uint mask,
+                                               char **buf0, size_t buf0len)
 {
-    char ** lines;
-
-    for (lines = *raw; *lines; lines++) {
-       if(isGenericFieldType("Essential:", *lines)) {
-           char *essential_value;
-           essential_value = parseGenericFieldType("Essential", *lines);
-           if (strcmp(essential_value, "yes") == 0) {
-               pkg->essential = 1;
-           }
-           free(essential_value);
+       int ret, lineno;
+       char *buf, *nl;
+       size_t buflen;
+
+       lineno = 1;
+       ret = 0;
+
+       buflen = buf0len;
+       buf = *buf0;
+       buf[0] = '\0';
+
+       while (1) {
+               if (fgets(buf, buflen, fp) == NULL) {
+                       if (ferror(fp)) {
+                               fprintf(stderr, "%s: fgets: %s\n",
+                                       __FUNCTION__, strerror(errno));
+                               ret = -1;
+                       } else if (buf != *buf0) { /* partial line still pending */
+                               fprintf(stderr, "%s: missing new line character"
+                                               " at end of file!\n",
+                                       __FUNCTION__);
+                               pkg_parse_line(pkg, *buf0, mask);
+                       }
+                       break;
+               }
+
+               nl = strchr(buf, '\n');
+               if (nl == NULL) {
+                       if (strlen(buf) < buflen-1) {
+                               /*
+                                * Line could be exactly buflen-1 long and
+                                * missing a newline, but we won't know until
+                                * fgets fails to read more data.
+                                */
+                               fprintf(stderr, "%s: missing new line character"
+                                               " at end of file!\n",
+                                       __FUNCTION__);
+                               pkg_parse_line(pkg, *buf0, mask);
+                               break;
+                       }
+                       if (buf0len >= EXCESSIVE_LINE_LEN) {
+                               fprintf(stderr, "%s: excessively long line at "
+                                       "%d. Corrupt file?\n",
+                                       __FUNCTION__, lineno);
+                               ret = -1;
+                               break;
+                       }
+
+                       /*
+                        * Realloc and point buf at the NUL terminator fgets left
+                        * at *buf0 + buf0len - 1, so the next read continues the
+                        * line and may fill the remaining buf0len + 1 bytes.
+                        * |---------------------|---------------------|
+                        * buf0                 buf
+                        */
+                       buflen = buf0len + 1;
+                       buf0len *= 2;
+                       *buf0 = xrealloc(*buf0, buf0len);
+                       buf = *buf0 + buflen - 2;
+
+                       continue;
+               }
+
+               *nl = '\0';
+
+               lineno++;
+
+               if (pkg_parse_line(pkg, *buf0, mask))
+                       break;
+
+               if (buf != *buf0 && pkg->name != NULL)
+                       fprintf(stderr, "%s: %s\n", __FUNCTION__, pkg->name);
+
+               buf = *buf0;
+               buflen = buf0len;
+               buf[0] = '\0';
+       };
+
+       if (pkg->name == NULL) {
+               /* probably just a blank line */
+               ret = EINVAL;
        }
-    }
-    *raw = lines;
 
-    return 0;
+       return ret;
+}
+
+int
+pkg_parse_from_stream(pkg_t *pkg, FILE *fp, uint mask)
+{
+       int ret;
+       char *buf;
+       const size_t len = 4096;
+
+       buf = xmalloc(len);
+       ret = pkg_parse_from_stream_nomalloc(pkg, fp, mask, &buf, len);
+       free(buf);
+
+       return ret;
 }
index 58e530a..5461607 100644 (file)
 #ifndef PKG_PARSE_H
 #define PKG_PARSE_H
 
-int isGenericFieldType(char * type, char * line);
-char * parseGenericFieldType(char * type, char * raw);
-void parseStatus(pkg_t *pkg, char * raw);
-int parseVersion(pkg_t *pkg, char *raw);
-char ** parseDependsString(char * raw, int * depends_count);
-int parseVersion(pkg_t *pkg, char *raw);
-void parseConffiles(pkg_t * pkg, char * raw);
-int pkg_parse_raw(pkg_t *pkg, char ***raw, pkg_src_t *src, pkg_dest_t *dest);
-int pkg_valorize_other_field(pkg_t *pkg, char ***raw);
+int parseVersion(pkg_t *pkg, const char *raw);
+int pkg_parse_from_stream(pkg_t *pkg, FILE *fp, uint mask);
+int pkg_parse_from_stream_nomalloc(pkg_t *pkg, FILE *fp, uint mask,
+                                               char **buf0, size_t buf0len);
+
+#define EXCESSIVE_LINE_LEN     (4096 << 8)
+
+/* package field mask */
+#define PFM_ARCHITECTURE       (1 << 1)
+#define PFM_AUTO_INSTALLED     (1 << 2)
+#define PFM_CONFFILES          (1 << 3)
+#define PFM_CONFLICTS          (1 << 4)
+#define PFM_DESCRIPTION                (1 << 5)
+#define PFM_DEPENDS            (1 << 6)
+#define PFM_ESSENTIAL          (1 << 7)
+#define PFM_FILENAME           (1 << 8)
+#define PFM_INSTALLED_SIZE     (1 << 9)
+#define PFM_INSTALLED_TIME     (1 << 10)
+#define PFM_MD5SUM             (1 << 11)
+#define PFM_MAINTAINER         (1 << 12)
+#define PFM_PACKAGE            (1 << 13)
+#define PFM_PRIORITY           (1 << 14)
+#define PFM_PROVIDES           (1 << 15)
+#define PFM_PRE_DEPENDS                (1 << 16)
+#define PFM_RECOMMENDS         (1 << 17)
+#define PFM_REPLACES           (1 << 18)
+#define PFM_SECTION            (1 << 19)
+#define PFM_SHA256SUM          (1 << 20)
+#define PFM_SIZE               (1 << 21)
+#define PFM_SOURCE             (1 << 22)
+#define PFM_STATUS             (1 << 23)
+#define PFM_SUGGESTS           (1 << 24)
+#define PFM_TAGS               (1 << 25)
+#define PFM_VERSION            (1 << 26)
+
+#define PFM_ALL        (~(uint)0)
 
 #endif