X-Git-Url: https://git.librecmc.org/?a=blobdiff_plain;f=libbb%2Fget_line_from_file.c;h=3cb46d240fb9d2bb95aa41e532f01b9fb636b99f;hb=e992bae6f9adf3718c6263a36c004ead1c7272a8;hp=66ea5a1a5d99d5bde517090d9cc838bdee82869f;hpb=defc1ea34074e7882724c460260d307cdf981a70;p=oweals%2Fbusybox.git diff --git a/libbb/get_line_from_file.c b/libbb/get_line_from_file.c index 66ea5a1a5..3cb46d240 100644 --- a/libbb/get_line_from_file.c +++ b/libbb/get_line_from_file.c @@ -9,14 +9,22 @@ * Licensed under GPLv2 or later, see file LICENSE in this tarball for details. */ +/* for getline() [GNUism] +#ifndef _GNU_SOURCE +#define _GNU_SOURCE 1 +#endif +*/ #include "libbb.h" /* This function reads an entire line from a text file, up to a newline * or NUL byte, inclusive. It returns a malloc'ed char * which * must be free'ed by the caller. If end is NULL '\n' isn't considered - * end of line. If end isn't NULL, length of the chunk read is stored in it. - * Return NULL if EOF/error */ -char* FAST_FUNC bb_get_chunk_from_file(FILE *file, int *end) + * end of line. If end isn't NULL, length of the chunk is stored in it. + * If lineno is not NULL, *lineno is incremented for each line, + * and also trailing '\' is recognized as line continuation. + * + * Returns NULL if EOF/error. */ +char* FAST_FUNC bb_get_chunk_with_continuation(FILE *file, int *end, int *lineno) { int ch; int idx = 0; @@ -26,12 +34,20 @@ char* FAST_FUNC bb_get_chunk_from_file(FILE *file, int *end) while ((ch = getc(file)) != EOF) { /* grow the line buffer as necessary */ if (idx >= linebufsz) { - linebufsz += 80; + linebufsz += 256; linebuf = xrealloc(linebuf, linebufsz); } linebuf[idx++] = (char) ch; - if (!ch || (end && ch == '\n')) + if (!ch) break; + if (end && ch == '\n') { + if (lineno == NULL) + break; + (*lineno)++; + if (idx < 2 || linebuf[idx-2] != '\\') + break; + idx -= 2; + } } if (end) *end = idx; @@ -42,12 +58,17 @@ char* FAST_FUNC bb_get_chunk_from_file(FILE *file, int *end) // free(linebuf); // return NULL; //} - linebuf = xrealloc(linebuf, idx+1); + linebuf = xrealloc(linebuf, idx + 1); linebuf[idx] = '\0'; } return linebuf; } +char* FAST_FUNC bb_get_chunk_from_file(FILE *file, int *end) +{ + return bb_get_chunk_with_continuation(file, end, NULL); +} + /* Get line, including trailing \n if any */ char* FAST_FUNC xmalloc_fgets(FILE *file) { @@ -55,7 +76,6 @@ char* FAST_FUNC xmalloc_fgets(FILE *file) return bb_get_chunk_from_file(file, &i); } - /* Get line. Remove trailing \n */ char* FAST_FUNC xmalloc_fgetline(FILE *file) { @@ -67,3 +87,121 @@ char* FAST_FUNC xmalloc_fgetline(FILE *file) return c; } + +#if 0 +/* GNUism getline() should be faster (not tested) than a loop with fgetc */ + +/* Get line, including trailing \n if any */ +char* FAST_FUNC xmalloc_fgets(FILE *file) +{ + char *res_buf = NULL; + size_t res_sz; + + if (getline(&res_buf, &res_sz, file) == -1) { + free(res_buf); /* uclibc allocates a buffer even on EOF. WTF? */ + res_buf = NULL; + } +//TODO: trimming to res_sz? + return res_buf; +} +/* Get line. Remove trailing \n */ +char* FAST_FUNC xmalloc_fgetline(FILE *file) +{ + char *res_buf = NULL; + size_t res_sz; + + res_sz = getline(&res_buf, &res_sz, file); + + if ((ssize_t)res_sz != -1) { + if (res_buf[res_sz - 1] == '\n') + res_buf[--res_sz] = '\0'; +//TODO: trimming to res_sz? + } else { + free(res_buf); /* uclibc allocates a buffer even on EOF. WTF? */ + res_buf = NULL; + } + return res_buf; +} + +#endif + +#if 0 +/* Faster routines (~twice as fast). +170 bytes. Unused as of 2008-07. + * + * NB: they stop at NUL byte too. + * Performance is important here. Think "grep 50gigabyte_file"... + * Ironically, grep can't use it because of NUL issue. + * We sorely need C lib to provide fgets which reports size! + * + * Update: + * Actually, uclibc and glibc have it. man getline. It's GNUism, + * but very useful one (if it's as fast as this code). + * TODO: + * - currently, sed and sort use bb_get_chunk_from_file and heavily + * depend on its "stop on \n or \0" behavior, and STILL they fail + * to handle all cases with embedded NULs correctly. So: + * - audit sed and sort; convert them to getline FIRST. + * - THEN ditch bb_get_chunk_from_file, replace it with getline. + * - provide getline implementation for non-GNU systems. + */ + +static char* xmalloc_fgets_internal(FILE *file, int *sizep) +{ + int len; + int idx = 0; + char *linebuf = NULL; + + while (1) { + char *r; + + linebuf = xrealloc(linebuf, idx + 0x100); + r = fgets(&linebuf[idx], 0x100, file); + if (!r) { + /* need to terminate in case this is error + * (EOF puts NUL itself) */ + linebuf[idx] = '\0'; + break; + } + /* stupid. fgets knows the len, it should report it somehow */ + len = strlen(&linebuf[idx]); + idx += len; + if (len != 0xff || linebuf[idx - 1] == '\n') + break; + } + *sizep = idx; + if (idx) { + /* xrealloc(linebuf, idx + 1) is up to caller */ + return linebuf; + } + free(linebuf); + return NULL; +} + +/* Get line, remove trailing \n */ +char* FAST_FUNC xmalloc_fgetline_fast(FILE *file) +{ + int sz; + char *r = xmalloc_fgets_internal(file, &sz); + if (r && r[sz - 1] == '\n') + r[--sz] = '\0'; + return r; /* not xrealloc(r, sz + 1)! */ +} + +char* FAST_FUNC xmalloc_fgets(FILE *file) +{ + int sz; + return xmalloc_fgets_internal(file, &sz); +} + +/* Get line, remove trailing \n */ +char* FAST_FUNC xmalloc_fgetline(FILE *file) +{ + int sz; + char *r = xmalloc_fgets_internal(file, &sz); + if (!r) + return r; + if (r[sz - 1] == '\n') + r[--sz] = '\0'; + return xrealloc(r, sz + 1); +} +#endif