Make grep/egrep/fgrep independently selectable
[oweals/busybox.git] / findutils / grep.c
1 /* vi: set sw=4 ts=4: */
2 /*
3  * Mini grep implementation for busybox using libc regex.
4  *
5  * Copyright (C) 1999,2000,2001 by Lineo, inc. and Mark Whitley
6  * Copyright (C) 1999,2000,2001 by Mark Whitley <markw@codepoet.org>
7  *
8  * Licensed under GPLv2 or later, see file LICENSE in this source tree.
9  */
10 /* BB_AUDIT SUSv3 defects - unsupported option -x "match whole line only". */
11 /* BB_AUDIT GNU defects - always acts as -a.  */
12 /* http://www.opengroup.org/onlinepubs/007904975/utilities/grep.html */
13 /*
14  * 2004,2006 (C) Vladimir Oleynik <dzo@simtreas.ru> -
15  * correction "-e pattern1 -e pattern2" logic and more optimizations.
16  * precompiled regex
17  *
18  * (C) 2006 Jac Goudsmit added -o option
19  */
20
21 //config:config GREP
22 //config:       bool "grep"
23 //config:       default y
24 //config:       help
25 //config:         grep is used to search files for a specified pattern.
26 //config:
27 //config:config EGREP
28 //config:       bool "egrep"
29 //config:       default y
30 //config:       help
31 //config:         Alias to "grep -E"
32 //config:
33 //config:config FGREP
34 //config:       bool "fgrep"
35 //config:       default y
36 //config:       help
37 //config:         Alias to "grep -F"
38 //config:
39 //config:config FEATURE_GREP_CONTEXT
40 //config:       bool "Enable before and after context flags (-A, -B and -C)"
41 //config:       default y
42 //config:       depends on GREP || EGREP
43 //config:       help
44 //config:         Print the specified number of leading (-B) and/or trailing (-A)
45 //config:         context surrounding our matching lines.
46 //config:         Print the specified number of context lines (-C).
47
48 //applet:IF_GREP(APPLET(grep, BB_DIR_BIN, BB_SUID_DROP))
49 //applet:IF_EGREP(APPLET_ODDNAME(egrep, grep, BB_DIR_BIN, BB_SUID_DROP, egrep))
50 //applet:IF_FGREP(APPLET_ODDNAME(fgrep, grep, BB_DIR_BIN, BB_SUID_DROP, fgrep))
51
52 //kbuild:lib-$(CONFIG_GREP) += grep.o
53 //kbuild:lib-$(CONFIG_EGREP) += grep.o
54 //kbuild:lib-$(CONFIG_FGREP) += grep.o
55
56 #include "libbb.h"
57 #include "common_bufsiz.h"
58 #include "xregex.h"
59
60
61 /* options */
62 //usage:#define grep_trivial_usage
63 //usage:       "[-HhnlLoqvsriwFE"
64 //usage:        IF_EXTRA_COMPAT("z")
65 //usage:       "] [-m N] "
66 //usage:        IF_FEATURE_GREP_CONTEXT("[-A/B/C N] ")
67 //usage:       "PATTERN/-e PATTERN.../-f FILE [FILE]..."
68 //usage:#define grep_full_usage "\n\n"
69 //usage:       "Search for PATTERN in FILEs (or stdin)\n"
70 //usage:     "\n        -H      Add 'filename:' prefix"
71 //usage:     "\n        -h      Do not add 'filename:' prefix"
72 //usage:     "\n        -n      Add 'line_no:' prefix"
73 //usage:     "\n        -l      Show only names of files that match"
74 //usage:     "\n        -L      Show only names of files that don't match"
75 //usage:     "\n        -c      Show only count of matching lines"
76 //usage:     "\n        -o      Show only the matching part of line"
77 //usage:     "\n        -q      Quiet. Return 0 if PATTERN is found, 1 otherwise"
78 //usage:     "\n        -v      Select non-matching lines"
79 //usage:     "\n        -s      Suppress open and read errors"
80 //usage:     "\n        -r      Recurse"
81 //usage:     "\n        -i      Ignore case"
82 //usage:     "\n        -w      Match whole words only"
83 //usage:     "\n        -x      Match whole lines only"
84 //usage:     "\n        -F      PATTERN is a literal (not regexp)"
85 //usage:     "\n        -E      PATTERN is an extended regexp"
86 //usage:        IF_EXTRA_COMPAT(
87 //usage:     "\n        -z      Input is NUL terminated"
88 //usage:        )
89 //usage:     "\n        -m N    Match up to N times per file"
90 //usage:        IF_FEATURE_GREP_CONTEXT(
91 //usage:     "\n        -A N    Print N lines of trailing context"
92 //usage:     "\n        -B N    Print N lines of leading context"
93 //usage:     "\n        -C N    Same as '-A N -B N'"
94 //usage:        )
95 //usage:     "\n        -e PTRN Pattern to match"
96 //usage:     "\n        -f FILE Read pattern from file"
97 //usage:
98 //usage:#define grep_example_usage
99 //usage:       "$ grep root /etc/passwd\n"
100 //usage:       "root:x:0:0:root:/root:/bin/bash\n"
101 //usage:       "$ grep ^[rR]oo. /etc/passwd\n"
102 //usage:       "root:x:0:0:root:/root:/bin/bash\n"
103 //usage:
104 //usage:#define egrep_trivial_usage NOUSAGE_STR
105 //usage:#define egrep_full_usage ""
106 //usage:#define fgrep_trivial_usage NOUSAGE_STR
107 //usage:#define fgrep_full_usage ""
108
109 #define OPTSTR_GREP \
110         "lnqvscFiHhe:*f:*Lorm:+wx" \
111         IF_FEATURE_GREP_CONTEXT("A:+B:+C:+") \
112         "E" \
113         IF_EXTRA_COMPAT("z") \
114         "aI"
115 /* ignored: -a "assume all files to be text" */
116 /* ignored: -I "assume binary files have no matches" */
117 enum {
118         OPTBIT_l, /* list matched file names only */
119         OPTBIT_n, /* print line# */
120         OPTBIT_q, /* quiet - exit(EXIT_SUCCESS) of first match */
121         OPTBIT_v, /* invert the match, to select non-matching lines */
122         OPTBIT_s, /* suppress errors about file open errors */
123         OPTBIT_c, /* count matches per file (suppresses normal output) */
124         OPTBIT_F, /* literal match */
125         OPTBIT_i, /* case-insensitive */
126         OPTBIT_H, /* force filename display */
127         OPTBIT_h, /* inhibit filename display */
128         OPTBIT_e, /* -e PATTERN */
129         OPTBIT_f, /* -f FILE_WITH_PATTERNS */
130         OPTBIT_L, /* list unmatched file names only */
131         OPTBIT_o, /* show only matching parts of lines */
132         OPTBIT_r, /* recurse dirs */
133         OPTBIT_m, /* -m MAX_MATCHES */
134         OPTBIT_w, /* -w whole word match */
135         OPTBIT_x, /* -x whole line match */
136         IF_FEATURE_GREP_CONTEXT(    OPTBIT_A ,) /* -A NUM: after-match context */
137         IF_FEATURE_GREP_CONTEXT(    OPTBIT_B ,) /* -B NUM: before-match context */
138         IF_FEATURE_GREP_CONTEXT(    OPTBIT_C ,) /* -C NUM: -A and -B combined */
139         OPTBIT_E, /* extended regexp */
140         IF_EXTRA_COMPAT(            OPTBIT_z ,) /* input is NUL terminated */
141         OPT_l = 1 << OPTBIT_l,
142         OPT_n = 1 << OPTBIT_n,
143         OPT_q = 1 << OPTBIT_q,
144         OPT_v = 1 << OPTBIT_v,
145         OPT_s = 1 << OPTBIT_s,
146         OPT_c = 1 << OPTBIT_c,
147         OPT_F = 1 << OPTBIT_F,
148         OPT_i = 1 << OPTBIT_i,
149         OPT_H = 1 << OPTBIT_H,
150         OPT_h = 1 << OPTBIT_h,
151         OPT_e = 1 << OPTBIT_e,
152         OPT_f = 1 << OPTBIT_f,
153         OPT_L = 1 << OPTBIT_L,
154         OPT_o = 1 << OPTBIT_o,
155         OPT_r = 1 << OPTBIT_r,
156         OPT_m = 1 << OPTBIT_m,
157         OPT_w = 1 << OPTBIT_w,
158         OPT_x = 1 << OPTBIT_x,
159         OPT_A = IF_FEATURE_GREP_CONTEXT(    (1 << OPTBIT_A)) + 0,
160         OPT_B = IF_FEATURE_GREP_CONTEXT(    (1 << OPTBIT_B)) + 0,
161         OPT_C = IF_FEATURE_GREP_CONTEXT(    (1 << OPTBIT_C)) + 0,
162         OPT_E = 1 << OPTBIT_E,
163         OPT_z = IF_EXTRA_COMPAT(            (1 << OPTBIT_z)) + 0,
164 };
165
166 #define PRINT_FILES_WITH_MATCHES    (option_mask32 & OPT_l)
167 #define PRINT_LINE_NUM              (option_mask32 & OPT_n)
168 #define BE_QUIET                    (option_mask32 & OPT_q)
169 #define SUPPRESS_ERR_MSGS           (option_mask32 & OPT_s)
170 #define PRINT_MATCH_COUNTS          (option_mask32 & OPT_c)
171 #define FGREP_FLAG                  (option_mask32 & OPT_F)
172 #define PRINT_FILES_WITHOUT_MATCHES (option_mask32 & OPT_L)
173 #define NUL_DELIMITED               (option_mask32 & OPT_z)
174
175 struct globals {
176         int max_matches;
177 #if !ENABLE_EXTRA_COMPAT
178         int reflags;
179 #else
180         RE_TRANSLATE_TYPE case_fold; /* RE_TRANSLATE_TYPE is [[un]signed] char* */
181 #endif
182         smalluint invert_search;
183         smalluint print_filename;
184         smalluint open_errors;
185 #if ENABLE_FEATURE_GREP_CONTEXT
186         smalluint did_print_line;
187         int lines_before;
188         int lines_after;
189         char **before_buf;
190         IF_EXTRA_COMPAT(size_t *before_buf_size;)
191         int last_line_printed;
192 #endif
193         /* globals used internally */
194         llist_t *pattern_head;   /* growable list of patterns to match */
195         const char *cur_file;    /* the current file we are reading */
196 } FIX_ALIASING;
197 #define G (*(struct globals*)bb_common_bufsiz1)
198 #define INIT_G() do { \
199         setup_common_bufsiz(); \
200         BUILD_BUG_ON(sizeof(G) > COMMON_BUFSIZE); \
201 } while (0)
202 #define max_matches       (G.max_matches         )
203 #if !ENABLE_EXTRA_COMPAT
204 # define reflags          (G.reflags             )
205 #else
206 # define case_fold        (G.case_fold           )
207 /* http://www.delorie.com/gnu/docs/regex/regex_46.html */
208 # define reflags           re_syntax_options
209 # undef REG_NOSUB
210 # undef REG_EXTENDED
211 # undef REG_ICASE
212 # define REG_NOSUB    bug:is:here /* should not be used */
213 /* Just RE_SYNTAX_EGREP is not enough, need to enable {n[,[m]]} too */
214 # define REG_EXTENDED (RE_SYNTAX_EGREP | RE_INTERVALS | RE_NO_BK_BRACES)
215 # define REG_ICASE    bug:is:here /* should not be used */
216 #endif
217 #define invert_search     (G.invert_search       )
218 #define print_filename    (G.print_filename      )
219 #define open_errors       (G.open_errors         )
220 #define did_print_line    (G.did_print_line      )
221 #define lines_before      (G.lines_before        )
222 #define lines_after       (G.lines_after         )
223 #define before_buf        (G.before_buf          )
224 #define before_buf_size   (G.before_buf_size     )
225 #define last_line_printed (G.last_line_printed   )
226 #define pattern_head      (G.pattern_head        )
227 #define cur_file          (G.cur_file            )
228
229
/* Bits stored in grep_list_data_t.flg_mem_allocated_compiled */
#define ALLOCATED 1
#define COMPILED 2

/* One entry of the pattern list: the pattern text plus its regex state */
typedef struct grep_list_data_t {
	char *pattern;
/* for GNU regex, matched_range must be persistent across grep_file() calls */
#if ENABLE_EXTRA_COMPAT
	struct re_pattern_buffer compiled_regex;
	struct re_registers matched_range;
#else
	regex_t compiled_regex;
	regmatch_t matched_range;
#endif
	int flg_mem_allocated_compiled; /* ALLOCATED and/or COMPILED */
} grep_list_data_t;
244
245 #if !ENABLE_EXTRA_COMPAT
246 #define print_line(line, line_len, linenum, decoration) \
247         print_line(line, linenum, decoration)
248 #endif
249 static void print_line(const char *line, size_t line_len, int linenum, char decoration)
250 {
251 #if ENABLE_FEATURE_GREP_CONTEXT
252         /* Happens when we go to next file, immediately hit match
253          * and try to print prev context... from prev file! Don't do it */
254         if (linenum < 1)
255                 return;
256         /* possibly print the little '--' separator */
257         if ((lines_before || lines_after) && did_print_line
258          && last_line_printed != linenum - 1
259         ) {
260                 puts("--");
261         }
262         /* guard against printing "--" before first line of first file */
263         did_print_line = 1;
264         last_line_printed = linenum;
265 #endif
266         if (print_filename)
267                 printf("%s%c", cur_file, decoration);
268         if (PRINT_LINE_NUM)
269                 printf("%i%c", linenum, decoration);
270         /* Emulate weird GNU grep behavior with -ov */
271         if ((option_mask32 & (OPT_v|OPT_o)) != (OPT_v|OPT_o)) {
272 #if !ENABLE_EXTRA_COMPAT
273                 puts(line);
274 #else
275                 fwrite(line, 1, line_len, stdout);
276                 putchar(NUL_DELIMITED ? '\0' : '\n');
277 #endif
278         }
279 }
280
#if ENABLE_EXTRA_COMPAT
/*
 * getdelim() wrapper. Unlike getline, this one removes the trailing
 * delimiter ('\n', or NUL with -z) from the returned record.
 * Returns the record length without the delimiter. If getdelim() yields
 * nothing (result <= 0), the buffer is freed and that result is returned.
 */
static ssize_t FAST_FUNC bb_getline(char **line_ptr, size_t *line_alloc_len, FILE *file)
{
	const int delim = NUL_DELIMITED ? '\0' : '\n';
	ssize_t nread = getdelim(line_ptr, line_alloc_len, delim, file);
	char *buf = *line_ptr;

	if (nread <= 0) {
		free(buf); /* uclibc allocates a buffer even on EOF. WTF? */
		return nread;
	}

	/* chop the delimiter, if the record ends with one */
	if (buf[nread - 1] == delim) {
		nread--;
		buf[nread] = '\0';
	}
	return nread;
}
#endif
301
302 static int grep_file(FILE *file)
303 {
304         smalluint found;
305         int linenum = 0;
306         int nmatches = 0;
307 #if !ENABLE_EXTRA_COMPAT
308         char *line;
309 #else
310         char *line = NULL;
311         ssize_t line_len;
312         size_t line_alloc_len;
313 # define rm_so start[0]
314 # define rm_eo end[0]
315 #endif
316 #if ENABLE_FEATURE_GREP_CONTEXT
317         int print_n_lines_after = 0;
318         int curpos = 0; /* track where we are in the circular 'before' buffer */
319         int idx = 0; /* used for iteration through the circular buffer */
320 #else
321         enum { print_n_lines_after = 0 };
322 #endif
323
324         while (
325 #if !ENABLE_EXTRA_COMPAT
326                 (line = xmalloc_fgetline(file)) != NULL
327 #else
328                 (line_len = bb_getline(&line, &line_alloc_len, file)) >= 0
329 #endif
330         ) {
331                 llist_t *pattern_ptr = pattern_head;
332                 grep_list_data_t *gl = gl; /* for gcc */
333
334                 linenum++;
335                 found = 0;
336                 while (pattern_ptr) {
337                         gl = (grep_list_data_t *)pattern_ptr->data;
338                         if (FGREP_FLAG) {
339                                 char *match;
340                                 char *str = line;
341  opt_f_again:
342                                 match = ((option_mask32 & OPT_i)
343                                         ? strcasestr(str, gl->pattern)
344                                         : strstr(str, gl->pattern)
345                                         );
346                                 if (match) {
347                                         if (option_mask32 & OPT_x) {
348                                                 if (match != str)
349                                                         goto opt_f_not_found;
350                                                 if (str[strlen(gl->pattern)] != '\0')
351                                                         goto opt_f_not_found;
352                                         } else
353                                         if (option_mask32 & OPT_w) {
354                                                 char c = (match != str) ? match[-1] : ' ';
355                                                 if (!isalnum(c) && c != '_') {
356                                                         c = match[strlen(gl->pattern)];
357                                                         if (!c || (!isalnum(c) && c != '_'))
358                                                                 goto opt_f_found;
359                                                 }
360                                                 str = match + 1;
361                                                 goto opt_f_again;
362                                         }
363  opt_f_found:
364                                         found = 1;
365  opt_f_not_found: ;
366                                 }
367                         } else {
368 #if ENABLE_EXTRA_COMPAT
369                                 unsigned start_pos;
370 #else
371                                 int match_flg;
372 #endif
373                                 char *match_at;
374
375                                 if (!(gl->flg_mem_allocated_compiled & COMPILED)) {
376                                         gl->flg_mem_allocated_compiled |= COMPILED;
377 #if !ENABLE_EXTRA_COMPAT
378                                         xregcomp(&gl->compiled_regex, gl->pattern, reflags);
379 #else
380                                         memset(&gl->compiled_regex, 0, sizeof(gl->compiled_regex));
381                                         gl->compiled_regex.translate = case_fold; /* for -i */
382                                         if (re_compile_pattern(gl->pattern, strlen(gl->pattern), &gl->compiled_regex))
383                                                 bb_error_msg_and_die("bad regex '%s'", gl->pattern);
384 #endif
385                                 }
386 #if !ENABLE_EXTRA_COMPAT
387                                 gl->matched_range.rm_so = 0;
388                                 gl->matched_range.rm_eo = 0;
389                                 match_flg = 0;
390 #else
391                                 start_pos = 0;
392 #endif
393                                 match_at = line;
394  opt_w_again:
395 //bb_error_msg("'%s' start_pos:%d line_len:%d", match_at, start_pos, line_len);
396                                 if (
397 #if !ENABLE_EXTRA_COMPAT
398                                         regexec(&gl->compiled_regex, match_at, 1, &gl->matched_range, match_flg) == 0
399 #else
400                                         re_search(&gl->compiled_regex, match_at, line_len,
401                                                         start_pos, /*range:*/ line_len,
402                                                         &gl->matched_range) >= 0
403 #endif
404                                 ) {
405                                         if (option_mask32 & OPT_x) {
406                                                 found = (gl->matched_range.rm_so == 0
407                                                          && match_at[gl->matched_range.rm_eo] == '\0');
408                                         } else
409                                         if (!(option_mask32 & OPT_w)) {
410                                                 found = 1;
411                                         } else {
412                                                 char c = ' ';
413                                                 if (match_at > line || gl->matched_range.rm_so != 0) {
414                                                         c = match_at[gl->matched_range.rm_so - 1];
415                                                 }
416                                                 if (!isalnum(c) && c != '_') {
417                                                         c = match_at[gl->matched_range.rm_eo];
418                                                 }
419                                                 if (!isalnum(c) && c != '_') {
420                                                         found = 1;
421                                                 } else {
422                         /*
423                          * Why check gl->matched_range.rm_eo?
424                          * Zero-length match makes -w skip the line:
425                          * "echo foo | grep ^" prints "foo",
426                          * "echo foo | grep -w ^" prints nothing.
427                          * Without such check, we can loop forever.
428                          */
429 #if !ENABLE_EXTRA_COMPAT
430                                                         if (gl->matched_range.rm_eo != 0) {
431                                                                 match_at += gl->matched_range.rm_eo;
432                                                                 match_flg |= REG_NOTBOL;
433                                                                 goto opt_w_again;
434                                                         }
435 #else
436                                                         if (gl->matched_range.rm_eo > start_pos) {
437                                                                 start_pos = gl->matched_range.rm_eo;
438                                                                 goto opt_w_again;
439                                                         }
440 #endif
441                                                 }
442                                         }
443                                 }
444                         }
445                         /* If it's non-inverted search, we can stop
446                          * at first match */
447                         if (found && !invert_search)
448                                 goto do_found;
449                         pattern_ptr = pattern_ptr->link;
450                 } /* while (pattern_ptr) */
451
452                 if (found ^ invert_search) {
453  do_found:
454                         /* keep track of matches */
455                         nmatches++;
456
457                         /* quiet/print (non)matching file names only? */
458                         if (option_mask32 & (OPT_q|OPT_l|OPT_L)) {
459                                 free(line); /* we don't need line anymore */
460                                 if (BE_QUIET) {
461                                         /* manpage says about -q:
462                                          * "exit immediately with zero status
463                                          * if any match is found,
464                                          * even if errors were detected" */
465                                         exit(EXIT_SUCCESS);
466                                 }
467                                 /* if we're just printing filenames, we stop after the first match */
468                                 if (PRINT_FILES_WITH_MATCHES) {
469                                         puts(cur_file);
470                                         /* fall through to "return 1" */
471                                 }
472                                 /* OPT_L aka PRINT_FILES_WITHOUT_MATCHES: return early */
473                                 return 1; /* one match */
474                         }
475
476 #if ENABLE_FEATURE_GREP_CONTEXT
477                         /* Were we printing context and saw next (unwanted) match? */
478                         if ((option_mask32 & OPT_m) && nmatches > max_matches)
479                                 break;
480 #endif
481
482                         /* print the matched line */
483                         if (PRINT_MATCH_COUNTS == 0) {
484 #if ENABLE_FEATURE_GREP_CONTEXT
485                                 int prevpos = (curpos == 0) ? lines_before - 1 : curpos - 1;
486
487                                 /* if we were told to print 'before' lines and there is at least
488                                  * one line in the circular buffer, print them */
489                                 if (lines_before && before_buf[prevpos] != NULL) {
490                                         int first_buf_entry_line_num = linenum - lines_before;
491
492                                         /* advance to the first entry in the circular buffer, and
493                                          * figure out the line number is of the first line in the
494                                          * buffer */
495                                         idx = curpos;
496                                         while (before_buf[idx] == NULL) {
497                                                 idx = (idx + 1) % lines_before;
498                                                 first_buf_entry_line_num++;
499                                         }
500
501                                         /* now print each line in the buffer, clearing them as we go */
502                                         while (before_buf[idx] != NULL) {
503                                                 print_line(before_buf[idx], before_buf_size[idx], first_buf_entry_line_num, '-');
504                                                 free(before_buf[idx]);
505                                                 before_buf[idx] = NULL;
506                                                 idx = (idx + 1) % lines_before;
507                                                 first_buf_entry_line_num++;
508                                         }
509                                 }
510
511                                 /* make a note that we need to print 'after' lines */
512                                 print_n_lines_after = lines_after;
513 #endif
514                                 if (option_mask32 & OPT_o) {
515                                         if (FGREP_FLAG) {
516                                                 /* -Fo just prints the pattern
517                                                  * (unless -v: -Fov doesnt print anything at all) */
518                                                 if (found)
519                                                         print_line(gl->pattern, strlen(gl->pattern), linenum, ':');
520                                         } else while (1) {
521                                                 unsigned start = gl->matched_range.rm_so;
522                                                 unsigned end = gl->matched_range.rm_eo;
523                                                 unsigned len = end - start;
524                                                 char old = line[end];
525                                                 line[end] = '\0';
526                                                 /* Empty match is not printed: try "echo test | grep -o ''" */
527                                                 if (len != 0)
528                                                         print_line(line + start, len, linenum, ':');
529                                                 if (old == '\0')
530                                                         break;
531                                                 line[end] = old;
532                                                 if (len == 0)
533                                                         end++;
534 #if !ENABLE_EXTRA_COMPAT
535                                                 if (regexec(&gl->compiled_regex, line + end,
536                                                                 1, &gl->matched_range, REG_NOTBOL) != 0)
537                                                         break;
538                                                 gl->matched_range.rm_so += end;
539                                                 gl->matched_range.rm_eo += end;
540 #else
541                                                 if (re_search(&gl->compiled_regex, line, line_len,
542                                                                 end, line_len - end,
543                                                                 &gl->matched_range) < 0)
544                                                         break;
545 #endif
546                                         }
547                                 } else {
548                                         print_line(line, line_len, linenum, ':');
549                                 }
550                         }
551                 }
552 #if ENABLE_FEATURE_GREP_CONTEXT
553                 else { /* no match */
554                         /* if we need to print some context lines after the last match, do so */
555                         if (print_n_lines_after) {
556                                 print_line(line, strlen(line), linenum, '-');
557                                 print_n_lines_after--;
558                         } else if (lines_before) {
559                                 /* Add the line to the circular 'before' buffer */
560                                 free(before_buf[curpos]);
561                                 before_buf[curpos] = line;
562                                 IF_EXTRA_COMPAT(before_buf_size[curpos] = line_len;)
563                                 curpos = (curpos + 1) % lines_before;
564                                 /* avoid free(line) - we took the line */
565                                 line = NULL;
566                         }
567                 }
568
569 #endif /* ENABLE_FEATURE_GREP_CONTEXT */
570 #if !ENABLE_EXTRA_COMPAT
571                 free(line);
572 #endif
573                 /* Did we print all context after last requested match? */
574                 if ((option_mask32 & OPT_m)
575                  && !print_n_lines_after
576                  && nmatches == max_matches
577                 ) {
578                         break;
579                 }
580         } /* while (read line) */
581
582         /* special-case file post-processing for options where we don't print line
583          * matches, just filenames and possibly match counts */
584
585         /* grep -c: print [filename:]count, even if count is zero */
586         if (PRINT_MATCH_COUNTS) {
587                 if (print_filename)
588                         printf("%s:", cur_file);
589                 printf("%d\n", nmatches);
590         }
591
592         /* grep -L: print just the filename */
593         if (PRINT_FILES_WITHOUT_MATCHES) {
594                 /* nmatches is zero, no need to check it:
595                  * we return 1 early if we detected a match
596                  * and PRINT_FILES_WITHOUT_MATCHES is set */
597                 puts(cur_file);
598         }
599
600         return nmatches;
601 }
602
603 #if ENABLE_FEATURE_CLEAN_UP
604 #define new_grep_list_data(p, m) add_grep_list_data(p, m)
605 static char *add_grep_list_data(char *pattern, int flg_used_mem)
606 #else
607 #define new_grep_list_data(p, m) add_grep_list_data(p)
608 static char *add_grep_list_data(char *pattern)
609 #endif
610 {
611         grep_list_data_t *gl = xzalloc(sizeof(*gl));
612         gl->pattern = pattern;
613 #if ENABLE_FEATURE_CLEAN_UP
614         gl->flg_mem_allocated_compiled = flg_used_mem;
615 #else
616         /*gl->flg_mem_allocated_compiled = 0;*/
617 #endif
618         return (char *)gl;
619 }
620
621 static void load_regexes_from_file(llist_t *fopt)
622 {
623         while (fopt) {
624                 char *line;
625                 FILE *fp;
626                 llist_t *cur = fopt;
627                 char *ffile = cur->data;
628
629                 fopt = cur->link;
630                 free(cur);
631                 fp = xfopen_stdin(ffile);
632                 while ((line = xmalloc_fgetline(fp)) != NULL) {
633                         llist_add_to(&pattern_head,
634                                 new_grep_list_data(line, ALLOCATED));
635                 }
636                 fclose_if_not_stdin(fp);
637         }
638 }
639
640 static int FAST_FUNC file_action_grep(const char *filename,
641                         struct stat *statbuf UNUSED_PARAM,
642                         void* matched,
643                         int depth UNUSED_PARAM)
644 {
645         FILE *file = fopen_for_read(filename);
646         if (file == NULL) {
647                 if (!SUPPRESS_ERR_MSGS)
648                         bb_simple_perror_msg(filename);
649                 open_errors = 1;
650                 return 0;
651         }
652         cur_file = filename;
653         *(int*)matched += grep_file(file);
654         fclose(file);
655         return 1;
656 }
657
658 static int grep_dir(const char *dir)
659 {
660         int matched = 0;
661         recursive_action(dir,
662                 /* recurse=yes */ ACTION_RECURSE |
663                 /* followLinks=command line only */ ACTION_FOLLOWLINKS_L0 |
664                 /* depthFirst=yes */ ACTION_DEPTHFIRST,
665                 /* fileAction= */ file_action_grep,
666                 /* dirAction= */ NULL,
667                 /* userData= */ &matched,
668                 /* depth= */ 0);
669         return matched;
670 }
671
672 int grep_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
673 int grep_main(int argc UNUSED_PARAM, char **argv)
674 {
675         FILE *file;
676         int matched;
677         llist_t *fopt = NULL;
678 #if ENABLE_FEATURE_GREP_CONTEXT
679         int Copt, opts;
680 #endif
681         INIT_G();
682
683         /* For grep, exitcode of 1 is "not found". Other errors are 2: */
684         xfunc_error_retval = 2;
685
686         /* do normal option parsing */
687 #if ENABLE_FEATURE_GREP_CONTEXT
688         /* -H unsets -h; -C unsets -A,-B; -e,-f are lists;
689          * -m,-A,-B,-C have numeric param */
690         opt_complementary = "H-h:C-AB";
691         opts = getopt32(argv,
692                 OPTSTR_GREP,
693                 &pattern_head, &fopt, &max_matches,
694                 &lines_after, &lines_before, &Copt);
695
696         if (opts & OPT_C) {
697                 /* -C unsets prev -A and -B, but following -A or -B
698                  * may override it */
699                 if (!(opts & OPT_A)) /* not overridden */
700                         lines_after = Copt;
701                 if (!(opts & OPT_B)) /* not overridden */
702                         lines_before = Copt;
703         }
704         /* sanity checks */
705         if (opts & (OPT_c|OPT_q|OPT_l|OPT_L)) {
706                 option_mask32 &= ~OPT_n;
707                 lines_before = 0;
708                 lines_after = 0;
709         } else if (lines_before > 0) {
710                 if (lines_before > INT_MAX / sizeof(long long))
711                         lines_before = INT_MAX / sizeof(long long);
712                 /* overflow in (lines_before * sizeof(x)) is prevented (above) */
713                 before_buf = xzalloc(lines_before * sizeof(before_buf[0]));
714                 IF_EXTRA_COMPAT(before_buf_size = xzalloc(lines_before * sizeof(before_buf_size[0]));)
715         }
716 #else
717         /* with auto sanity checks */
718         /* -H unsets -h; -c,-q or -l unset -n; -e,-f are lists; -m N */
719         opt_complementary = "H-h:c-n:q-n:l-n:";
720         getopt32(argv, OPTSTR_GREP,
721                 &pattern_head, &fopt, &max_matches);
722 #endif
723         invert_search = ((option_mask32 & OPT_v) != 0); /* 0 | 1 */
724
725         {       /* convert char **argv to grep_list_data_t */
726                 llist_t *cur;
727                 for (cur = pattern_head; cur; cur = cur->link)
728                         cur->data = new_grep_list_data(cur->data, 0);
729         }
730         if (option_mask32 & OPT_f) {
731                 load_regexes_from_file(fopt);
732                 if (!pattern_head) { /* -f EMPTY_FILE? */
733                         /* GNU grep treats it as "nothing matches" */
734                         llist_add_to(&pattern_head, new_grep_list_data((char*) "", 0));
735                         invert_search ^= 1;
736                 }
737         }
738
739         if (ENABLE_FGREP && applet_name[0] == 'f')
740                 option_mask32 |= OPT_F;
741
742 #if !ENABLE_EXTRA_COMPAT
743         if (!(option_mask32 & (OPT_o | OPT_w | OPT_x)))
744                 reflags = REG_NOSUB;
745 #endif
746
747         if ((ENABLE_EGREP && applet_name[0] == 'e')
748          || (option_mask32 & OPT_E)
749         ) {
750                 reflags |= REG_EXTENDED;
751         }
752 #if ENABLE_EXTRA_COMPAT
753         else {
754                 reflags = RE_SYNTAX_GREP;
755         }
756 #endif
757
758         if (option_mask32 & OPT_i) {
759 #if !ENABLE_EXTRA_COMPAT
760                 reflags |= REG_ICASE;
761 #else
762                 int i;
763                 case_fold = xmalloc(256);
764                 for (i = 0; i < 256; i++)
765                         case_fold[i] = (unsigned char)i;
766                 for (i = 'a'; i <= 'z'; i++)
767                         case_fold[i] = (unsigned char)(i - ('a' - 'A'));
768 #endif
769         }
770
771         argv += optind;
772
773         /* if we didn't get a pattern from -e and no command file was specified,
774          * first parameter should be the pattern. no pattern, no worky */
775         if (pattern_head == NULL) {
776                 char *pattern;
777                 if (*argv == NULL)
778                         bb_show_usage();
779                 pattern = new_grep_list_data(*argv++, 0);
780                 llist_add_to(&pattern_head, pattern);
781         }
782
783         /* argv[0..(argc-1)] should be names of file to grep through. If
784          * there is more than one file to grep, we will print the filenames. */
785         if (argv[0] && argv[1])
786                 print_filename = 1;
787         /* -H / -h of course override */
788         if (option_mask32 & OPT_H)
789                 print_filename = 1;
790         if (option_mask32 & OPT_h)
791                 print_filename = 0;
792
793         /* If no files were specified, or '-' was specified, take input from
794          * stdin. Otherwise, we grep through all the files specified. */
795         matched = 0;
796         do {
797                 cur_file = *argv;
798                 file = stdin;
799                 if (!cur_file || LONE_DASH(cur_file)) {
800                         cur_file = "(standard input)";
801                 } else {
802                         if (option_mask32 & OPT_r) {
803                                 struct stat st;
804                                 if (stat(cur_file, &st) == 0 && S_ISDIR(st.st_mode)) {
805                                         if (!(option_mask32 & OPT_h))
806                                                 print_filename = 1;
807                                         matched += grep_dir(cur_file);
808                                         goto grep_done;
809                                 }
810                         }
811                         /* else: fopen(dir) will succeed, but reading won't */
812                         file = fopen_for_read(cur_file);
813                         if (file == NULL) {
814                                 if (!SUPPRESS_ERR_MSGS)
815                                         bb_simple_perror_msg(cur_file);
816                                 open_errors = 1;
817                                 continue;
818                         }
819                 }
820                 matched += grep_file(file);
821                 fclose_if_not_stdin(file);
822  grep_done: ;
823         } while (*argv && *++argv);
824
825         /* destroy all the elments in the pattern list */
826         if (ENABLE_FEATURE_CLEAN_UP) {
827                 while (pattern_head) {
828                         llist_t *pattern_head_ptr = pattern_head;
829                         grep_list_data_t *gl = (grep_list_data_t *)pattern_head_ptr->data;
830
831                         pattern_head = pattern_head->link;
832                         if (gl->flg_mem_allocated_compiled & ALLOCATED)
833                                 free(gl->pattern);
834                         if (gl->flg_mem_allocated_compiled & COMPILED)
835                                 regfree(&gl->compiled_regex);
836                         free(gl);
837                         free(pattern_head_ptr);
838                 }
839         }
840         /* 0 = success, 1 = failed, 2 = error */
841         if (open_errors)
842                 return 2;
843         return !matched; /* invert return value: 0 = success, 1 = failed */
844 }