sed: implement ",+N" range end
author Denys Vlasenko <vda.linux@googlemail.com>
Fri, 17 Apr 2015 12:24:55 +0000 (14:24 +0200)
committer Denys Vlasenko <vda.linux@googlemail.com>
Fri, 17 Apr 2015 12:24:55 +0000 (14:24 +0200)
function                                             old     new   delta
add_cmd                                             1115    1173     +58
process_files                                       2226    2253     +27
sed_main                                             696     702      +6
------------------------------------------------------------------------------
(add/remove: 0/0 grow/shrink: 3/0 up/down: 91/0)               Total: 91 bytes

Signed-off-by: Denys Vlasenko <vda.linux@googlemail.com>
editors/sed.c
testsuite/sed.tests

index 2c64ad500918473c8fa76d2d87ddf8d2032fbbca..7bbf820d8571627ef127d771cb4e67aac4938580 100644 (file)
@@ -53,6 +53,7 @@
  * Reference
  * http://www.opengroup.org/onlinepubs/007904975/utilities/sed.html
  * http://pubs.opengroup.org/onlinepubs/9699919799/utilities/sed.html
+ * http://sed.sourceforge.net/sedfaq3.html
  */
 
 //config:config SED
@@ -109,7 +110,8 @@ typedef struct sed_cmd_s {
        regex_t *sub_match;     /* For 's/sub_match/string/' */
        int beg_line;           /* 'sed 1p'   0 == apply commands to all lines */
        int beg_line_orig;      /* copy of the above, needed for -i */
-       int end_line;           /* 'sed 1,3p' 0 == one line only. -1 = last line ($) */
+       int end_line;           /* 'sed 1,3p' 0 == one line only. -1 = last line ($). -2-N = +N */
+       int end_line_orig;
 
        FILE *sw_file;          /* File (sw) command writes to, -1 for none. */
        char *string;           /* Data string for (saicytb) commands. */
@@ -640,10 +642,29 @@ static void add_cmd(const char *cmdstr)
                        int idx;
 
                        cmdstr++;
-                       idx = get_address(cmdstr, &sed_cmd->end_line, &sed_cmd->end_match);
-                       if (!idx)
+                       if (*cmdstr == '+' && isdigit(cmdstr[1])) {
+                               /* http://sed.sourceforge.net/sedfaq3.html#s3.3
+                                * Under GNU sed 3.02+, ssed, and sed15+, <address2>
+                                * may also be a notation of the form +num,
+                                * indicating the next num lines after <address1> is
+                                * matched.
+                                * GNU sed 4.2.1 accepts even "+" (meaning "+0").
+                                * We don't (we check for isdigit, see above), think
+                                * about the "+-3" case.
+                                */
+                               char *end;
+                               /* code is smaller compared to using &cmdstr here: */
+                               idx = strtol(cmdstr+1, &end, 10);
+                               sed_cmd->end_line = -2 - idx;
+                               cmdstr = end;
+                       } else {
+                               idx = get_address(cmdstr, &sed_cmd->end_line, &sed_cmd->end_match);
+                               cmdstr += idx;
+                               idx--; /* if 0, trigger error check below */
+                       }
+                       if (idx < 0)
                                bb_error_msg_and_die("no address after comma");
-                       cmdstr += idx;
+                       sed_cmd->end_line_orig = sed_cmd->end_line;
                }
 
                /* skip whitespace before the command */
@@ -1089,10 +1110,19 @@ static void process_files(void)
                /* Is this line the end of the current match? */
 
                if (matched) {
+                       if (sed_cmd->end_line <= -2) {
+                               /* address2 is +N, i.e. N lines from beg_line */
+                               sed_cmd->end_line = linenum + (-sed_cmd->end_line - 2);
+                       }
                        /* once matched, "n,xxx" range is dead, disabling it */
                        if (sed_cmd->beg_line > 0) {
                                sed_cmd->beg_line = -2;
                        }
+                       dbg("end1:%d", sed_cmd->end_line ? sed_cmd->end_line == -1
+                                               ? !next_line : (sed_cmd->end_line <= linenum)
+                                       : !sed_cmd->end_match);
+                       dbg("end2:%d", sed_cmd->end_match && old_matched
+                                       && !regexec(sed_cmd->end_match,pattern_space, 0, NULL, 0));
                        sed_cmd->in_match = !(
                                /* has the ending line come, or is this a single address command? */
                                (sed_cmd->end_line
@@ -1551,9 +1581,10 @@ int sed_main(int argc UNUSED_PARAM, char **argv)
                        free(G.outname);
                        G.outname = NULL;
 
-                       /* Re-enable disabled range matches */
+                       /* Fix disabled range matches and mangled ",+N" ranges */
                        for (sed_cmd = G.sed_cmd_head; sed_cmd; sed_cmd = sed_cmd->next) {
                                sed_cmd->beg_line = sed_cmd->beg_line_orig;
+                               sed_cmd->end_line = sed_cmd->end_line_orig;
                        }
                }
                /* Here, to handle "sed 'cmds' nonexistent_file" case we did:
index 19f2915ced55e4152868e35b1df13a56122fb1c0..34479e55f090f6bae5c86b0909ff1de6fec70346 100755 (executable)
@@ -333,6 +333,38 @@ testing "sed s///NUM test" \
        "sed -e 's/a/b/2; s/a/c/g'" \
        "cb\n" "" "aa\n"
 
+testing "sed /regex/,N{...} addresses work" \
+       "sed /^2/,2{d}" \
+       "1\n3\n4\n5\n" \
+       "" \
+       "1\n2\n3\n4\n5\n"
+
+testing "sed /regex/,+N{...} addresses work" \
+       "sed /^2/,+2{d}" \
+       "1\n5\n" \
+       "" \
+       "1\n2\n3\n4\n5\n"
+
+testing "sed /regex/,+N{...} -i works" \
+       "cat - >input2; sed /^4/,+2{d} -i input input2; echo \$?; cat input input2; rm input2" \
+       "0\n""1\n2\n3\n7\n8\n""1\n2\n7\n8\n" \
+       "1\n2\n3\n4\n5\n6\n7\n8\n" \
+       "1\n2\n4\n5\n6\n7\n8\n" \
+
+# GNU sed 4.2.1 would also accept "/^4/,+{d}" with the same meaning, we don't
+testing "sed /regex/,+0{...} -i works" \
+       "cat - >input2; sed /^4/,+0{d} -i input input2; echo \$?; cat input input2; rm input2" \
+       "0\n""1\n2\n3\n5\n6\n7\n8\n""1\n2\n5\n6\n7\n8\n" \
+       "1\n2\n3\n4\n5\n6\n7\n8\n" \
+       "1\n2\n4\n5\n6\n7\n8\n" \
+
+# GNU sed 4.2.1 would also accept "/^4/,+d" with the same meaning, we don't
+testing "sed /regex/,+0<cmd> -i works" \
+       "cat - >input2; sed /^4/,+0d -i input input2; echo \$?; cat input input2; rm input2" \
+       "0\n""1\n2\n3\n5\n6\n7\n8\n""1\n2\n5\n6\n7\n8\n" \
+       "1\n2\n3\n4\n5\n6\n7\n8\n" \
+       "1\n2\n4\n5\n6\n7\n8\n" \
+
 # testing "description" "commands" "result" "infile" "stdin"
 
 exit $FAILCOUNT