From: Denys Vlasenko Date: Thu, 27 Oct 2016 21:51:19 +0000 (+0200) Subject: ash: [JOBS] Fix dowait signal race X-Git-Tag: 1_26_0~104 X-Git-Url: https://git.librecmc.org/?a=commitdiff_plain;h=458c1f218bb6a6bd0325f0b0cedea5ab0980dbc3;p=oweals%2Fbusybox.git ash: [JOBS] Fix dowait signal race Upstream commit: Date: Sun, 22 Feb 2009 18:10:01 +0800 [JOBS] Fix dowait signal race This test program by Alexey Gladkov can cause dash to enter an infinite loop in waitcmd. #!/bin/dash trap "echo TRAP" USR1 stub() { echo ">>> STUB $1" >&2 sleep $1 echo "<<< STUB $1" >&2 kill -USR1 $$ } stub 3 & stub 2 & until { echo "###"; wait; } do echo "*** $?" done The problem is that if we get a signal after the wait3 system call has returned but before we get to INTON in dowait, then we can jump back up to the top and lose the exit status. So if we then wait for the job that has just exited, then it'll stay there forever. I made the original change that caused this bug to fix pretty much the same bug but in the opposite direction. That is, if we get a signal after we enter wait3 but before we hit the kernel then it too can cause the wait to go on forever (assuming the child doesn't exit). In fact this is pretty much exactly the scenario that you'll find in glibc's documentation on pause(). The solution is given there too, in the form of sigsuspend, which is the only way to do the check and wait atomically. So this patch fixes Alexey's race without reintroducing the old bug by converting the blocking wait3 to a sigsuspend. In order to do this we need to set a signal handler for SIGCHLD, so the code has been modified to always do that. Signed-off-by: Herbert Xu I failed to reproduce the bug (it requires precise timing), but it seems real. function old new delta dowait 284 463 +179 setsignal 301 326 +25 signal_handler 59 76 +17 ash_main 1481 1487 +6 localcmd 350 348 -2 ------------------------------------------------------------------------------ (add/remove: 0/0 grow/shrink: 4/1 up/down: 227/-2) Total: 225 bytes Signed-off-by: Denys Vlasenko --- diff --git a/shell/ash.c b/shell/ash.c index b060d227a..1ef02b8be 100644 --- a/shell/ash.c +++ b/shell/ash.c @@ -295,6 +295,7 @@ struct globals_misc { volatile int suppress_int; /* counter */ volatile /*sig_atomic_t*/ smallint pending_int; /* 1 = got SIGINT */ + volatile /*sig_atomic_t*/ smallint got_sigchld; /* 1 = got SIGCHLD */ /* last pending signal */ volatile /*sig_atomic_t*/ smallint pending_sig; smallint exception_type; /* kind of exception (0..5) */ @@ -370,6 +371,7 @@ extern struct globals_misc *const ash_ptr_to_globals_misc; #define exception_type (G_misc.exception_type ) #define suppress_int (G_misc.suppress_int ) #define pending_int (G_misc.pending_int ) +#define got_sigchld (G_misc.got_sigchld ) #define pending_sig (G_misc.pending_sig ) #define isloginsh (G_misc.isloginsh ) #define nullstr (G_misc.nullstr ) @@ -3388,7 +3390,14 @@ ignoresig(int signo) static void signal_handler(int signo) { + if (signo == SIGCHLD) { + got_sigchld = 1; + if (!trap[SIGCHLD]) + return; + } + gotsig[signo - 1] = 1; + pending_sig = signo; if (signo == SIGINT && !trap[SIGINT]) { if (!suppress_int) { @@ -3396,8 +3405,6 @@ signal_handler(int signo) raise_interrupt(); /* does not return */ } pending_int = 1; - } else { - pending_sig = signo; } } @@ -3455,6 +3462,9 @@ setsignal(int signo) //whereas we have to restore it to what shell got on entry //from the parent. See comment above + if (signo == SIGCHLD) + new_act = S_CATCH; + t = &sigmode[signo - 1]; cur_act = *t; if (cur_act == 0) { @@ -3507,6 +3517,7 @@ setsignal(int signo) /* mode flags for dowait */ #define DOWAIT_NONBLOCK 0 #define DOWAIT_BLOCK 1 +#define DOWAIT_BLOCK_OR_SIG 2 #if JOBS /* pgrp of shell on invocation */ @@ -3927,6 +3938,33 @@ sprint_status48(char *s, int status, int sigonly) return col; } +static int +wait_block_or_sig(int *status, int wait_flags) +{ + sigset_t mask; + int pid; + + do { + /* Poll all children for changes in their state */ + got_sigchld = 0; + pid = waitpid(-1, status, wait_flags | WNOHANG); + if (pid != 0) + break; /* Error (e.g. EINTR) or pid */ + + /* No child is ready. Sleep until interesting signal is received */ + sigfillset(&mask); + sigprocmask(SIG_SETMASK, &mask, &mask); + while (!got_sigchld && !pending_sig) + sigsuspend(&mask); + sigprocmask(SIG_SETMASK, &mask, NULL); + + /* If it was SIGCHLD, poll children again */ + } while (got_sigchld); + + return pid; +} + + static int dowait(int block, struct job *job) { @@ -3934,26 +3972,43 @@ dowait(int block, struct job *job) int pid; int status; struct job *jp; - struct job *thisjob; + struct job *thisjob = NULL; TRACE(("dowait(0x%x) called\n", block)); - /* Do a wait system call. If job control is compiled in, we accept - * stopped processes. - * NB: _not_ safe_waitpid, we need to detect EINTR. - */ wait_flags = 0; if (block == DOWAIT_NONBLOCK) wait_flags = WNOHANG; + /* If job control is compiled in, we accept stopped processes too. */ if (doing_jobctl) wait_flags |= WUNTRACED; - pid = waitpid(-1, &status, wait_flags); + + /* It's wrong to call waitpid() outside of INT_OFF region: + * signal can arrive just after syscall return and handler can + * longjmp away, losing stop/exit notification processing. + * Thus, for "jobs" builtin, and for waiting for a fg job, + * we call waitpid() (blocking or non-blocking) inside INT_OFF. + * + * However, for "wait" builtin it is wrong to simply call waitpid() + * in INT_OFF region: "wait" needs to wait for any running job + * to change state, but should exit on any trap too. + * In INT_OFF region, a signal just before syscall entry can set + * pending_sig valiables, but we can't check them, and we would + * either enter a sleeping waitpid() (BUG), or need to busy-loop. + * Because of this, we run inside INT_OFF, but use a special routine + * which combines waitpid() and sigsuspend(). + */ + INT_OFF; + if (block == DOWAIT_BLOCK_OR_SIG) + pid = wait_block_or_sig(&status, wait_flags); + else + /* NB: _not_ safe_waitpid, we need to detect EINTR. */ + pid = waitpid(-1, &status, wait_flags); TRACE(("wait returns pid=%d, status=0x%x, errno=%d(%s)\n", pid, status, errno, strerror(errno))); if (pid <= 0) - return pid; + goto out; - INT_OFF; thisjob = NULL; for (jp = curjob; jp; jp = jp->prev_job) { int jobstate; @@ -4217,7 +4272,7 @@ waitcmd(int argc UNUSED_PARAM, char **argv) * with an exit status greater than 128, immediately after which * the trap is executed." */ - dowait(DOWAIT_BLOCK, NULL); ///DOWAIT_WAITCMD + dowait(DOWAIT_BLOCK_OR_SIG, NULL); /* if child sends us a signal *and immediately exits*, * dowait() returns pid > 0. Check this case, * not "if (dowait() < 0)"! @@ -4244,7 +4299,7 @@ waitcmd(int argc UNUSED_PARAM, char **argv) } /* loop until process terminated or stopped */ while (job->state == JOBRUNNING) { - dowait(DOWAIT_BLOCK, NULL); ///DOWAIT_WAITCMD + dowait(DOWAIT_BLOCK_OR_SIG, NULL); if (pending_sig) goto sigout; } @@ -13113,7 +13168,9 @@ init(void) /* we will never free this */ basepf.next_to_pgetc = basepf.buf = ckmalloc(IBUFSIZ); - signal(SIGCHLD, SIG_DFL); + sigmode[SIGCHLD - 1] = S_DFL; + setsignal(SIGCHLD); + /* bash re-enables SIGHUP which is SIG_IGNed on entry. * Try: "trap '' HUP; bash; echo RET" and type "kill -HUP $$" */