patch from njk: make x86_64 __uniclone branchless.
author Rich Felker <dalias@aerifal.cx>
Wed, 16 Feb 2011 14:49:54 +0000 (09:49 -0500)
committer Rich Felker <dalias@aerifal.cx>
Wed, 16 Feb 2011 14:49:54 +0000 (09:49 -0500)
src/thread/x86_64/clone.s

index 5141005102333472e70333c05c0e8abf76d8f2f4..3cefd93351ed60092c0baee84dcd5725e0fd131d 100644 (file)
@@ -4,6 +4,7 @@
 .type   __uniclone,%function
 /* rdi = child_stack, rsi = start, rdx = pthread_struct */
 __uniclone:
+        subq    $8,%rsp         /* pad parent stack to prevent branch later */
         subq    $16,%rdi        /* grow child_stack */
         mov     %rsi,8(%rdi)    /* push start onto child_stack as return ptr */
         mov     %rdx,0(%rdi)    /* push pthread_struct onto child_stack */
@@ -13,10 +14,9 @@ __uniclone:
         movl    $56,%eax        /* clone syscall number */
         movl    $0x7d0f00,%edi  /* rdi = flags */
         mov     %r10,%rdx       /* rdx = parent_id */
-       syscall                 /* clone(flags, child_stack, parent_id,
-                                *       child_id, tls) */
-       test    %rax,%rax
-       jnz     1f              /* if we're in the parent -> goto 1f */
-        pop     %rdi            /* restore pthread_struct from child stack */
-1:      ret
+        syscall                 /* clone(flags, child_stack, parent_id,
+                                 *       child_id, tls) */
+        pop     %rdi            /* child stack: restore pthread_struct
+                                 * parent stack: undo rsp displacement */
+        ret
 .size __uniclone,.-__uniclone