am cd834618: am 63b14755: Merge "libc/x86: ensure the stack 16-byte aligned when tasks created"

* commit 'cd834618c4752b61d54ff4005a8baa8219b822e4':
  libc/x86: ensure the stack 16-byte aligned when tasks created
diff --git a/libc/arch-x86/bionic/clone.S b/libc/arch-x86/bionic/clone.S
index 8abb7c8..352d23c 100644
--- a/libc/arch-x86/bionic/clone.S
+++ b/libc/arch-x86/bionic/clone.S
@@ -13,16 +13,21 @@
         pushl   %ebx
         pushl   %ecx
         movl    16(%esp), %ecx
-        movl    20(%esp), %ebx
+
+        # save the original (unaligned) tls value before %ecx is rounded down
+        movl    %ecx, %ebx
+        # round the child stack pointer down to a 16-byte boundary
+        andl    $~15, %ecx
 
         # insert arguments onto the child stack
         movl    12(%esp), %eax
-        movl    %eax, -12(%ecx)
+        movl    %eax, -16(%ecx)
         movl    24(%esp), %eax
-        movl    %eax, -8(%ecx)
-        movl    %ecx, -4(%ecx)
+        movl    %eax, -12(%ecx)
+        movl    %ebx, -8(%ecx)
 
         subl    $16, %ecx
+        movl    20(%esp), %ebx
         movl    $__NR_clone, %eax
         int     $0x80
         test    %eax, %eax
@@ -40,7 +45,7 @@
         # we're in the child thread now, call __thread_entry
         # with the appropriate arguments on the child stack
         # we already placed most of them
-        jmp     __thread_entry
+        call    __thread_entry
         hlt
 
 2:
diff --git a/libc/arch-x86/bionic/crtbegin_dynamic.S b/libc/arch-x86/bionic/crtbegin_dynamic.S
index 9ba0d2f..177244b 100644
--- a/libc/arch-x86/bionic/crtbegin_dynamic.S
+++ b/libc/arch-x86/bionic/crtbegin_dynamic.S
@@ -51,6 +51,8 @@
 #
 _start:	
         mov     %esp, %eax
+        # before pushing arguments, align the stack to a 16-byte boundary
+        andl    $~15, %esp
         mov     $1f, %edx
         pushl   %edx
         mov     $0f, %edx
diff --git a/libc/arch-x86/bionic/crtbegin_static.S b/libc/arch-x86/bionic/crtbegin_static.S
index 8e70330..4fffecd 100644
--- a/libc/arch-x86/bionic/crtbegin_static.S
+++ b/libc/arch-x86/bionic/crtbegin_static.S
@@ -51,6 +51,8 @@
 #
 _start:	
         mov     %esp, %eax
+        # before pushing arguments, align the stack to a 16-byte boundary
+        andl    $~15, %esp
         mov     $1f, %edx
         pushl   %edx
         mov     $0f, %edx