Misc compiler-rt fixes.  Clarify the neg implementations to show what is
actually happening.  Fix the mod implementation so it doesn't get
optimized into a recursive call to itself.  Make the x86-32 non-SSE2 shift
implementations use shld/shrd instead of emulating them (the only x86 processor
where the emulation might be remotely close to justifiable is the Pentium 4).



git-svn-id: https://llvm.org/svn/llvm-project/compiler-rt/trunk@74756 91177308-0d34-0410-b5e6-96231b3b80d8
diff --git a/lib/i386/ashldi3.S b/lib/i386/ashldi3.S
index 3de2dfc..0d9aba4 100644
--- a/lib/i386/ashldi3.S
+++ b/lib/i386/ashldi3.S
@@ -40,23 +40,14 @@
 	movl	  12(%esp),		%ecx	// Load count
 	movl	   8(%esp),		%edx	// Load high
 	movl	   4(%esp),		%eax	// Load low
-	
+
 	testl		$0x20,		%ecx	// If count >= 32
-	jnz			2f					//    goto 2
-	testl		$0x1f,		%ecx	// If count == 0
-	jz			1f					//    goto 1
-	
-	pushl		%ebx
-	movl		%eax,		%ebx	// copy low
+	jnz		1f			//    goto 1
+	shldl		%cl, %eax,	%edx	// left shift high by count
 	shll		%cl,		%eax	// left shift low by count
-	shll		%cl,		%edx	// left shift high by count
-	neg			%cl
-	shrl		%cl,		%ebx	// right shift low by 32 - count
-	orl			%ebx,		%edx	// or the result into the high word
-	popl		%ebx
-1:	ret
-	
-2:	movl		%eax,		%edx	// Move low to high
+	ret
+
+1:	movl		%eax,		%edx	// Move low to high
 	xorl		%eax,		%eax	// clear low
 	shll		%cl,		%edx	// shift high by count - 32
 	ret
diff --git a/lib/i386/ashrdi3.S b/lib/i386/ashrdi3.S
index db01f21..2168014 100644
--- a/lib/i386/ashrdi3.S
+++ b/lib/i386/ashrdi3.S
@@ -52,21 +52,13 @@
 	movl	   4(%esp),		%eax	// Load low
 	
 	testl		$0x20,		%ecx	// If count >= 32
-	jnz			2f					//    goto 2
-	testl		$0x1f,		%ecx	// If count == 0
-	jz			1f					//    goto 1
-	
-	pushl		%ebx
-	movl		%edx,		%ebx	// copy high
-	shrl		%cl,		%eax	// right shift low by count
+	jnz			1f					//    goto 1
+
+	shrdl		%cl, %edx,	%eax	// right shift low by count
 	sarl		%cl,		%edx	// right shift high by count
-	neg			%cl
-	shll		%cl,		%ebx	// left shift high by 32 - count
-	orl			%ebx,		%eax	// or the result into the low word
-	popl		%ebx
-1:	ret
+	ret
 	
-2:	movl		%edx,		%eax	// Move high to low
+1:	movl		%edx,		%eax	// Move high to low
 	sarl		$31,		%edx	// clear high
 	sarl		%cl,		%eax	// shift low by count - 32
 	ret
diff --git a/lib/i386/lshrdi3.S b/lib/i386/lshrdi3.S
index 5992c21..a70113f 100644
--- a/lib/i386/lshrdi3.S
+++ b/lib/i386/lshrdi3.S
@@ -42,21 +42,13 @@
 	movl	   4(%esp),		%eax	// Load low
 	
 	testl		$0x20,		%ecx	// If count >= 32
-	jnz			2f					//    goto 2
-	testl		$0x1f,		%ecx	// If count == 0
-	jz			1f					//    goto 1
-	
-	pushl		%ebx
-	movl		%edx,		%ebx	// copy high
-	shrl		%cl,		%eax	// right shift low by count
+	jnz			1f					//    goto 1
+
+	shrdl		%cl, %edx,	%eax	// right shift low by count
 	shrl		%cl,		%edx	// right shift high by count
-	neg			%cl
-	shll		%cl,		%ebx	// left shift high by 32 - count
-	orl			%ebx,		%eax	// or the result into the low word
-	popl		%ebx
-1:	ret
+	ret
 	
-2:	movl		%edx,		%eax	// Move high to low
+1:	movl		%edx,		%eax	// Move high to low
 	xorl		%edx,		%edx	// clear high
 	shrl		%cl,		%eax	// shift low by count - 32
 	ret
diff --git a/lib/negdi2.c b/lib/negdi2.c
index db2d865..2d5cd63 100644
--- a/lib/negdi2.c
+++ b/lib/negdi2.c
@@ -18,5 +18,7 @@
 di_int
 __negdi2(di_int a)
 {
-    return ~a + 1;
+    // Note: this routine is here for API compatibility; any sane compiler
+    // should expand it inline.
+    return -a;
 }
diff --git a/lib/negti2.c b/lib/negti2.c
index c1c8a12..4244c8b 100644
--- a/lib/negti2.c
+++ b/lib/negti2.c
@@ -20,7 +20,9 @@
 ti_int
 __negti2(ti_int a)
 {
-    return ~a + 1;
+    // Note: this routine is here for API compatibility; any sane compiler
+    // should expand it inline.
+    return -a;
 }
 
 #endif
diff --git a/lib/umodsi3.c b/lib/umodsi3.c
index 06ab39a..115dc74 100644
--- a/lib/umodsi3.c
+++ b/lib/umodsi3.c
@@ -15,8 +15,10 @@
 
 // Returns: a % b
 
+su_int __udivsi3(su_int a, su_int b);
+
 su_int
 __umodsi3(su_int a, su_int b)
 {
-    return a - (a / b) * b;
+    return a - __udivsi3(a, b) * b;
 }