##--------------------------------------------------------------------##
##--- Support routines for the JITter output. ---##
##--- vg_helpers.S ---##
##--------------------------------------------------------------------##
/*
This file is part of Valgrind, an x86 protected-mode emulator
designed for debugging and profiling binaries on x86-Unixes.
Copyright (C) 2000-2002 Julian Seward
jseward@acm.org
Julian_Seward@muraroa.demon.co.uk
This program is free software; you can redistribute it and/or
modify it under the terms of the GNU General Public License as
published by the Free Software Foundation; either version 2 of the
License, or (at your option) any later version.
This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
02111-1307, USA.
The GNU General Public License is contained in the file LICENSE.
*/
#include "vg_constants.h"
/* Various helper routines, for instructions which are just too
darn tedious for the JITter to output code in-line:
* integer division
* integer multiplication
* setting and getting obscure eflags
* double-length shifts
All routines use a standard calling convention designed for
calling from translations, in which the incoming args are
underneath the return address, the callee saves _all_ registers,
and the incoming parameters can be modified, to return results.
*/
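/* For illustration only (a hypothetical caller, not part of this file):
   under this convention a translation invoking, say, VG_(helper_bsr)
   pushes the argument, calls, and then pops the overwritten result.
   The choice of %eax here is arbitrary:
	pushl	%eax			# argument slot, becomes the result
	call	VG_(helper_bsr)
	popl	%eax			# result
   The helper sees the argument at 4(%esp), just underneath the RA. */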
.global VG_(helper_value_check0_fail)
VG_(helper_value_check0_fail):
pushal
call VG_(helperc_value_check0_fail)
popal
ret
.global VG_(helper_value_check1_fail)
VG_(helper_value_check1_fail):
pushal
call VG_(helperc_value_check1_fail)
popal
ret
.global VG_(helper_value_check2_fail)
VG_(helper_value_check2_fail):
pushal
call VG_(helperc_value_check2_fail)
popal
ret
.global VG_(helper_value_check4_fail)
VG_(helper_value_check4_fail):
pushal
call VG_(helperc_value_check4_fail)
popal
ret
/* Do an original-code-write check for the address in %ebp. */
.global VG_(helper_smc_check4)
VG_(helper_smc_check4):
#if VG_SMC_FASTCHECK_IN_C
# save the live regs
pushl %eax
pushl %ebx
pushl %ecx
pushl %edx
pushl %esi
pushl %edi
pushl %ebp
call VG_(smc_check4)
addl $4, %esp
popl %edi
popl %esi
popl %edx
popl %ecx
popl %ebx
popl %eax
ret
#else
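	# Fast path: bump the statistics counter, then hash the address
	# into VG_(smc_cache); a zero cache byte apparently means the
	# address needs no further checking, so just discard the saved
	# %ebp copy and return.  Otherwise fall through to the full
	# C-level check below.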
incl VG_(smc_total_check4s)
pushl %ebp
shrl $VG_SMC_CACHE_SHIFT, %ebp
andl $VG_SMC_CACHE_MASK, %ebp
cmpb $0, VG_(smc_cache)(%ebp)
jnz vg_smc_cache_failure
addl $4, %esp
ret
vg_smc_cache_failure:
popl %ebp
pushal
pushl %ebp
call VG_(smc_check4)
addl $4, %esp
popal
ret
#endif
/* Fetch the time-stamp-ctr reg.
On entry:
dummy, replaced by %EAX value
dummy, replaced by %EDX value
RA <- %esp
*/
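/* After the two pushes below the stack is:
      0(%esp)  saved %edx         8(%esp)  RA
      4(%esp)  saved %eax        12(%esp)  %EDX result slot
                                 16(%esp)  %EAX result slot
   hence the writes to 12(%esp) and 16(%esp). */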
.global VG_(helper_RDTSC)
VG_(helper_RDTSC):
pushl %eax
pushl %edx
rdtsc
movl %edx, 12(%esp)
movl %eax, 16(%esp)
popl %edx
popl %eax
ret
/* Do the CPUID instruction.
On entry:
dummy, replaced by %EAX value
dummy, replaced by %EBX value
dummy, replaced by %ECX value
dummy, replaced by %EDX value
RA <- %esp
Emulating a real CPUID is kinda hard, since it
has to return different values depending on EAX,
so we just pretend not to support CPUID at all until
it becomes a problem. This will for sure disable
all MMX / 3dnow checks, so they don't bother us
with code we don't understand. (Dirk <dirk@kde.org>)
http://www.sandpile.org/ia32/cpuid.htm
(Later: we instead pretend to be like Werner's P54C P133, that is,
an original pre-MMX Pentium.)
<werner> cpuid words (0): 0x1 0x756e6547 0x6c65746e 0x49656e69
<werner> cpuid words (1): 0x52b 0x0 0x0 0x1bf
*/
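/* After the four pushes below, the incoming slots sit at:
      20(%esp)  %EDX slot        28(%esp)  %EBX slot
      24(%esp)  %ECX slot        32(%esp)  %EAX slot
   so the requested leaf is read from 32(%esp) and the faked
   results are written back into those same slots. */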
.global VG_(helper_CPUID)
VG_(helper_CPUID):
pushl %eax
pushl %ebx
pushl %ecx
pushl %edx
movl 32(%esp), %eax
/*
cpuid
*/
/*
xor %eax,%eax
xor %ebx,%ebx
xor %ecx,%ecx
xor %edx,%edx
*/
cmpl $0, %eax
jz cpuid__0
movl $0x52b, %eax
movl $0x0, %ebx
movl $0x0, %ecx
movl $0x1bf, %edx
jmp cpuid__99
cpuid__0:
movl $0x1, %eax
movl $0x756e6547, %ebx
movl $0x6c65746e, %ecx
movl $0x49656e69, %edx
cpuid__99:
movl %edx, 20(%esp)
movl %ecx, 24(%esp)
movl %ebx, 28(%esp)
movl %eax, 32(%esp)
popl %edx
popl %ecx
popl %ebx
popl %eax
ret
/* Fetch the FPU status register.
On entry:
dummy, replaced by result
RA <- %esp
*/
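/* %ebp points at the simulated state (the baseblock); VGOFF_(m_fpustate)
   appears to hold the word offset of the saved FPU image within it, hence
   the (%ebp, %esi, 4) addressing.  frstor loads the client's FPU state so
   that fstsw reads the client's status word rather than Valgrind's. */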
.global VG_(helper_fstsw_AX)
VG_(helper_fstsw_AX):
pushl %eax
pushl %esi
movl VGOFF_(m_fpustate), %esi
frstor (%ebp, %esi, 4)
fstsw %ax
popl %esi
movw %ax, 8(%esp)
popl %eax
ret
/* Copy %ah into %eflags.
On entry:
value of %eax
RA <- %esp
*/
.global VG_(helper_SAHF)
VG_(helper_SAHF):
pushl %eax
movl 8(%esp), %eax
sahf
popl %eax
ret
/* Do %al = DAS(%al). Note that the passed param has %AL as the least
significant 8 bits, since it was generated with GETB %AL,
some-temp. Fortunately %al is the least significant 8 bits of
%eax anyway, which is why it's safe to work with %eax as a
whole.
On entry:
value of %eax
RA <- %esp
*/
.global VG_(helper_DAS)
VG_(helper_DAS):
pushl %eax
movl 8(%esp), %eax
das
movl %eax, 8(%esp)
popl %eax
ret
/* Similarly, do %al = DAA(%al). */
.global VG_(helper_DAA)
VG_(helper_DAA):
pushl %eax
movl 8(%esp), %eax
daa
movl %eax, 8(%esp)
popl %eax
ret
/* Bit scan forward/reverse. Sets flags (??).
On entry:
value, replaced by result
RA <- %esp
*/
.global VG_(helper_bsr)
VG_(helper_bsr):
pushl %eax
bsrl 8(%esp), %eax
movl %eax, 8(%esp)
popl %eax
ret
.global VG_(helper_bsf)
VG_(helper_bsf):
pushl %eax
bsfl 8(%esp), %eax
movl %eax, 8(%esp)
popl %eax
ret
/* Bit test and set/reset/complement. Sets flags.
On entry:
src
dst
RA <- %esp
NOTE all these are basically misimplemented, since for memory
operands it appears the index value can be arbitrary, and the
address should be calculated accordingly. Here, we assume (by
forcing the register- and memory- versions to be handled by
the same helper) that the offset is always in the range
0 .. word-size-1; or, to be more precise, by implementing the
client's memory- version of this using the register- version,
we impose the condition that the offset is taken modulo the
word size. This is just plain wrong and should be
fixed.
*/
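/* For reference (illustrative only, not used anywhere): a faithful
   memory-operand BT with a non-negative bit offset in %eax against a
   base address in %edx would have to scale the offset into the right
   dword first, roughly:
	movl	%eax, %ecx
	shrl	$5, %ecx		# dword index = offset / 32
	andl	$31, %eax		# bit within that dword
	btl	%eax, (%edx,%ecx,4)
   The helpers below skip this and use the register form directly, so
   the offset is effectively taken modulo 32. */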
.global VG_(helper_bt)
VG_(helper_bt):
pushl %eax
movl 12(%esp), %eax
btl %eax, 8(%esp)
popl %eax
ret
.global VG_(helper_bts)
VG_(helper_bts):
pushl %eax
movl 12(%esp), %eax
btsl %eax, 8(%esp)
popl %eax
ret
.global VG_(helper_btr)
VG_(helper_btr):
pushl %eax
movl 12(%esp), %eax
btrl %eax, 8(%esp)
popl %eax
ret
.global VG_(helper_btc)
VG_(helper_btc):
pushl %eax
movl 12(%esp), %eax
btcl %eax, 8(%esp)
popl %eax
ret
/* 32-bit double-length shift left/right.
On entry:
amount
src
dst
RA <- %esp
*/
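/* After the three pushes below: 24(%esp) = amount (only the low byte
   is used), 20(%esp) = src, 16(%esp) = dst; the shifted dst is written
   back to 16(%esp). */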
.global VG_(helper_shldl)
VG_(helper_shldl):
pushl %eax
pushl %ebx
pushl %ecx
movb 24(%esp), %cl
movl 20(%esp), %ebx
movl 16(%esp), %eax
shldl %cl, %ebx, %eax
movl %eax, 16(%esp)
popl %ecx
popl %ebx
popl %eax
ret
.global VG_(helper_shldw)
VG_(helper_shldw):
pushl %eax
pushl %ebx
pushl %ecx
movb 24(%esp), %cl
movw 20(%esp), %bx
movw 16(%esp), %ax
shldw %cl, %bx, %ax
movw %ax, 16(%esp)
popl %ecx
popl %ebx
popl %eax
ret
.global VG_(helper_shrdl)
VG_(helper_shrdl):
pushl %eax
pushl %ebx
pushl %ecx
movb 24(%esp), %cl
movl 20(%esp), %ebx
movl 16(%esp), %eax
shrdl %cl, %ebx, %eax
movl %eax, 16(%esp)
popl %ecx
popl %ebx
popl %eax
ret
.global VG_(helper_shrdw)
VG_(helper_shrdw):
pushl %eax
pushl %ebx
pushl %ecx
movb 24(%esp), %cl
movw 20(%esp), %bx
movw 16(%esp), %ax
shrdw %cl, %bx, %ax
movw %ax, 16(%esp)
popl %ecx
popl %ebx
popl %eax
ret
/* Get the direction flag, and return either 1 or -1. */
.global VG_(helper_get_dirflag)
VG_(helper_get_dirflag):
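	/* DF is bit 10 of %eflags: extract it and map 0 -> 1, 1 -> -1
	   into the result slot, which sits at 12(%esp) after the two
	   pushes below. */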
pushfl
pushl %eax
pushfl
popl %eax
shrl $10, %eax
andl $1, %eax
jnz L1
movl $1, %eax
jmp L2
L1: movl $-1, %eax
L2: movl %eax, 12(%esp)
popl %eax
popfl
ret
/* Clear/set the direction flag. */
.global VG_(helper_CLD)
VG_(helper_CLD):
cld
ret
.global VG_(helper_STD)
VG_(helper_STD):
std
ret
/* Signed 32-to-64 multiply. */
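/* After the two pushes below, the two 32-bit factors are at 12(%esp)
   and 16(%esp); on exit 16(%esp) holds the low half and 12(%esp) the
   high half of the 64-bit product. */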
.globl VG_(helper_imul_32_64)
VG_(helper_imul_32_64):
pushl %eax
pushl %edx
movl 16(%esp), %eax
imull 12(%esp)
movl %eax, 16(%esp)
movl %edx, 12(%esp)
popl %edx
popl %eax
ret
/* Signed 16-to-32 multiply. */
.globl VG_(helper_imul_16_32)
VG_(helper_imul_16_32):
pushl %eax
pushl %edx
movw 16(%esp), %ax
imulw 12(%esp)
movw %ax, 16(%esp)
movw %dx, 12(%esp)
popl %edx
popl %eax
ret
/* Signed 8-to-16 multiply. */
.globl VG_(helper_imul_8_16)
VG_(helper_imul_8_16):
pushl %eax
pushl %edx
movb 16(%esp), %al
imulb 12(%esp)
movw %ax, 16(%esp)
popl %edx
popl %eax
ret
/* Unsigned 32-to-64 multiply. */
.globl VG_(helper_mul_32_64)
VG_(helper_mul_32_64):
pushl %eax
pushl %edx
movl 16(%esp), %eax
mull 12(%esp)
movl %eax, 16(%esp)
movl %edx, 12(%esp)
popl %edx
popl %eax
ret
/* Unsigned 16-to-32 multiply. */
.globl VG_(helper_mul_16_32)
VG_(helper_mul_16_32):
pushl %eax
pushl %edx
movw 16(%esp), %ax
mulw 12(%esp)
movw %ax, 16(%esp)
movw %dx, 12(%esp)
popl %edx
popl %eax
ret
/* Unsigned 8-to-16 multiply. */
.globl VG_(helper_mul_8_16)
VG_(helper_mul_8_16):
pushl %eax
pushl %edx
movb 16(%esp), %al
mulb 12(%esp)
movw %ax, 16(%esp)
popl %edx
popl %eax
ret
/* Unsigned 64-into-32 divide. */
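/* After the two pushes below: 20(%esp) = divisor, 16(%esp) = dividend
   low word, 12(%esp) = dividend high word; on exit 16(%esp) holds the
   quotient and 12(%esp) the remainder.  The signed version following
   this one uses the same layout. */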
.globl VG_(helper_div_64_32)
VG_(helper_div_64_32):
pushl %eax
pushl %edx
movl 16(%esp),%eax
movl 12(%esp),%edx
divl 20(%esp)
movl %eax,16(%esp)
movl %edx,12(%esp)
popl %edx
popl %eax
ret
/* Signed 64-into-32 divide. */
.globl VG_(helper_idiv_64_32)
VG_(helper_idiv_64_32):
pushl %eax
pushl %edx
movl 16(%esp),%eax
movl 12(%esp),%edx
idivl 20(%esp)
movl %eax,16(%esp)
movl %edx,12(%esp)
popl %edx
popl %eax
ret
/* Unsigned 32-into-16 divide. */
.globl VG_(helper_div_32_16)
VG_(helper_div_32_16):
pushl %eax
pushl %edx
movw 16(%esp),%ax
movw 12(%esp),%dx
divw 20(%esp)
movw %ax,16(%esp)
movw %dx,12(%esp)
popl %edx
popl %eax
ret
/* Signed 32-into-16 divide. */
.globl VG_(helper_idiv_32_16)
VG_(helper_idiv_32_16):
pushl %eax
pushl %edx
movw 16(%esp),%ax
movw 12(%esp),%dx
idivw 20(%esp)
movw %ax,16(%esp)
movw %dx,12(%esp)
popl %edx
popl %eax
ret
/* Unsigned 16-into-8 divide. */
.globl VG_(helper_div_16_8)
VG_(helper_div_16_8):
pushl %eax
movw 12(%esp),%ax
divb 16(%esp)
movb %ah,12(%esp)
movb %al,8(%esp)
popl %eax
ret
/* Signed 16-into-8 divide. */
.globl VG_(helper_idiv_16_8)
VG_(helper_idiv_16_8):
pushl %eax
movw 12(%esp),%ax
idivb 16(%esp)
movb %ah,12(%esp)
movb %al,8(%esp)
popl %eax
ret
##--------------------------------------------------------------------##
##--- end vg_helpers.S ---##
##--------------------------------------------------------------------##