blob: 8e8553c7cdfd499b7650ec4deaf43a71fe7797e7 [file] [log] [blame]
njn3e884182003-04-15 13:03:23 +00001
2/*--------------------------------------------------------------------*/
3/*--- Replacements for strcpy(), memcpy() et al, which run on the ---*/
4/*--- simulated CPU. ---*/
njn1d0825f2006-03-27 11:37:07 +00005/*--- mc_replace_strmem.c ---*/
njn3e884182003-04-15 13:03:23 +00006/*--------------------------------------------------------------------*/
7
8/*
nethercote137bc552003-11-14 17:47:54 +00009 This file is part of MemCheck, a heavyweight Valgrind tool for
njn0e1b5142003-04-15 14:58:06 +000010 detecting memory errors.
njn3e884182003-04-15 13:03:23 +000011
sewardj4d474d02008-02-11 11:34:59 +000012 Copyright (C) 2000-2008 Julian Seward
njn3e884182003-04-15 13:03:23 +000013 jseward@acm.org
14
15 This program is free software; you can redistribute it and/or
16 modify it under the terms of the GNU General Public License as
17 published by the Free Software Foundation; either version 2 of the
18 License, or (at your option) any later version.
19
20 This program is distributed in the hope that it will be useful, but
21 WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
23 General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with this program; if not, write to the Free Software
27 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28 02111-1307, USA.
29
30 The GNU General Public License is contained in the file COPYING.
31*/
32
njnc7561b92005-06-19 01:24:32 +000033#include "pub_tool_basics.h"
njnc7561b92005-06-19 01:24:32 +000034#include "pub_tool_hashtable.h"
njnc7561b92005-06-19 01:24:32 +000035#include "pub_tool_redir.h"
36#include "pub_tool_tooliface.h"
37#include "valgrind.h"
38
njn34419c12003-05-02 17:24:29 +000039#include "mc_include.h"
fitzhardinge98abfc72003-12-16 02:05:15 +000040#include "memcheck.h"
njn3e884182003-04-15 13:03:23 +000041
/* ---------------------------------------------------------------------
   We have our own versions of these functions for two reasons:
   (a) it allows us to do overlap checking
   (b) some of the normal versions are hyper-optimised, which fools
       Memcheck and causes spurious value warnings.  Our versions are
       simpler.

   Note that overenthusiastic use of PLT bypassing by the glibc people also
   means that we need to patch multiple versions of some of the functions to
   our own implementations.

   THEY RUN ON THE SIMULATED ("SIMD") CPU!
   ------------------------------------------------------------------ */
55
sewardjdda830a2003-07-20 22:28:42 +000056/* Figure out if [dst .. dst+dstlen-1] overlaps with
57 [src .. src+srclen-1].
58 We assume that the address ranges do not wrap around
59 (which is safe since on Linux addresses >= 0xC0000000
60 are not accessible and the program will segfault in this
61 circumstance, presumably).
62*/
njn3e884182003-04-15 13:03:23 +000063static __inline__
njnc6168192004-11-29 13:54:10 +000064Bool is_overlap ( void* dst, const void* src, SizeT dstlen, SizeT srclen )
njn3e884182003-04-15 13:03:23 +000065{
sewardjdda830a2003-07-20 22:28:42 +000066 Addr loS, hiS, loD, hiD;
67
68 if (dstlen == 0 || srclen == 0)
69 return False;
70
71 loS = (Addr)src;
72 loD = (Addr)dst;
73 hiS = loS + srclen - 1;
74 hiD = loD + dstlen - 1;
75
76 /* So figure out if [loS .. hiS] overlaps with [loD .. hiD]. */
77 if (loS < loD) {
78 return !(hiS < loD);
79 }
80 else if (loD < loS) {
81 return !(hiD < loS);
82 }
83 else {
84 /* They start at same place. Since we know neither of them has
85 zero length, they must overlap. */
86 return True;
87 }
njn3e884182003-04-15 13:03:23 +000088}
89
// This is a macro rather than a function because we don't want to have an
// extra function in the stack trace.
//
// It issues the _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR client request,
// forwarding its four arguments positionally followed by a trailing 0; the
// request's result is ignored.
//
// NOTE(review): the parameters are named (s, src, dst, len), but the callers
// in this file pass (function-name, dst, src, len).  Confirm which order the
// request handler expects.
//
// The body is wrapped in do { } while (0) so that an invocation followed by
// ';' forms exactly one statement, which keeps it safe as the body of an
// unbraced if/else.
#define RECORD_OVERLAP_ERROR(s, src, dst, len) \
do { \
   Word unused_res; \
   VALGRIND_DO_CLIENT_REQUEST(unused_res, 0, \
                              _VG_USERREQ__MEMCHECK_RECORD_OVERLAP_ERROR, \
                              s, src, dst, len, 0); \
} while (0)
99
njn16eeb4e2005-06-16 03:56:58 +0000100
101#define STRRCHR(soname, fnname) \
sewardj0ec07f32006-01-12 12:32:32 +0000102 char* VG_REPLACE_FUNCTION_ZU(soname,fnname)( const char* s, int c ); \
103 char* VG_REPLACE_FUNCTION_ZU(soname,fnname)( const char* s, int c ) \
njn16eeb4e2005-06-16 03:56:58 +0000104 { \
105 UChar ch = (UChar)((UInt)c); \
106 UChar* p = (UChar*)s; \
107 UChar* last = NULL; \
108 while (True) { \
109 if (*p == ch) last = p; \
110 if (*p == 0) return last; \
111 p++; \
112 } \
njn3e884182003-04-15 13:03:23 +0000113 }
njn3e884182003-04-15 13:03:23 +0000114
njn16eeb4e2005-06-16 03:56:58 +0000115// Apparently rindex() is the same thing as strrchr()
njne6154662009-02-10 04:23:41 +0000116STRRCHR(VG_Z_LIBC_SONAME, strrchr)
117STRRCHR(VG_Z_LIBC_SONAME, rindex)
118STRRCHR(VG_Z_LD_LINUX_SO_2, rindex)
njn16eeb4e2005-06-16 03:56:58 +0000119
120
121#define STRCHR(soname, fnname) \
sewardj0ec07f32006-01-12 12:32:32 +0000122 char* VG_REPLACE_FUNCTION_ZU(soname,fnname) ( const char* s, int c ); \
123 char* VG_REPLACE_FUNCTION_ZU(soname,fnname) ( const char* s, int c ) \
njn16eeb4e2005-06-16 03:56:58 +0000124 { \
125 UChar ch = (UChar)((UInt)c); \
126 UChar* p = (UChar*)s; \
127 while (True) { \
128 if (*p == ch) return p; \
129 if (*p == 0) return NULL; \
130 p++; \
131 } \
njn3e884182003-04-15 13:03:23 +0000132 }
njn3e884182003-04-15 13:03:23 +0000133
njn16eeb4e2005-06-16 03:56:58 +0000134// Apparently index() is the same thing as strchr()
njne6154662009-02-10 04:23:41 +0000135STRCHR(VG_Z_LIBC_SONAME, strchr)
136STRCHR(VG_Z_LD_LINUX_SO_2, strchr)
137STRCHR(VG_Z_LD_LINUX_X86_64_SO_2, strchr)
138STRCHR(VG_Z_LIBC_SONAME, index)
139STRCHR(VG_Z_LD_LINUX_SO_2, index)
140STRCHR(VG_Z_LD_LINUX_X86_64_SO_2, index)
njn3e884182003-04-15 13:03:23 +0000141
njn3e884182003-04-15 13:03:23 +0000142
njn16eeb4e2005-06-16 03:56:58 +0000143#define STRCAT(soname, fnname) \
sewardj0ec07f32006-01-12 12:32:32 +0000144 char* VG_REPLACE_FUNCTION_ZU(soname,fnname) ( char* dst, const char* src ); \
145 char* VG_REPLACE_FUNCTION_ZU(soname,fnname) ( char* dst, const char* src ) \
njn16eeb4e2005-06-16 03:56:58 +0000146 { \
147 const Char* src_orig = src; \
148 Char* dst_orig = dst; \
149 while (*dst) dst++; \
150 while (*src) *dst++ = *src++; \
151 *dst = 0; \
sewardjb6c04032007-11-13 20:52:29 +0000152 \
njn16eeb4e2005-06-16 03:56:58 +0000153 /* This is a bit redundant, I think; any overlap and the strcat will */ \
154 /* go forever... or until a seg fault occurs. */ \
155 if (is_overlap(dst_orig, \
156 src_orig, \
157 (Addr)dst-(Addr)dst_orig+1, \
158 (Addr)src-(Addr)src_orig+1)) \
njn718d3b12006-12-16 00:54:12 +0000159 RECORD_OVERLAP_ERROR("strcat", dst_orig, src_orig, 0); \
sewardjb6c04032007-11-13 20:52:29 +0000160 \
njn16eeb4e2005-06-16 03:56:58 +0000161 return dst_orig; \
njn3e884182003-04-15 13:03:23 +0000162 }
njn3e884182003-04-15 13:03:23 +0000163
njne6154662009-02-10 04:23:41 +0000164STRCAT(VG_Z_LIBC_SONAME, strcat)
njn16eeb4e2005-06-16 03:56:58 +0000165
166
167#define STRNCAT(soname, fnname) \
sewardj0ec07f32006-01-12 12:32:32 +0000168 char* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
169 ( char* dst, const char* src, SizeT n ); \
170 char* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
171 ( char* dst, const char* src, SizeT n ) \
njn16eeb4e2005-06-16 03:56:58 +0000172 { \
173 const Char* src_orig = src; \
174 Char* dst_orig = dst; \
175 SizeT m = 0; \
sewardjb6c04032007-11-13 20:52:29 +0000176 \
njn16eeb4e2005-06-16 03:56:58 +0000177 while (*dst) dst++; \
178 while (m < n && *src) { m++; *dst++ = *src++; } /* concat <= n chars */ \
179 *dst = 0; /* always add null */ \
sewardjb6c04032007-11-13 20:52:29 +0000180 \
njn16eeb4e2005-06-16 03:56:58 +0000181 /* This checks for overlap after copying, unavoidable without */ \
182 /* pre-counting lengths... should be ok */ \
183 if (is_overlap(dst_orig, \
184 src_orig, \
185 (Addr)dst-(Addr)dst_orig+1, \
186 (Addr)src-(Addr)src_orig+1)) \
njn718d3b12006-12-16 00:54:12 +0000187 RECORD_OVERLAP_ERROR("strncat", dst_orig, src_orig, n); \
sewardjb6c04032007-11-13 20:52:29 +0000188 \
njn16eeb4e2005-06-16 03:56:58 +0000189 return dst_orig; \
njn3e884182003-04-15 13:03:23 +0000190 }
njn3e884182003-04-15 13:03:23 +0000191
njne6154662009-02-10 04:23:41 +0000192STRNCAT(VG_Z_LIBC_SONAME, strncat)
sewardj31b9ce12006-10-17 01:27:13 +0000193
njn3e884182003-04-15 13:03:23 +0000194
njn16eeb4e2005-06-16 03:56:58 +0000195#define STRNLEN(soname, fnname) \
sewardj0ec07f32006-01-12 12:32:32 +0000196 SizeT VG_REPLACE_FUNCTION_ZU(soname,fnname) ( const char* str, SizeT n ); \
197 SizeT VG_REPLACE_FUNCTION_ZU(soname,fnname) ( const char* str, SizeT n ) \
njn16eeb4e2005-06-16 03:56:58 +0000198 { \
199 SizeT i = 0; \
200 while (i < n && str[i] != 0) i++; \
201 return i; \
njn3e884182003-04-15 13:03:23 +0000202 }
njn3e884182003-04-15 13:03:23 +0000203
njne6154662009-02-10 04:23:41 +0000204STRNLEN(VG_Z_LIBC_SONAME, strnlen)
njn16eeb4e2005-06-16 03:56:58 +0000205
sewardj3ceec242003-07-30 21:24:25 +0000206
njn5ec15ed2005-08-24 19:55:51 +0000207// Note that this replacement often doesn't get used because gcc inlines
208// calls to strlen() with its own built-in version. This can be very
209// confusing if you aren't expecting it. Other small functions in this file
210// may also be inline by gcc.
njn16eeb4e2005-06-16 03:56:58 +0000211#define STRLEN(soname, fnname) \
sewardj0ec07f32006-01-12 12:32:32 +0000212 SizeT VG_REPLACE_FUNCTION_ZU(soname,fnname)( const char* str ); \
213 SizeT VG_REPLACE_FUNCTION_ZU(soname,fnname)( const char* str ) \
njn16eeb4e2005-06-16 03:56:58 +0000214 { \
215 SizeT i = 0; \
216 while (str[i] != 0) i++; \
217 return i; \
sewardj3ceec242003-07-30 21:24:25 +0000218 }
njn16eeb4e2005-06-16 03:56:58 +0000219
njne6154662009-02-10 04:23:41 +0000220STRLEN(VG_Z_LIBC_SONAME, strlen)
221STRLEN(VG_Z_LD_LINUX_SO_2, strlen)
222STRLEN(VG_Z_LD_LINUX_X86_64_SO_2, strlen)
sewardj31b9ce12006-10-17 01:27:13 +0000223
njn16eeb4e2005-06-16 03:56:58 +0000224
225#define STRCPY(soname, fnname) \
sewardj0ec07f32006-01-12 12:32:32 +0000226 char* VG_REPLACE_FUNCTION_ZU(soname, fnname) ( char* dst, const char* src ); \
227 char* VG_REPLACE_FUNCTION_ZU(soname, fnname) ( char* dst, const char* src ) \
njn16eeb4e2005-06-16 03:56:58 +0000228 { \
229 const Char* src_orig = src; \
230 Char* dst_orig = dst; \
sewardjb6c04032007-11-13 20:52:29 +0000231 \
njn16eeb4e2005-06-16 03:56:58 +0000232 while (*src) *dst++ = *src++; \
233 *dst = 0; \
sewardjb6c04032007-11-13 20:52:29 +0000234 \
njn16eeb4e2005-06-16 03:56:58 +0000235 /* This checks for overlap after copying, unavoidable without */ \
236 /* pre-counting length... should be ok */ \
237 if (is_overlap(dst_orig, \
238 src_orig, \
239 (Addr)dst-(Addr)dst_orig+1, \
240 (Addr)src-(Addr)src_orig+1)) \
njn718d3b12006-12-16 00:54:12 +0000241 RECORD_OVERLAP_ERROR("strcpy", dst_orig, src_orig, 0); \
sewardjb6c04032007-11-13 20:52:29 +0000242 \
njn16eeb4e2005-06-16 03:56:58 +0000243 return dst_orig; \
244 }
245
njne6154662009-02-10 04:23:41 +0000246STRCPY(VG_Z_LIBC_SONAME, strcpy)
njn16eeb4e2005-06-16 03:56:58 +0000247
248
249#define STRNCPY(soname, fnname) \
sewardj0ec07f32006-01-12 12:32:32 +0000250 char* VG_REPLACE_FUNCTION_ZU(soname, fnname) \
251 ( char* dst, const char* src, SizeT n ); \
252 char* VG_REPLACE_FUNCTION_ZU(soname, fnname) \
253 ( char* dst, const char* src, SizeT n ) \
njn16eeb4e2005-06-16 03:56:58 +0000254 { \
255 const Char* src_orig = src; \
256 Char* dst_orig = dst; \
257 SizeT m = 0; \
sewardjb6c04032007-11-13 20:52:29 +0000258 \
njn16eeb4e2005-06-16 03:56:58 +0000259 while (m < n && *src) { m++; *dst++ = *src++; } \
260 /* Check for overlap after copying; all n bytes of dst are relevant, */ \
261 /* but only m+1 bytes of src if terminator was found */ \
262 if (is_overlap(dst_orig, src_orig, n, (m < n) ? m+1 : n)) \
njn718d3b12006-12-16 00:54:12 +0000263 RECORD_OVERLAP_ERROR("strncpy", dst, src, n); \
njn16eeb4e2005-06-16 03:56:58 +0000264 while (m++ < n) *dst++ = 0; /* must pad remainder with nulls */ \
265 \
266 return dst_orig; \
267 }
268
njne6154662009-02-10 04:23:41 +0000269STRNCPY(VG_Z_LIBC_SONAME, strncpy)
njn16eeb4e2005-06-16 03:56:58 +0000270
271
272#define STRNCMP(soname, fnname) \
sewardj0ec07f32006-01-12 12:32:32 +0000273 int VG_REPLACE_FUNCTION_ZU(soname,fnname) \
274 ( const char* s1, const char* s2, SizeT nmax ); \
275 int VG_REPLACE_FUNCTION_ZU(soname,fnname) \
276 ( const char* s1, const char* s2, SizeT nmax ) \
njn16eeb4e2005-06-16 03:56:58 +0000277 { \
278 SizeT n = 0; \
279 while (True) { \
280 if (n >= nmax) return 0; \
281 if (*s1 == 0 && *s2 == 0) return 0; \
282 if (*s1 == 0) return -1; \
283 if (*s2 == 0) return 1; \
sewardjb6c04032007-11-13 20:52:29 +0000284 \
njn16eeb4e2005-06-16 03:56:58 +0000285 if (*(unsigned char*)s1 < *(unsigned char*)s2) return -1; \
286 if (*(unsigned char*)s1 > *(unsigned char*)s2) return 1; \
sewardjb6c04032007-11-13 20:52:29 +0000287 \
njn16eeb4e2005-06-16 03:56:58 +0000288 s1++; s2++; n++; \
289 } \
290 }
291
njne6154662009-02-10 04:23:41 +0000292STRNCMP(VG_Z_LIBC_SONAME, strncmp)
njn16eeb4e2005-06-16 03:56:58 +0000293
294
295#define STRCMP(soname, fnname) \
sewardj0ec07f32006-01-12 12:32:32 +0000296 int VG_REPLACE_FUNCTION_ZU(soname,fnname) \
297 ( const char* s1, const char* s2 ); \
298 int VG_REPLACE_FUNCTION_ZU(soname,fnname) \
299 ( const char* s1, const char* s2 ) \
njn16eeb4e2005-06-16 03:56:58 +0000300 { \
301 register unsigned char c1; \
302 register unsigned char c2; \
303 while (True) { \
304 c1 = *(unsigned char *)s1; \
305 c2 = *(unsigned char *)s2; \
306 if (c1 != c2) break; \
307 if (c1 == 0) break; \
308 s1++; s2++; \
309 } \
310 if ((unsigned char)c1 < (unsigned char)c2) return -1; \
311 if ((unsigned char)c1 > (unsigned char)c2) return 1; \
312 return 0; \
313 }
314
njne6154662009-02-10 04:23:41 +0000315STRCMP(VG_Z_LIBC_SONAME, strcmp)
316STRCMP(VG_Z_LD_LINUX_X86_64_SO_2, strcmp)
317STRCMP(VG_Z_LD64_SO_1, strcmp)
njn16eeb4e2005-06-16 03:56:58 +0000318
319
320#define MEMCHR(soname, fnname) \
sewardj0ec07f32006-01-12 12:32:32 +0000321 void* VG_REPLACE_FUNCTION_ZU(soname,fnname) (const void *s, int c, SizeT n); \
322 void* VG_REPLACE_FUNCTION_ZU(soname,fnname) (const void *s, int c, SizeT n) \
njn16eeb4e2005-06-16 03:56:58 +0000323 { \
324 SizeT i; \
325 UChar c0 = (UChar)c; \
326 UChar* p = (UChar*)s; \
327 for (i = 0; i < n; i++) \
328 if (p[i] == c0) return (void*)(&p[i]); \
329 return NULL; \
330 }
331
njne6154662009-02-10 04:23:41 +0000332MEMCHR(VG_Z_LIBC_SONAME, memchr)
njn16eeb4e2005-06-16 03:56:58 +0000333
334
335#define MEMCPY(soname, fnname) \
sewardj0ec07f32006-01-12 12:32:32 +0000336 void* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
337 ( void *dst, const void *src, SizeT len ); \
338 void* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
339 ( void *dst, const void *src, SizeT len ) \
njn16eeb4e2005-06-16 03:56:58 +0000340 { \
341 register char *d; \
342 register char *s; \
sewardjb6c04032007-11-13 20:52:29 +0000343 \
njn16eeb4e2005-06-16 03:56:58 +0000344 if (len == 0) \
345 return dst; \
sewardjb6c04032007-11-13 20:52:29 +0000346 \
njn16eeb4e2005-06-16 03:56:58 +0000347 if (is_overlap(dst, src, len, len)) \
njn718d3b12006-12-16 00:54:12 +0000348 RECORD_OVERLAP_ERROR("memcpy", dst, src, len); \
sewardjb6c04032007-11-13 20:52:29 +0000349 \
njn16eeb4e2005-06-16 03:56:58 +0000350 if ( dst > src ) { \
351 d = (char *)dst + len - 1; \
352 s = (char *)src + len - 1; \
353 while ( len >= 4 ) { \
354 *d-- = *s--; \
355 *d-- = *s--; \
356 *d-- = *s--; \
357 *d-- = *s--; \
358 len -= 4; \
359 } \
360 while ( len-- ) { \
361 *d-- = *s--; \
362 } \
363 } else if ( dst < src ) { \
364 d = (char *)dst; \
365 s = (char *)src; \
366 while ( len >= 4 ) { \
367 *d++ = *s++; \
368 *d++ = *s++; \
369 *d++ = *s++; \
370 *d++ = *s++; \
371 len -= 4; \
372 } \
373 while ( len-- ) { \
374 *d++ = *s++; \
375 } \
376 } \
377 return dst; \
378 }
379
njne6154662009-02-10 04:23:41 +0000380MEMCPY(VG_Z_LIBC_SONAME, memcpy)
381MEMCPY(VG_Z_LD_SO_1, memcpy) /* ld.so.1 */
382MEMCPY(VG_Z_LD64_SO_1, memcpy) /* ld64.so.1 */
sewardjf0b34322007-01-16 21:42:28 +0000383/* icc9 blats these around all over the place. Not only in the main
384 executable but various .so's. They are highly tuned and read
385 memory beyond the source boundary (although work correctly and
386 never go across page boundaries), so give errors when run natively,
387 at least for misaligned source arg. Just intercepting in the exe
388 only until we understand more about the problem. See
389 http://bugs.kde.org/show_bug.cgi?id=139776
390 */
391MEMCPY(NONE, _intel_fast_memcpy)
sewardj31b9ce12006-10-17 01:27:13 +0000392
njn16eeb4e2005-06-16 03:56:58 +0000393
394#define MEMCMP(soname, fnname) \
sewardj0ec07f32006-01-12 12:32:32 +0000395 int VG_REPLACE_FUNCTION_ZU(soname,fnname) \
396 ( const void *s1V, const void *s2V, SizeT n ); \
397 int VG_REPLACE_FUNCTION_ZU(soname,fnname) \
398 ( const void *s1V, const void *s2V, SizeT n ) \
njn16eeb4e2005-06-16 03:56:58 +0000399 { \
400 int res; \
401 unsigned char a0; \
402 unsigned char b0; \
403 unsigned char* s1 = (unsigned char*)s1V; \
404 unsigned char* s2 = (unsigned char*)s2V; \
sewardjb6c04032007-11-13 20:52:29 +0000405 \
njn16eeb4e2005-06-16 03:56:58 +0000406 while (n != 0) { \
407 a0 = s1[0]; \
408 b0 = s2[0]; \
409 s1 += 1; \
410 s2 += 1; \
411 res = ((int)a0) - ((int)b0); \
412 if (res != 0) \
413 return res; \
414 n -= 1; \
415 } \
416 return 0; \
417 }
418
njne6154662009-02-10 04:23:41 +0000419MEMCMP(VG_Z_LIBC_SONAME, memcmp)
420MEMCMP(VG_Z_LIBC_SONAME, bcmp)
421MEMCMP(VG_Z_LD_SO_1, bcmp)
njn3e884182003-04-15 13:03:23 +0000422
jseward0845ef82003-12-22 22:31:27 +0000423
424/* Copy SRC to DEST, returning the address of the terminating '\0' in
425 DEST. (minor variant of strcpy) */
njn16eeb4e2005-06-16 03:56:58 +0000426#define STPCPY(soname, fnname) \
sewardj0ec07f32006-01-12 12:32:32 +0000427 char* VG_REPLACE_FUNCTION_ZU(soname,fnname) ( char* dst, const char* src ); \
428 char* VG_REPLACE_FUNCTION_ZU(soname,fnname) ( char* dst, const char* src ) \
njn16eeb4e2005-06-16 03:56:58 +0000429 { \
430 const Char* src_orig = src; \
431 Char* dst_orig = dst; \
sewardjb6c04032007-11-13 20:52:29 +0000432 \
njn16eeb4e2005-06-16 03:56:58 +0000433 while (*src) *dst++ = *src++; \
434 *dst = 0; \
sewardjb6c04032007-11-13 20:52:29 +0000435 \
njn16eeb4e2005-06-16 03:56:58 +0000436 /* This checks for overlap after copying, unavoidable without */ \
437 /* pre-counting length... should be ok */ \
438 if (is_overlap(dst_orig, \
439 src_orig, \
440 (Addr)dst-(Addr)dst_orig+1, \
441 (Addr)src-(Addr)src_orig+1)) \
njn718d3b12006-12-16 00:54:12 +0000442 RECORD_OVERLAP_ERROR("stpcpy", dst_orig, src_orig, 0); \
sewardjb6c04032007-11-13 20:52:29 +0000443 \
njn16eeb4e2005-06-16 03:56:58 +0000444 return dst; \
sewardj44e495f2005-05-12 17:58:28 +0000445 }
njn16eeb4e2005-06-16 03:56:58 +0000446
njne6154662009-02-10 04:23:41 +0000447STPCPY(VG_Z_LIBC_SONAME, stpcpy)
448STPCPY(VG_Z_LD_LINUX_SO_2, stpcpy)
449STPCPY(VG_Z_LD_LINUX_X86_64_SO_2, stpcpy)
njn16eeb4e2005-06-16 03:56:58 +0000450
451
452#define MEMSET(soname, fnname) \
sewardj0ec07f32006-01-12 12:32:32 +0000453 void* VG_REPLACE_FUNCTION_ZU(soname,fnname)(void *s, Int c, SizeT n); \
454 void* VG_REPLACE_FUNCTION_ZU(soname,fnname)(void *s, Int c, SizeT n) \
njn16eeb4e2005-06-16 03:56:58 +0000455 { \
456 unsigned char *cp = s; \
sewardj487cadb2007-08-25 23:25:00 +0000457 while (n >= 4) { \
458 cp[0] = c; \
459 cp[1] = c; \
460 cp[2] = c; \
461 cp[3] = c; \
462 cp += 4; \
463 n -= 4; \
464 } \
465 while (n--) { \
njn16eeb4e2005-06-16 03:56:58 +0000466 *cp++ = c; \
sewardj487cadb2007-08-25 23:25:00 +0000467 } \
njn16eeb4e2005-06-16 03:56:58 +0000468 return s; \
sewardj44e495f2005-05-12 17:58:28 +0000469 }
njn16eeb4e2005-06-16 03:56:58 +0000470
njne6154662009-02-10 04:23:41 +0000471MEMSET(VG_Z_LIBC_SONAME, memset)
njn16eeb4e2005-06-16 03:56:58 +0000472
473
474#define MEMMOVE(soname, fnname) \
sewardj0ec07f32006-01-12 12:32:32 +0000475 void* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
476 (void *dstV, const void *srcV, SizeT n); \
477 void* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
478 (void *dstV, const void *srcV, SizeT n) \
njn16eeb4e2005-06-16 03:56:58 +0000479 { \
480 SizeT i; \
481 Char* dst = (Char*)dstV; \
482 Char* src = (Char*)srcV; \
483 if (dst < src) { \
484 for (i = 0; i < n; i++) \
485 dst[i] = src[i]; \
486 } \
487 else \
488 if (dst > src) { \
489 for (i = 0; i < n; i++) \
490 dst[n-i-1] = src[n-i-1]; \
491 } \
492 return dst; \
493 }
494
njne6154662009-02-10 04:23:41 +0000495MEMMOVE(VG_Z_LIBC_SONAME, memmove)
sewardj44e495f2005-05-12 17:58:28 +0000496
jseward0845ef82003-12-22 22:31:27 +0000497
sewardj24cb2172007-02-23 09:03:26 +0000498/* glibc 2.5 variant of memmove which checks the dest is big enough.
499 There is no specific part of glibc that this is copied from. */
500#define GLIBC25___MEMMOVE_CHK(soname, fnname) \
501 void* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
502 (void *dstV, const void *srcV, SizeT n, SizeT destlen); \
503 void* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
504 (void *dstV, const void *srcV, SizeT n, SizeT destlen) \
505 { \
506 extern void _exit(int status); \
507 SizeT i; \
508 Char* dst = (Char*)dstV; \
509 Char* src = (Char*)srcV; \
510 if (destlen < n) \
511 goto badness; \
512 if (dst < src) { \
513 for (i = 0; i < n; i++) \
514 dst[i] = src[i]; \
515 } \
516 else \
517 if (dst > src) { \
518 for (i = 0; i < n; i++) \
519 dst[n-i-1] = src[n-i-1]; \
520 } \
521 return dst; \
522 badness: \
523 VALGRIND_PRINTF_BACKTRACE( \
524 "*** memmove_chk: buffer overflow detected ***: " \
525 "program terminated"); \
526 _exit(127); \
sewardjc271ec82007-02-27 22:36:14 +0000527 /*NOTREACHED*/ \
528 return NULL; \
sewardj24cb2172007-02-23 09:03:26 +0000529 }
530
njne6154662009-02-10 04:23:41 +0000531GLIBC25___MEMMOVE_CHK(VG_Z_LIBC_SONAME, __memmove_chk)
sewardj24cb2172007-02-23 09:03:26 +0000532
533
sewardj4e9a4b62004-11-23 00:20:17 +0000534/* Find the first occurrence of C in S or the final NUL byte. */
njn16eeb4e2005-06-16 03:56:58 +0000535#define GLIBC232_STRCHRNUL(soname, fnname) \
sewardj0ec07f32006-01-12 12:32:32 +0000536 char* VG_REPLACE_FUNCTION_ZU(soname,fnname) (const char* s, int c_in); \
537 char* VG_REPLACE_FUNCTION_ZU(soname,fnname) (const char* s, int c_in) \
njn16eeb4e2005-06-16 03:56:58 +0000538 { \
539 unsigned char c = (unsigned char) c_in; \
540 unsigned char* char_ptr = (unsigned char *)s; \
541 while (1) { \
542 if (*char_ptr == 0) return char_ptr; \
543 if (*char_ptr == c) return char_ptr; \
544 char_ptr++; \
545 } \
sewardj4e9a4b62004-11-23 00:20:17 +0000546 }
njn16eeb4e2005-06-16 03:56:58 +0000547
njne6154662009-02-10 04:23:41 +0000548GLIBC232_STRCHRNUL(VG_Z_LIBC_SONAME, strchrnul)
sewardj4e9a4b62004-11-23 00:20:17 +0000549
550
551/* Find the first occurrence of C in S. */
njn16eeb4e2005-06-16 03:56:58 +0000552#define GLIBC232_RAWMEMCHR(soname, fnname) \
sewardj0ec07f32006-01-12 12:32:32 +0000553 char* VG_REPLACE_FUNCTION_ZU(soname,fnname) (const char* s, int c_in); \
554 char* VG_REPLACE_FUNCTION_ZU(soname,fnname) (const char* s, int c_in) \
njn16eeb4e2005-06-16 03:56:58 +0000555 { \
556 unsigned char c = (unsigned char) c_in; \
557 unsigned char* char_ptr = (unsigned char *)s; \
558 while (1) { \
559 if (*char_ptr == c) return char_ptr; \
560 char_ptr++; \
561 } \
sewardj4e9a4b62004-11-23 00:20:17 +0000562 }
njn16eeb4e2005-06-16 03:56:58 +0000563
njne6154662009-02-10 04:23:41 +0000564GLIBC232_RAWMEMCHR(VG_Z_LIBC_SONAME, rawmemchr)
sewardj4e9a4b62004-11-23 00:20:17 +0000565
566
sewardjdc5d8322007-01-28 06:32:01 +0000567/* glibc variant of strcpy that checks the dest is big enough.
568 Copied from glibc-2.5/debug/test-strcpy_chk.c. */
sewardj620e5262006-12-31 00:22:30 +0000569#define GLIBC25___STRCPY_CHK(soname,fnname) \
570 char* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
571 (char* dst, const char* src, SizeT len); \
572 char* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
573 (char* dst, const char* src, SizeT len) \
574 { \
575 extern void _exit(int status); \
576 char* ret = dst; \
577 if (! len) \
578 goto badness; \
579 while ((*dst++ = *src++) != '\0') \
580 if (--len == 0) \
581 goto badness; \
582 return ret; \
583 badness: \
584 VALGRIND_PRINTF_BACKTRACE( \
sewardj24cb2172007-02-23 09:03:26 +0000585 "*** strcpy_chk: buffer overflow detected ***: " \
586 "program terminated"); \
sewardj620e5262006-12-31 00:22:30 +0000587 _exit(127); \
588 /*NOTREACHED*/ \
589 return NULL; \
590 }
591
njne6154662009-02-10 04:23:41 +0000592GLIBC25___STRCPY_CHK(VG_Z_LIBC_SONAME, __strcpy_chk)
sewardj620e5262006-12-31 00:22:30 +0000593
594
sewardjdc5d8322007-01-28 06:32:01 +0000595/* glibc variant of stpcpy that checks the dest is big enough.
596 Copied from glibc-2.5/debug/test-stpcpy_chk.c. */
sewardjb8d03852007-01-27 00:49:44 +0000597#define GLIBC25___STPCPY_CHK(soname,fnname) \
598 char* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
599 (char* dst, const char* src, SizeT len); \
600 char* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
601 (char* dst, const char* src, SizeT len) \
602 { \
603 extern void _exit(int status); \
sewardjdc5d8322007-01-28 06:32:01 +0000604 if (! len) \
605 goto badness; \
606 while ((*dst++ = *src++) != '\0') \
607 if (--len == 0) \
sewardjb8d03852007-01-27 00:49:44 +0000608 goto badness; \
sewardjb8d03852007-01-27 00:49:44 +0000609 return dst - 1; \
610 badness: \
611 VALGRIND_PRINTF_BACKTRACE( \
sewardj24cb2172007-02-23 09:03:26 +0000612 "*** stpcpy_chk: buffer overflow detected ***: " \
613 "program terminated"); \
sewardjb8d03852007-01-27 00:49:44 +0000614 _exit(127); \
615 /*NOTREACHED*/ \
616 return NULL; \
617 }
618
njne6154662009-02-10 04:23:41 +0000619GLIBC25___STPCPY_CHK(VG_Z_LIBC_SONAME, __stpcpy_chk)
sewardjb8d03852007-01-27 00:49:44 +0000620
621
sewardj841b72d2006-12-31 18:55:56 +0000622/* mempcpy */
623#define GLIBC25_MEMPCPY(soname, fnname) \
624 void* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
625 ( void *dst, const void *src, SizeT len ); \
626 void* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
627 ( void *dst, const void *src, SizeT len ) \
628 { \
629 register char *d; \
630 register char *s; \
631 SizeT len_saved = len; \
632 \
633 if (len == 0) \
634 return dst; \
635 \
636 if (is_overlap(dst, src, len, len)) \
637 RECORD_OVERLAP_ERROR("mempcpy", dst, src, len); \
638 \
639 if ( dst > src ) { \
640 d = (char *)dst + len - 1; \
641 s = (char *)src + len - 1; \
642 while ( len-- ) { \
643 *d-- = *s--; \
644 } \
645 } else if ( dst < src ) { \
646 d = (char *)dst; \
647 s = (char *)src; \
648 while ( len-- ) { \
649 *d++ = *s++; \
650 } \
651 } \
652 return (void*)( ((char*)dst) + len_saved ); \
653 }
654
njne6154662009-02-10 04:23:41 +0000655GLIBC25_MEMPCPY(VG_Z_LIBC_SONAME, mempcpy)
656GLIBC25_MEMPCPY(VG_Z_LD_SO_1, mempcpy) /* ld.so.1 */
sewardj841b72d2006-12-31 18:55:56 +0000657
658
sewardjb6c04032007-11-13 20:52:29 +0000659#define GLIBC26___MEMCPY_CHK(soname, fnname) \
660 void* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
661 (void* dst, const void* src, SizeT len, SizeT dstlen ); \
662 void* VG_REPLACE_FUNCTION_ZU(soname,fnname) \
663 (void* dst, const void* src, SizeT len, SizeT dstlen ) \
664 { \
665 extern void _exit(int status); \
666 register char *d; \
667 register char *s; \
668 \
669 if (dstlen < len) goto badness; \
670 \
671 if (len == 0) \
672 return dst; \
673 \
674 if (is_overlap(dst, src, len, len)) \
675 RECORD_OVERLAP_ERROR("memcpy_chk", dst, src, len); \
676 \
677 if ( dst > src ) { \
678 d = (char *)dst + len - 1; \
679 s = (char *)src + len - 1; \
680 while ( len-- ) { \
681 *d-- = *s--; \
682 } \
683 } else if ( dst < src ) { \
684 d = (char *)dst; \
685 s = (char *)src; \
686 while ( len-- ) { \
687 *d++ = *s++; \
688 } \
689 } \
690 return dst; \
691 badness: \
692 VALGRIND_PRINTF_BACKTRACE( \
693 "*** memcpy_chk: buffer overflow detected ***: " \
694 "program terminated"); \
695 _exit(127); \
696 /*NOTREACHED*/ \
697 return NULL; \
698 }
699
njne6154662009-02-10 04:23:41 +0000700GLIBC26___MEMCPY_CHK(VG_Z_LIBC_SONAME, __memcpy_chk)
sewardjb6c04032007-11-13 20:52:29 +0000701
702
sewardj31b9ce12006-10-17 01:27:13 +0000703/*------------------------------------------------------------*/
dirk09beb9e2007-04-19 09:47:32 +0000704/*--- Improve definedness checking of process environment ---*/
705/*------------------------------------------------------------*/
706
sewardjddc00dd2007-11-27 11:42:47 +0000707#if defined(VGO_linux)
708
dirk09beb9e2007-04-19 09:47:32 +0000709/* putenv */
njne6154662009-02-10 04:23:41 +0000710int VG_WRAP_FUNCTION_ZU(VG_Z_LIBC_SONAME, putenv) (char* string);
711int VG_WRAP_FUNCTION_ZU(VG_Z_LIBC_SONAME, putenv) (char* string)
dirk09beb9e2007-04-19 09:47:32 +0000712{
713 OrigFn fn;
714 Word result;
715 const char* p = string;
716 VALGRIND_GET_ORIG_FN(fn);
717 /* Now by walking over the string we magically produce
718 traces when hitting undefined memory. */
719 if (p)
720 while (*p++)
721 ;
722 CALL_FN_W_W(result, fn, string);
723 return result;
724}
725
726/* unsetenv */
njne6154662009-02-10 04:23:41 +0000727int VG_WRAP_FUNCTION_ZU(VG_Z_LIBC_SONAME, unsetenv) (const char* name);
728int VG_WRAP_FUNCTION_ZU(VG_Z_LIBC_SONAME, unsetenv) (const char* name)
dirk09beb9e2007-04-19 09:47:32 +0000729{
730 OrigFn fn;
731 Word result;
732 const char* p = name;
733 VALGRIND_GET_ORIG_FN(fn);
734 /* Now by walking over the string we magically produce
735 traces when hitting undefined memory. */
736 if (p)
737 while (*p++)
738 ;
739 CALL_FN_W_W(result, fn, name);
740 return result;
741}
742
743/* setenv */
njne6154662009-02-10 04:23:41 +0000744int VG_WRAP_FUNCTION_ZU(VG_Z_LIBC_SONAME, setenv)
dirk09beb9e2007-04-19 09:47:32 +0000745 (const char* name, const char* value, int overwrite);
njne6154662009-02-10 04:23:41 +0000746int VG_WRAP_FUNCTION_ZU(VG_Z_LIBC_SONAME, setenv)
dirk09beb9e2007-04-19 09:47:32 +0000747 (const char* name, const char* value, int overwrite)
748{
749 OrigFn fn;
750 Word result;
751 const char* p;
752 VALGRIND_GET_ORIG_FN(fn);
753 /* Now by walking over the string we magically produce
754 traces when hitting undefined memory. */
755 if (name)
756 for (p = name; *p; p++)
757 ;
758 if (value)
759 for (p = value; *p; p++)
760 ;
761 VALGRIND_CHECK_VALUE_IS_DEFINED (overwrite);
762 CALL_FN_W_WWW(result, fn, name, value, overwrite);
763 return result;
764}
765
sewardjddc00dd2007-11-27 11:42:47 +0000766#endif /* defined(VGO_linux) */
767
768
dirk09beb9e2007-04-19 09:47:32 +0000769/*------------------------------------------------------------*/
sewardj31b9ce12006-10-17 01:27:13 +0000770/*--- AIX stuff only after this point ---*/
771/*------------------------------------------------------------*/
772
/* Instantiate the five string replacements -- strcat, strncat, strcpy,
   strncpy and strcmp -- for one soname. */
#define Str5FNs(soname_) \
   STRCAT(soname_,  strcat) \
   STRNCAT(soname_, strncat) \
   STRCPY(soname_,  strcpy) \
   STRNCPY(soname_, strncpy) \
   STRCMP(soname_,  strcmp)
sewardj31b9ce12006-10-17 01:27:13 +0000781
#if defined(VGP_ppc32_aix5)
/* 32-bit AIX: sonames (Z-encoded) for which the five string
   replacements are generated.  The decoded name follows each entry. */
Str5FNs(NONE)                             /* in main exe */
Str5FNs(libCZdaZLshrcoreZdoZR)            /* libC.a(shrcore.o) */
Str5FNs(libX11ZdaZLshr4ZdoZR)             /* libX11.a(shr4.o) */
Str5FNs(libXmZdaZLshrZaZdoZR)             /* libXm.a(shr*.o) */
Str5FNs(libXtZdaZLshr4ZdoZR)              /* libXt.a(shr4.o) */
Str5FNs(libppeZurZdaZLdynamicZdoZR)       /* libppe_r.a(dynamic.o) */
Str5FNs(libodmZdaZLshrZdoZR)              /* libodm.a(shr.o) */
Str5FNs(libmpiZurZdaZLmpicoreZurZdoZR)    /* libmpi_r.a(mpicore_r.o) */
Str5FNs(libmpiZurZdaZLmpipoeZurZdoZR)     /* libmpi_r.a(mpipoe_r.o) */
Str5FNs(libmpiZurZdaZLmpciZurZdoZR)       /* libmpi_r.a(mpci_r.o) */
Str5FNs(libslurmZdso)                     /* libslurm.so */
Str5FNs(libglibZdso)                      /* libglib.so */
Str5FNs(libIMZdaZLshrZdoZR)               /* libIM.a(shr.o) */
Str5FNs(libiconvZdaZLshr4ZdoZR)           /* libiconv.a(shr4.o) */
Str5FNs(libGLZdaZLshrZdoZR)               /* libGL.a(shr.o) */
Str5FNs(libgdkZdso)                       /* libgdk.so */
Str5FNs(libcursesZdaZLshr42ZdoZR)         /* libcurses.a(shr42.o) */
Str5FNs(libqtZda)                         /* libqt.a */
Str5FNs(ZaZLlibglibZhZaZdsoZaZR)          /* *(libglib-*.so*) */
Str5FNs(ZaZLlibfontconfigZdsoZaZR)        /* *(libfontconfig.so*) */
Str5FNs(libQtZaa)                         /* libQt*.a */
#endif
/* 64-bit AIX: instantiate the five string replacements for the main
   executable and for each listed library.  The trailing comment on
   each line gives the plain spelling of the encoded soname
   (Zd = '.', Za = '*', ZL = '(', ZR = ')', Zu = '_', Zh = '-'). */
#if defined(VGP_ppc64_aix5)
Str5FNs(NONE)                             /* in main exe */
Str5FNs(libX11ZdaZLshrZu64ZdoZR)          /* libX11.a(shr_64.o) */
Str5FNs(libiconvZdaZLshr4Zu64ZdoZR)       /* libiconv.a(shr4_64.o) */
Str5FNs(libGLZdaZLshrZu64ZdoZR)           /* libGL.a(shr_64.o) */
Str5FNs(libppeZurZdaZLdynamic64ZdoZR)     /* libppe_r.a(dynamic64.o) */
Str5FNs(libodmZdaZLshrZu64ZdoZR)          /* libodm.a(shr_64.o) */
Str5FNs(libmpiZurZdaZLmpicore64ZurZdoZR)  /* libmpi_r.a(mpicore64_r.o) */
Str5FNs(libmpiZurZdaZLmpipoe64ZurZdoZR)   /* libmpi_r.a(mpipoe64_r.o) */
Str5FNs(libCZdaZLshrcoreZu64ZdoZR)        /* libC.a(shrcore_64.o) */
Str5FNs(libmpiZurZdaZLmpci64ZurZdoZR)     /* libmpi_r.a(mpci64_r.o) */
Str5FNs(libqtZda)                         /* libqt.a */
Str5FNs(ZaZLlibglibZhZaZdsoZaZR)          /* *(libglib-*.so*) */
Str5FNs(ZaZLlibfontconfigZdsoZaZR)        /* *(libfontconfig.so*) */
Str5FNs(libQtZaa)                         /* libQt*a (so also libQt*.a) */
#endif
821
822
/* AIX's libm contains a sqrt implementation which does a nasty thing:
   it loads the initial estimate of the root into a FP register, but
   only the upper half of the number is initialised data.  Hence the
   least significant 32 mantissa bits are undefined, and it then uses
   Newton-Raphson iteration to compute the final, defined result.
   This fools memcheck completely; the only solution I can think of is
   to provide our own substitute.  The _FAST variant is almost right
   except the result is not correctly rounded.  The _EXACT variant,
   which is selected by default, is always right; but it's also pretty
   darn slow. */
833
834#if defined(VGP_ppc32_aix5) || defined(VGP_ppc64_aix5)
/* SQRT_FAST(soname, fnname): define (with a preceding prototype) a
   replacement for the double -> double function 'fnname' in 'soname'.

   For positive finite inputs the initial estimate is built from the
   high 32 bits of x (halved, rebiased and corrected through the
   32-entry table T1) and then refined by three Newton-Raphson style
   steps.  Negative finite inputs and -Inf yield a NaN with bit
   pattern 0xFFF00000:00000001; +/-0, +Inf and NaN are returned
   unchanged.

   The word order is hard-wired to big-endian (bIGENDIAN = 1), so
   w[0] is the high half of the double. */
#define SQRT_FAST(soname, fnname) \
   double VG_REPLACE_FUNCTION_ZU(soname,fnname)( double x ); \
   double VG_REPLACE_FUNCTION_ZU(soname,fnname)( double x ) \
   { \
      /* correction table, indexed by bits 15..19 of the estimate */ \
      static const UInt T1[32] = \
         { 0,     1024,  3062,  5746,  9193,  13348, \
           18162, 23592, 29598, 36145, 43202, 50740, \
           58733, 67158, 75992, 85215, 83599, 71378, \
           60428, 50647, 41945, 34246, 27478, 21581, \
           16499, 12183, 8588,  5674,  3403,  1742, \
           661,   130 }; \
      UInt x0, x1, sign, expo, mant0, bIGENDIAN = 1; \
      union { UInt w[2]; double d; } u; \
      u.d   = x; \
      x0    = u.w[1 - bIGENDIAN]; /* high half */ \
      x1    = u.w[bIGENDIAN];     /* low half */ \
      sign  = x0 >> 31; \
      expo  = (x0 >> 20) & 0x7FF; \
      mant0 = x0 & 0xFFFFF; \
      if ( (sign == 0 && expo >= 1 && expo <= 0x7FE) /* +normal */ \
           || (sign == 0 && expo == 0 \
               && (mant0 | x1) > 0) /* +denorm */) { \
         /* common case; do Newton-Raphson */ \
         /* technically k should be signed int32, but since we're \
            always entering here with x > 0, doesn't matter that it's \
            unsigned. */ \
         double y; \
         UInt k = (x0>>1) + 0x1ff80000; \
         u.w[1 - bIGENDIAN] = k - T1[31&(k>>15)]; \
         u.w[bIGENDIAN] = 0; \
         y = u.d; \
         y = (y+x/y)/2.0 ; \
         y = (y+x/y)/2.0 ; \
         y = y-(y-x/y)/2.0 ; \
         return y; \
      } \
      if ( (sign == 1 && expo >= 1 && expo <= 0x7FE) /* -normal */ \
           || (sign == 1 && expo == 0 \
               && (mant0 | x1) > 0) /* -denorm */) { \
         u.w[1 - bIGENDIAN] = 0xFFF00000; \
         u.w[bIGENDIAN] = 0x1; \
         return u.d; /* negative finite -> NaN */ \
      } \
      if ((expo | mant0 | x1) == 0) \
         return x; /* +/-zero -> self */ \
      if (expo == 0x7FF && (mant0 | x1) == 0) { \
         if (sign == 0) \
            return x; /* +Inf -> self */ \
         u.w[1 - bIGENDIAN] = 0xFFF00000; \
         u.w[bIGENDIAN] = 0x1; \
         return u.d; /* -Inf -> NaN */ \
      } \
      /* must be +/- NaN */ \
      return x; /* +/-NaN -> self */ \
   }
890
/* SQRT_EXACT(soname, fnname): define (with a preceding prototype) a
   replacement for the double -> double function 'fnname' in 'soname'
   that computes the square root bit by bit using integer arithmetic
   on the two 32-bit halves of the argument.  The algorithm and its
   description carry a Sun Microsystems notice, preserved inside the
   macro.

   The word order is hard-wired to big-endian (bIGENDIAN = 1), so
   w[0] is the high half of the double. */
#define SQRT_EXACT(soname, fnname) \
   /* \
    * ==================================================== \
    * Copyright (C) 1993 by Sun Microsystems, Inc. All rights reserved. \
    * \
    * Developed at SunPro, a Sun Microsystems, Inc. business. \
    * Permission to use, copy, modify, and distribute this \
    * software is freely granted, provided that this notice \
    * is preserved. \
    * ==================================================== \
    */ \
   /* \
    * Return correctly rounded sqrt. \
    *           ------------------------------------------ \
    *           |  Use the hardware sqrt if you have one | \
    *           ------------------------------------------ \
    * Method: \
    *   Bit by bit method using integer arithmetic. (Slow, but portable) \
    *   1. Normalization \
    *      Scale x to y in [1,4) with even powers of 2: \
    *      find an integer k such that 1 <= (y=x*2^(2k)) < 4, then \
    *              sqrt(x) = 2^k * sqrt(y) \
    *   2. Bit by bit computation \
    *      Let q_i = sqrt(y) truncated to i bits after the binary \
    *      point (q_0 = 1), \
    * \
    *          s_i = 2*q_i,  and  y_i = 2^(i+1) * (y - q_i^2).       (1) \
    * \
    *      To compute q_(i+1) from q_i, one checks whether \
    * \
    *          (q_i + 2^-(i+1))^2 <= y.                              (2) \
    * \
    *      If (2) is false, then q_(i+1) = q_i; otherwise \
    *      q_(i+1) = q_i + 2^-(i+1). \
    * \
    *      With some algebraic manipulation, it is not difficult to see \
    *      that (2) is equivalent to \
    * \
    *          s_i + 2^-(i+1) <= y_i                                 (3) \
    * \
    *      The advantage of (3) is that s_i and y_i can be computed by \
    *      the following recurrence formula: \
    *          if (3) is false \
    * \
    *          s_(i+1) = s_i,  y_(i+1) = y_i;                        (4) \
    * \
    *          otherwise, \
    * \
    *          s_(i+1) = s_i + 2^-i, \
    *          y_(i+1) = y_i - s_i - 2^-(i+1)                        (5) \
    * \
    *      One may easily use induction to prove (4) and (5). \
    *      Note. Since the left hand side of (3) contains only i+2 bits, \
    *            it is not necessary to do a full (53-bit) comparison \
    *            in (3). \
    *   3. Final rounding \
    *      After generating the 53 bits result, we compute one more bit. \
    *      Together with the remainder, we can decide whether the \
    *      result is exact, bigger than 1/2ulp, or less than 1/2ulp \
    *      (it will never equal to 1/2ulp). \
    *      The rounding mode can be detected by checking whether \
    *      huge + tiny is equal to huge, and whether huge - tiny is \
    *      equal to huge for some floating point number "huge" and "tiny". \
    * \
    * Special cases: \
    *      sqrt(+-0) = +-0         ... exact \
    *      sqrt(inf) = inf \
    *      sqrt(-ve) = NaN         ... with invalid signal \
    *      sqrt(NaN) = NaN         ... with invalid signal for signaling NaN \
    * \
    */ \
   double VG_REPLACE_FUNCTION_ZU(soname,fnname)( double x ); \
   double VG_REPLACE_FUNCTION_ZU(soname,fnname)( double x ) \
   { \
      const Int    bIGENDIAN = 1; \
      const double one = 1.0, tiny=1.0e-300; \
      double z; \
      Int sign = (Int)0x80000000; \
      Int ix0,s0,q,m,t,i; \
      UInt r,t1,s1,ix1,q1; \
      union { UInt w[2]; double d; } u; \
      u.d = x; \
      ix0 = u.w[1-bIGENDIAN]; /* high half */ \
      ix1 = u.w[bIGENDIAN];   /* low half */ \
      \
      /* take care of Inf and NaN */ \
      if((ix0&0x7ff00000)==0x7ff00000) { \
         return x*x+x;               /* sqrt(NaN)=NaN, sqrt(+inf)=+inf \
                                        sqrt(-inf)=sNaN */ \
      } \
      /* take care of zero */ \
      if(ix0<=0) { \
         if(((ix0&(~sign))|ix1)==0) return x;/* sqrt(+-0) = +-0 */ \
         else if(ix0<0) \
            return (x-x)/(x-x);             /* sqrt(-ve) = sNaN */ \
      } \
      /* normalize x */ \
      m = (ix0>>20); \
      if(m==0) {                              /* subnormal x */ \
         while(ix0==0) { \
            m -= 21; \
            ix0 |= (ix1>>11); ix1 <<= 21; \
         } \
         for(i=0;(ix0&0x00100000)==0;i++) ix0<<=1; \
         m -= i-1; \
         /* i can be 0 here (bit 20 already set by the loop above); \
            shifting a 32-bit value by 32 is undefined, and in that \
            case there is nothing to bring across anyway. */ \
         if (i != 0) ix0 |= (ix1>>(32-i)); \
         ix1 <<= i; \
      } \
      m -= 1023;      /* unbias exponent */ \
      ix0 = (ix0&0x000fffff)|0x00100000; \
      if(m&1){        /* odd m, double x to make it even */ \
         ix0 += ix0 + ((ix1&sign)>>31); \
         ix1 += ix1; \
      } \
      m >>= 1;        /* m = [m/2] */ \
      /* generate sqrt(x) bit by bit */ \
      ix0 += ix0 + ((ix1&sign)>>31); \
      ix1 += ix1; \
      q = q1 = s0 = s1 = 0;   /* [q,q1] = sqrt(x) */ \
      r = 0x00200000;         /* r = moving bit from right to left */ \
      while(r!=0) { \
         t = s0+r; \
         if(t<=ix0) { \
            s0   = t+r; \
            ix0 -= t; \
            q   += r; \
         } \
         ix0 += ix0 + ((ix1&sign)>>31); \
         ix1 += ix1; \
         r>>=1; \
      } \
      r = sign; \
      while(r!=0) { \
         t1 = s1+r; \
         t  = s0; \
         if((t<ix0)||((t==ix0)&&(t1<=ix1))) { \
            s1  = t1+r; \
            if(((t1&sign)==sign)&&(s1&sign)==0) s0 += 1; \
            ix0 -= t; \
            if (ix1 < t1) ix0 -= 1; \
            ix1 -= t1; \
            q1  += r; \
         } \
         ix0 += ix0 + ((ix1&sign)>>31); \
         ix1 += ix1; \
         r>>=1; \
      } \
      /* use floating add to find out rounding direction */ \
      if((ix0|ix1)!=0) { \
         z = one-tiny; /* trigger inexact flag */ \
         if (z>=one) { \
            z = one+tiny; \
            if (q1==(UInt)0xffffffff) { q1=0; q += 1;} \
            else if (z>one) { \
               if (q1==(UInt)0xfffffffe) q+=1; \
               q1+=2; \
            } else \
               q1 += (q1&1); \
         } \
      } \
      ix0 = (q>>1)+0x3fe00000; \
      ix1 = q1>>1; \
      if ((q&1)==1) ix1 |= sign; \
      /* add m to the exponent field; m is negative for x < 1, and \
         left-shifting a negative value is undefined, so multiply. */ \
      ix0 += m * 0x00100000; \
      u.w[1-bIGENDIAN] = ix0; \
      u.w[bIGENDIAN]   = ix1; \
      z = u.d; \
      return z; \
   }
1068
1069#if 0
1070SQRT_FAST(NONE, sqrt) /* xlC generates these */
1071SQRT_FAST(NONE, _sqrt) /* xlf generates these */
1072#else
1073SQRT_EXACT(NONE, sqrt) /* xlC generates these */
1074SQRT_EXACT(NONE, _sqrt) /* xlf generates these */
1075#endif
1076
#endif /* defined(VGP_ppc32_aix5) || defined(VGP_ppc64_aix5) */
1078
njn3e884182003-04-15 13:03:23 +00001079/*--------------------------------------------------------------------*/
njn46275862005-03-24 04:00:03 +00001080/*--- end ---*/
njn3e884182003-04-15 13:03:23 +00001081/*--------------------------------------------------------------------*/