Rich Felker | 0b44a03 | 2011-02-12 00:22:29 -0500 | [diff] [blame] | 1 | #include <string.h> |
Rich Felker | 0b44a03 | 2011-02-12 00:22:29 -0500 | [diff] [blame] | 2 | #include <stdint.h> |
Rich Felker | 0b44a03 | 2011-02-12 00:22:29 -0500 | [diff] [blame] | 3 | |
/* memset: fill the first n bytes of dest with the byte value
 * (unsigned char)c and return dest (standard C contract).
 *
 * Strategy: small sizes are handled by branch-minimized byte stores
 * whose ranges deliberately overlap (storing the same byte value
 * twice is harmless and cheaper than extra branches). Larger fills
 * align the pointer and use wide word stores under GNU C; a plain
 * byte loop is the portable fallback. */
void *memset(void *dest, int c, size_t n)
{
	unsigned char *s = dest;
	size_t k;

	/* Fill head and tail with minimal branching. Each
	 * conditional ensures that all the subsequently used
	 * offsets are well-defined and in the dest region. */

	if (!n) return dest;
	s[0] = c;
	s[n-1] = c;
	if (n <= 2) return dest;
	s[1] = c;
	s[2] = c;
	s[n-2] = c;
	s[n-3] = c;
	if (n <= 6) return dest;
	s[3] = c;
	s[n-4] = c;
	if (n <= 8) return dest;

	/* Advance pointer to align it at a 4-byte boundary,
	 * and truncate n to a multiple of 4. The previous code
	 * already took care of any head/tail that get cut off
	 * by the alignment. */

	k = -(uintptr_t)s & 3;	/* bytes (0..3) to the next 4-byte boundary */
	s += k;
	n -= k;	/* n > 8 and k <= 3 here, so this cannot underflow */
	n &= -4;	/* round n down to a multiple of 4 */

#ifdef __GNUC__
	/* may_alias lets these word-sized accesses legally alias the
	 * char buffer without violating strict aliasing. */
	typedef uint32_t __attribute__((__may_alias__)) u32;
	typedef uint64_t __attribute__((__may_alias__)) u64;

	/* Replicate the fill byte into all four bytes of a word:
	 * ((u32)-1)/255 == 0x01010101. */
	u32 c32 = ((u32)-1)/255 * (unsigned char)c;

	/* In preparation to fill 32 bytes at a time, aligned on
	 * an 8-byte boundary, fill head/tail up to 28 bytes each.
	 * As in the initial byte-based head/tail fill, each
	 * conditional below ensures that the subsequent offsets
	 * are valid (e.g. !(n<=24) implies n>=28). */

	*(u32 *)(s+0) = c32;
	*(u32 *)(s+n-4) = c32;
	if (n <= 8) return dest;
	*(u32 *)(s+4) = c32;
	*(u32 *)(s+8) = c32;
	*(u32 *)(s+n-12) = c32;
	*(u32 *)(s+n-8) = c32;
	if (n <= 24) return dest;
	*(u32 *)(s+12) = c32;
	*(u32 *)(s+16) = c32;
	*(u32 *)(s+20) = c32;
	*(u32 *)(s+24) = c32;
	*(u32 *)(s+n-28) = c32;
	*(u32 *)(s+n-24) = c32;
	*(u32 *)(s+n-20) = c32;
	*(u32 *)(s+n-16) = c32;

	/* Align to a multiple of 8 so we can fill 64 bits at a time,
	 * and avoid writing the same bytes twice as much as is
	 * practical without introducing additional branching. */

	/* s is already 4-aligned, so ((uintptr_t)s & 4) is 0 or 4 and
	 * s+k lands on an 8-byte boundary; the 24-or-28 bytes skipped
	 * were all covered by the u32 stores above. */
	k = 24 + ((uintptr_t)s & 4);
	s += k;
	n -= k;

	/* If this loop is reached, 28 tail bytes have already been
	 * filled, so any remainder when n drops below 32 can be
	 * safely ignored. */

	u64 c64 = c32 | ((u64)c32 << 32);	/* widen fill pattern to 64 bits */
	for (; n >= 32; n-=32, s+=32) {
		*(u64 *)(s+0) = c64;
		*(u64 *)(s+8) = c64;
		*(u64 *)(s+16) = c64;
		*(u64 *)(s+24) = c64;
	}
#else
	/* Pure C fallback with no aliasing violations. */
	for (; n; n--, s++) *s = c;
#endif

	return dest;
}