| /* |
| * Copyright © 2012 Siarhei Siamashka <siarhei.siamashka@gmail.com> |
| * |
| * Based on the public domain implementation of small noncryptographic PRNG |
| * authored by Bob Jenkins: http://burtleburtle.net/bob/rand/smallprng.html |
| * |
| * Permission is hereby granted, free of charge, to any person obtaining a |
| * copy of this software and associated documentation files (the "Software"), |
| * to deal in the Software without restriction, including without limitation |
| * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
| * and/or sell copies of the Software, and to permit persons to whom the |
| * Software is furnished to do so, subject to the following conditions: |
| * |
| * The above copyright notice and this permission notice (including the next |
| * paragraph) shall be included in all copies or substantial portions of the |
| * Software. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
| * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
| * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
| * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
| * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
| * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER |
| * DEALINGS IN THE SOFTWARE. |
| */ |
| |
| #include "utils.h" |
| #include "utils-prng.h" |
| |
| #if defined(GCC_VECTOR_EXTENSIONS_SUPPORTED) && defined(__SSE2__) |
| #include <xmmintrin.h> |
| #endif |
| |
| void smallprng_srand_r (smallprng_t *x, uint32_t seed) |
| { |
| uint32_t i; |
| x->a = 0xf1ea5eed, x->b = x->c = x->d = seed; |
| for (i = 0; i < 20; ++i) |
| smallprng_rand_r (x); |
| } |
| |
| /* |
| * Set a 32-bit seed for PRNG |
| * |
| * LCG is used here for generating independent seeds for different |
| * smallprng instances (in the case if smallprng is also used for |
| * generating these seeds, "Big Crush" test from TestU01 detects |
| * some problems in the glued 'prng_rand_128_r' output data). |
| * Actually we might be even better using some cryptographic |
| * hash for this purpose, but LCG seems to be also enough for |
| * passing "Big Crush". |
| */ |
| void prng_srand_r (prng_t *x, uint32_t seed) |
| { |
| #ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED |
| int i; |
| prng_rand_128_data_t dummy; |
| smallprng_srand_r (&x->p0, seed); |
| x->a[0] = x->a[1] = x->a[2] = x->a[3] = 0xf1ea5eed; |
| x->b[0] = x->c[0] = x->d[0] = (seed = seed * 1103515245 + 12345); |
| x->b[1] = x->c[1] = x->d[1] = (seed = seed * 1103515245 + 12345); |
| x->b[2] = x->c[2] = x->d[2] = (seed = seed * 1103515245 + 12345); |
| x->b[3] = x->c[3] = x->d[3] = (seed = seed * 1103515245 + 12345); |
| for (i = 0; i < 20; ++i) |
| prng_rand_128_r (x, &dummy); |
| #else |
| smallprng_srand_r (&x->p0, seed); |
| smallprng_srand_r (&x->p1, (seed = seed * 1103515245 + 12345)); |
| smallprng_srand_r (&x->p2, (seed = seed * 1103515245 + 12345)); |
| smallprng_srand_r (&x->p3, (seed = seed * 1103515245 + 12345)); |
| smallprng_srand_r (&x->p4, (seed = seed * 1103515245 + 12345)); |
| #endif |
| } |
| |
| static force_inline void |
| store_rand_128_data (void *addr, prng_rand_128_data_t *d, int aligned) |
| { |
| #ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED |
| if (aligned) |
| { |
| *(uint8x16 *)addr = d->vb; |
| return; |
| } |
| else |
| { |
| #ifdef __SSE2__ |
| /* workaround for http://gcc.gnu.org/PR55614 */ |
| _mm_storeu_si128 (addr, _mm_loadu_si128 ((__m128i *)d)); |
| return; |
| #endif |
| } |
| #endif |
| /* we could try something better for unaligned writes (packed attribute), |
| * but GCC is not very reliable: http://gcc.gnu.org/PR55454 */ |
| memcpy (addr, d, 16); |
| } |
| |
| /* |
| * Helper function and the actual code for "prng_randmemset_r" function |
| */ |
| static force_inline void |
| randmemset_internal (prng_t *prng, |
| uint8_t *buf, |
| size_t size, |
| prng_randmemset_flags_t flags, |
| int aligned) |
| { |
| prng_t local_prng = *prng; |
| prng_rand_128_data_t randdata; |
| size_t i; |
| |
| while (size >= 16) |
| { |
| prng_rand_128_data_t t; |
| if (flags == 0) |
| { |
| prng_rand_128_r (&local_prng, &randdata); |
| } |
| else |
| { |
| prng_rand_128_r (&local_prng, &t); |
| prng_rand_128_r (&local_prng, &randdata); |
| #ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED |
| if (flags & RANDMEMSET_MORE_FF) |
| { |
| const uint8x16 const_C0 = |
| { |
| 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, |
| 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0, 0xC0 |
| }; |
| randdata.vb |= (t.vb >= const_C0); |
| } |
| if (flags & RANDMEMSET_MORE_00) |
| { |
| const uint8x16 const_40 = |
| { |
| 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, |
| 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40, 0x40 |
| }; |
| randdata.vb &= (t.vb >= const_40); |
| } |
| if (flags & RANDMEMSET_MORE_FFFFFFFF) |
| { |
| const uint32x4 const_C0000000 = |
| { |
| 0xC0000000, 0xC0000000, 0xC0000000, 0xC0000000 |
| }; |
| randdata.vw |= ((t.vw << 30) >= const_C0000000); |
| } |
| if (flags & RANDMEMSET_MORE_00000000) |
| { |
| const uint32x4 const_40000000 = |
| { |
| 0x40000000, 0x40000000, 0x40000000, 0x40000000 |
| }; |
| randdata.vw &= ((t.vw << 30) >= const_40000000); |
| } |
| #else |
| #define PROCESS_ONE_LANE(i) \ |
| if (flags & RANDMEMSET_MORE_FF) \ |
| { \ |
| uint32_t mask_ff = (t.w[i] & (t.w[i] << 1)) & 0x80808080; \ |
| mask_ff |= mask_ff >> 1; \ |
| mask_ff |= mask_ff >> 2; \ |
| mask_ff |= mask_ff >> 4; \ |
| randdata.w[i] |= mask_ff; \ |
| } \ |
| if (flags & RANDMEMSET_MORE_00) \ |
| { \ |
| uint32_t mask_00 = (t.w[i] | (t.w[i] << 1)) & 0x80808080; \ |
| mask_00 |= mask_00 >> 1; \ |
| mask_00 |= mask_00 >> 2; \ |
| mask_00 |= mask_00 >> 4; \ |
| randdata.w[i] &= mask_00; \ |
| } \ |
| if (flags & RANDMEMSET_MORE_FFFFFFFF) \ |
| { \ |
| int32_t mask_ff = ((t.w[i] << 30) & (t.w[i] << 31)) & \ |
| 0x80000000; \ |
| randdata.w[i] |= mask_ff >> 31; \ |
| } \ |
| if (flags & RANDMEMSET_MORE_00000000) \ |
| { \ |
| int32_t mask_00 = ((t.w[i] << 30) | (t.w[i] << 31)) & \ |
| 0x80000000; \ |
| randdata.w[i] &= mask_00 >> 31; \ |
| } |
| |
| PROCESS_ONE_LANE (0) |
| PROCESS_ONE_LANE (1) |
| PROCESS_ONE_LANE (2) |
| PROCESS_ONE_LANE (3) |
| #endif |
| } |
| if (is_little_endian ()) |
| { |
| store_rand_128_data (buf, &randdata, aligned); |
| buf += 16; |
| } |
| else |
| { |
| #ifdef GCC_VECTOR_EXTENSIONS_SUPPORTED |
| const uint8x16 bswap_shufflemask = |
| { |
| 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 |
| }; |
| randdata.vb = __builtin_shuffle (randdata.vb, bswap_shufflemask); |
| store_rand_128_data (buf, &randdata, aligned); |
| buf += 16; |
| #else |
| uint8_t t1, t2, t3, t4; |
| #define STORE_ONE_LANE(i) \ |
| t1 = randdata.b[i * 4 + 3]; \ |
| t2 = randdata.b[i * 4 + 2]; \ |
| t3 = randdata.b[i * 4 + 1]; \ |
| t4 = randdata.b[i * 4 + 0]; \ |
| *buf++ = t1; \ |
| *buf++ = t2; \ |
| *buf++ = t3; \ |
| *buf++ = t4; |
| |
| STORE_ONE_LANE (0) |
| STORE_ONE_LANE (1) |
| STORE_ONE_LANE (2) |
| STORE_ONE_LANE (3) |
| #endif |
| } |
| size -= 16; |
| } |
| i = 0; |
| while (i < size) |
| { |
| uint8_t randbyte = prng_rand_r (&local_prng) & 0xFF; |
| if (flags != 0) |
| { |
| uint8_t t = prng_rand_r (&local_prng) & 0xFF; |
| if ((flags & RANDMEMSET_MORE_FF) && (t >= 0xC0)) |
| randbyte = 0xFF; |
| if ((flags & RANDMEMSET_MORE_00) && (t < 0x40)) |
| randbyte = 0x00; |
| if (i % 4 == 0 && i + 4 <= size) |
| { |
| t = prng_rand_r (&local_prng) & 0xFF; |
| if ((flags & RANDMEMSET_MORE_FFFFFFFF) && (t >= 0xC0)) |
| { |
| memset(&buf[i], 0xFF, 4); |
| i += 4; |
| continue; |
| } |
| if ((flags & RANDMEMSET_MORE_00000000) && (t < 0x40)) |
| { |
| memset(&buf[i], 0x00, 4); |
| i += 4; |
| continue; |
| } |
| } |
| } |
| buf[i] = randbyte; |
| i++; |
| } |
| *prng = local_prng; |
| } |
| |
| /* |
| * Fill memory buffer with random data. Flags argument may be used |
| * to tweak some statistics properties: |
| * RANDMEMSET_MORE_00 - set ~25% of bytes to 0x00 |
| * RANDMEMSET_MORE_FF - set ~25% of bytes to 0xFF |
| * RANDMEMSET_MORE_00000000 - ~25% chance for 00000000 4-byte clusters |
| * RANDMEMSET_MORE_FFFFFFFF - ~25% chance for FFFFFFFF 4-byte clusters |
| */ |
| void prng_randmemset_r (prng_t *prng, |
| void *voidbuf, |
| size_t size, |
| prng_randmemset_flags_t flags) |
| { |
| uint8_t *buf = (uint8_t *)voidbuf; |
| if ((uintptr_t)buf & 15) |
| { |
| /* unaligned buffer */ |
| if (flags == 0) |
| randmemset_internal (prng, buf, size, 0, 0); |
| else if (flags == RANDMEMSET_MORE_00_AND_FF) |
| randmemset_internal (prng, buf, size, RANDMEMSET_MORE_00_AND_FF, 0); |
| else |
| randmemset_internal (prng, buf, size, flags, 0); |
| } |
| else |
| { |
| /* aligned buffer */ |
| if (flags == 0) |
| randmemset_internal (prng, buf, size, 0, 1); |
| else if (flags == RANDMEMSET_MORE_00_AND_FF) |
| randmemset_internal (prng, buf, size, RANDMEMSET_MORE_00_AND_FF, 1); |
| else |
| randmemset_internal (prng, buf, size, flags, 1); |
| } |
| } |