/* ******************************************************************
   mem.h
   low-level memory access routines
   Copyright (C) 2013-2015, Yann Collet.

   BSD 2-Clause License (http://www.opensource.org/licenses/bsd-license.php)

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are
   met:

       * Redistributions of source code must retain the above copyright
   notice, this list of conditions and the following disclaimer.
       * Redistributions in binary form must reproduce the above
   copyright notice, this list of conditions and the following disclaimer
   in the documentation and/or other materials provided with the
   distribution.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
   "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
   LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
   A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
   OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
   SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
   LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
   DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
   THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
   (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

    You can contact the author at :
    - FSE source repository : https://github.com/Cyan4973/FiniteStateEntropy
    - Public forum : https://groups.google.com/forum/#!forum/lz4c
****************************************************************** */
#ifndef MEM_H_MODULE
#define MEM_H_MODULE

#if defined (__cplusplus)
extern "C" {
#endif

/*-****************************************
*  Dependencies
******************************************/
#include <stddef.h>    /* size_t, ptrdiff_t */
#include <string.h>    /* memcpy */


/*-****************************************
*  Compiler specifics
******************************************/
#if defined(__GNUC__)
#  define MEM_STATIC static __attribute__((unused))
#elif defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
#  define MEM_STATIC static inline
#elif defined(_MSC_VER)
#  define MEM_STATIC static __inline
#else
#  define MEM_STATIC static  /* this version may generate warnings for unused static functions; disable the relevant warning */
#endif


/*-**************************************************************
*  Basic Types
*****************************************************************/
#if defined (__cplusplus) || (defined (__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) /* C99 */)
# include <stdint.h>
  typedef  uint8_t BYTE;
  typedef uint16_t U16;
  typedef  int16_t S16;
  typedef uint32_t U32;
  typedef  int32_t S32;
  typedef uint64_t U64;
  typedef  int64_t S64;
#else
  typedef unsigned char       BYTE;
  typedef unsigned short      U16;
  typedef   signed short      S16;
  typedef unsigned int        U32;
  typedef   signed int        S32;
  typedef unsigned long long  U64;
  typedef   signed long long  S64;
#endif


/*-**************************************************************
*  Memory I/O
*****************************************************************/
/* MEM_FORCE_MEMORY_ACCESS :
 * By default, access to unaligned memory is controlled by `memcpy()`, which is safe and portable.
 * Unfortunately, on some target/compiler combinations, the generated assembly is sub-optimal.
 * The switch below allows selecting a different access method for improved performance.
 * Method 0 (default) : use `memcpy()`. Safe and portable.
 * Method 1 : `__packed` statement. It relies on a compiler extension (i.e., not portable).
 *            This method is safe if your compiler supports it, and *generally* as fast or faster than `memcpy`.
 * Method 2 : direct access. This method doesn't rely on a compiler extension, but it violates the C standard.
 *            It can generate buggy code on targets which require aligned memory accesses.
 *            In some circumstances, it's the only known way to get the best performance (e.g. GCC + ARMv6).
 * See http://fastcompression.blogspot.fr/2015/08/accessing-unaligned-memory.html for details.
 * Prefer these methods in priority order (0 > 1 > 2).
 */
#ifndef MEM_FORCE_MEMORY_ACCESS   /* can be defined externally, on command line for example */
#  if defined(__GNUC__) && ( defined(__ARM_ARCH_6__) || defined(__ARM_ARCH_6J__) || defined(__ARM_ARCH_6K__) || defined(__ARM_ARCH_6Z__) || defined(__ARM_ARCH_6ZK__) || defined(__ARM_ARCH_6T2__) )
#    define MEM_FORCE_MEMORY_ACCESS 2
#  elif defined(__INTEL_COMPILER) || \
  (defined(__GNUC__) && ( defined(__ARM_ARCH_7__) || defined(__ARM_ARCH_7A__) || defined(__ARM_ARCH_7R__) || defined(__ARM_ARCH_7M__) || defined(__ARM_ARCH_7S__) ))
#    define MEM_FORCE_MEMORY_ACCESS 1
#  endif
#endif
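
/* Usage sketch : when the automatic selection above doesn't fit the target,
 * MEM_FORCE_MEMORY_ACCESS can be set from the build command line instead.
 * Illustrative invocation (file name is a placeholder) :
 *     cc -DMEM_FORCE_MEMORY_ACCESS=1 -c mycode.c
 * forces method 1 (`__packed`) regardless of the detection logic above. */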

MEM_STATIC unsigned MEM_32bits(void) { return sizeof(void*)==4; }
MEM_STATIC unsigned MEM_64bits(void) { return sizeof(void*)==8; }

MEM_STATIC unsigned MEM_isLittleEndian(void)
{
    const union { U32 u; BYTE c[4]; } one = { 1 };   /* don't use static : detrimental to performance */
    return one.c[0];   /* 1 only when the least-significant byte of `u` is stored first, i.e. on little-endian hosts */
}

#if defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==2)

/* violates the C standard, by lying about pointer alignment.
   Only use if there is no other way to achieve best performance on the target platform */
MEM_STATIC U16 MEM_read16(const void* memPtr) { return *(const U16*) memPtr; }
MEM_STATIC U32 MEM_read32(const void* memPtr) { return *(const U32*) memPtr; }
MEM_STATIC U64 MEM_read64(const void* memPtr) { return *(const U64*) memPtr; }
MEM_STATIC size_t MEM_readST(const void* memPtr) { return *(const size_t*) memPtr; }

MEM_STATIC void MEM_write16(void* memPtr, U16 value) { *(U16*)memPtr = value; }
MEM_STATIC void MEM_write32(void* memPtr, U32 value) { *(U32*)memPtr = value; }
MEM_STATIC void MEM_write64(void* memPtr, U64 value) { *(U64*)memPtr = value; }

#elif defined(MEM_FORCE_MEMORY_ACCESS) && (MEM_FORCE_MEMORY_ACCESS==1)

/* the __packed attribute is safer than direct access, but compiler-specific, hence potentially problematic for some compilers */
/* currently only defined for gcc and icc */
typedef union { U16 u16; U32 u32; U64 u64; size_t st; } __attribute__((packed)) unalign;

MEM_STATIC U16 MEM_read16(const void* ptr) { return ((const unalign*)ptr)->u16; }
MEM_STATIC U32 MEM_read32(const void* ptr) { return ((const unalign*)ptr)->u32; }
MEM_STATIC U64 MEM_read64(const void* ptr) { return ((const unalign*)ptr)->u64; }
MEM_STATIC size_t MEM_readST(const void* ptr) { return ((const unalign*)ptr)->st; }

MEM_STATIC void MEM_write16(void* memPtr, U16 value) { ((unalign*)memPtr)->u16 = value; }
MEM_STATIC void MEM_write32(void* memPtr, U32 value) { ((unalign*)memPtr)->u32 = value; }
MEM_STATIC void MEM_write64(void* memPtr, U64 value) { ((unalign*)memPtr)->u64 = value; }

#else

/* default method, safe and standard.
   compilers usually lower these fixed-size memcpy() calls into single load/store instructions,
   but on some target/compiler combinations this can prove slower */

MEM_STATIC U16 MEM_read16(const void* memPtr)
{
    U16 val; memcpy(&val, memPtr, sizeof(val)); return val;
}

MEM_STATIC U32 MEM_read32(const void* memPtr)
{
    U32 val; memcpy(&val, memPtr, sizeof(val)); return val;
}

MEM_STATIC U64 MEM_read64(const void* memPtr)
{
    U64 val; memcpy(&val, memPtr, sizeof(val)); return val;
}

MEM_STATIC size_t MEM_readST(const void* memPtr)
{
    size_t val; memcpy(&val, memPtr, sizeof(val)); return val;
}

MEM_STATIC void MEM_write16(void* memPtr, U16 value)
{
    memcpy(memPtr, &value, sizeof(value));
}

MEM_STATIC void MEM_write32(void* memPtr, U32 value)
{
    memcpy(memPtr, &value, sizeof(value));
}

MEM_STATIC void MEM_write64(void* memPtr, U64 value)
{
    memcpy(memPtr, &value, sizeof(value));
}

#endif /* MEM_FORCE_MEMORY_ACCESS */

MEM_STATIC U16 MEM_readLE16(const void* memPtr)
{
    if (MEM_isLittleEndian())
        return MEM_read16(memPtr);
    else {
        const BYTE* p = (const BYTE*)memPtr;
        return (U16)(p[0] + (p[1]<<8));
    }
}

MEM_STATIC void MEM_writeLE16(void* memPtr, U16 val)
{
    if (MEM_isLittleEndian()) {
        MEM_write16(memPtr, val);
    } else {
        BYTE* p = (BYTE*)memPtr;
        p[0] = (BYTE)val;
        p[1] = (BYTE)(val>>8);
    }
}

MEM_STATIC U32 MEM_readLE32(const void* memPtr)
{
    if (MEM_isLittleEndian())
        return MEM_read32(memPtr);
    else {
        const BYTE* p = (const BYTE*)memPtr;
        return (U32)((U32)p[0] + ((U32)p[1]<<8) + ((U32)p[2]<<16) + ((U32)p[3]<<24));
    }
}

MEM_STATIC void MEM_writeLE32(void* memPtr, U32 val32)
{
    if (MEM_isLittleEndian()) {
        MEM_write32(memPtr, val32);
    } else {
        BYTE* p = (BYTE*)memPtr;
        p[0] = (BYTE)val32;
        p[1] = (BYTE)(val32>>8);
        p[2] = (BYTE)(val32>>16);
        p[3] = (BYTE)(val32>>24);
    }
}

MEM_STATIC U64 MEM_readLE64(const void* memPtr)
{
    if (MEM_isLittleEndian())
        return MEM_read64(memPtr);
    else {
        const BYTE* p = (const BYTE*)memPtr;
        return (U64)((U64)p[0] + ((U64)p[1]<<8) + ((U64)p[2]<<16) + ((U64)p[3]<<24)
                     + ((U64)p[4]<<32) + ((U64)p[5]<<40) + ((U64)p[6]<<48) + ((U64)p[7]<<56));
    }
}

MEM_STATIC void MEM_writeLE64(void* memPtr, U64 val64)
{
    if (MEM_isLittleEndian()) {
        MEM_write64(memPtr, val64);
    } else {
        BYTE* p = (BYTE*)memPtr;
        p[0] = (BYTE)val64;
        p[1] = (BYTE)(val64>>8);
        p[2] = (BYTE)(val64>>16);
        p[3] = (BYTE)(val64>>24);
        p[4] = (BYTE)(val64>>32);
        p[5] = (BYTE)(val64>>40);
        p[6] = (BYTE)(val64>>48);
        p[7] = (BYTE)(val64>>56);
    }
}

MEM_STATIC size_t MEM_readLEST(const void* memPtr)
{
    if (MEM_32bits())
        return (size_t)MEM_readLE32(memPtr);
    else
        return (size_t)MEM_readLE64(memPtr);
}

MEM_STATIC void MEM_writeLEST(void* memPtr, size_t val)
{
    if (MEM_32bits())
        MEM_writeLE32(memPtr, (U32)val);
    else
        MEM_writeLE64(memPtr, (U64)val);
}
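
/* Usage sketch (illustrative only; MEM_exampleRoundTrip is a hypothetical helper,
 * not part of this header, and kept out of compilation) : the MEM_writeLE*() /
 * MEM_readLE*() pairs serialize through a fixed little-endian byte layout, so
 * data written on one host reads back identically on hosts of either endianness. */
#if 0
MEM_STATIC unsigned MEM_exampleRoundTrip(void)
{
    BYTE buffer[4];
    MEM_writeLE32(buffer, 0x01020304U);          /* buffer holds { 0x04, 0x03, 0x02, 0x01 } on any host */
    return MEM_readLE32(buffer) == 0x01020304U;  /* always 1, regardless of host endianness */
}
#endif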

/* only safe for comparisons : the value returned for length==3 is shifted, so it is not meaningful by itself */
MEM_STATIC U32 MEM_readMINMATCH(const void* memPtr, U32 length)
{
    switch (length)
    {
    default :
    case 4 : return MEM_read32(memPtr);
    case 3 : if (MEM_isLittleEndian())
                return MEM_read32(memPtr)<<8;
             else
                return MEM_read32(memPtr)>>8;
    }
}
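
/* Usage sketch (MEM_matchPrefix is a hypothetical helper, not part of this header,
 * and kept out of compilation) : MEM_readMINMATCH() reads 4 bytes even when
 * length==3, so both pointers must have at least 4 readable bytes; equality of
 * the two shifted values then indicates that the first `length` bytes match. */
#if 0
MEM_STATIC unsigned MEM_matchPrefix(const void* a, const void* b, U32 length)
{
    return MEM_readMINMATCH(a, length) == MEM_readMINMATCH(b, length);
}
#endif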

#if defined (__cplusplus)
}
#endif

#endif /* MEM_H_MODULE */