Rich Felker | 0b44a03 | 2011-02-12 00:22:29 -0500 | [diff] [blame] | 1 | #include <stdio.h> |
Rich Felker | 73ec1d0 | 2012-04-17 14:19:46 -0400 | [diff] [blame] | 2 | #include <stdlib.h> |
| 3 | #include <stdarg.h> |
| 4 | #include <ctype.h> |
Rich Felker | 0b44a03 | 2011-02-12 00:22:29 -0500 | [diff] [blame] | 5 | #include <wchar.h> |
| 6 | #include <wctype.h> |
Rich Felker | 73ec1d0 | 2012-04-17 14:19:46 -0400 | [diff] [blame] | 7 | #include <limits.h> |
| 8 | #include <string.h> |
Rich Felker | 0b44a03 | 2011-02-12 00:22:29 -0500 | [diff] [blame] | 9 | |
| 10 | #include "stdio_impl.h" |
Rich Felker | 73ec1d0 | 2012-04-17 14:19:46 -0400 | [diff] [blame] | 11 | #include "shgetc.h" |
| 12 | #include "intscan.h" |
| 13 | #include "floatscan.h" |
Isaac Dunham | 14f0272 | 2013-04-05 23:20:28 -0700 | [diff] [blame] | 14 | #include "libc.h" |
Rich Felker | 0b44a03 | 2011-02-12 00:22:29 -0500 | [diff] [blame] | 15 | |
/* Length-modifier codes. They run from -2 to 3 so that (code + 2)
 * is a valid index into a six-entry table of modifier spellings. */
enum {
	SIZE_hh  = -2,
	SIZE_h   = -1,
	SIZE_def =  0,
	SIZE_l   =  1,
	SIZE_L   =  2,
	SIZE_ll  =  3
};

/*
 * Store the integer i through dest, converted to the integer type
 * selected by the length-modifier code `size`.
 *
 * dest: destination object, or a null pointer to discard the value.
 * size: one of the SIZE_* codes; SIZE_L and unrecognised codes store
 *       nothing.
 * i:    value to store; it is narrowed by ordinary assignment.
 */
static void store_int(void *dest, int size, unsigned long long i)
{
	if (!dest)
		return;

	if (size == SIZE_hh)
		*(char *)dest = i;
	else if (size == SIZE_h)
		*(short *)dest = i;
	else if (size == SIZE_def)
		*(int *)dest = i;
	else if (size == SIZE_l)
		*(long *)dest = i;
	else if (size == SIZE_ll)
		*(long long *)dest = i;
}
| 44 | |
/*
 * Fetch the n-th pointer argument (counting from 1) of an argument list
 * without disturbing the caller's va_list: the walk is done on a copy.
 * Every argument is read as a `void *`.  For n of 0 or 1 the first
 * argument is returned.
 */
static void *arg_n(va_list ap, unsigned int n)
{
	va_list walk;
	void *result;

	va_copy(walk, ap);
	/* Discard the n-1 arguments that precede the requested one. */
	while (n > 1) {
		(void)va_arg(walk, void *);
		n--;
	}
	result = va_arg(walk, void *);
	va_end(walk);

	return result;
}
| 56 | |
/*
 * Test whether character c is a member of a scanset.
 *
 * set: points just past the '[' (and past '^', if the caller consumed
 *      one); the set ends at the first ']' that is not in the leading
 *      position, or at the terminating null.
 * c:   character to look up.
 *
 * Returns 1 if c is in the set, 0 otherwise.
 *
 * A '-' or ']' in the first position is an ordinary member.  Elsewhere,
 * "a-b" denotes the inclusive range a..b provided '-' is not the last
 * character of the set; a trailing '-' is an ordinary member.  A range
 * whose endpoints are reversed contributes only its two endpoints.
 */
static int in_set(const wchar_t *set, int c)
{
	const wchar_t *p = set;

	if (*p == '-') {
		if (c == '-') return 1;
		p++;
	} else if (*p == ']') {
		if (c == ']') return 1;
		p++;
	}
	for (; *p && *p != ']'; p++) {
		if (*p == '-' && p[1] && p[1] != ']') {
			/* Range: compare against the bounds directly rather
			 * than stepping through every code point between
			 * them.  The upper endpoint itself is matched by
			 * the equality test below. */
			int lo = p[-1];
			p++;
			if (c >= lo && c < *p) return 1;
		}
		if (c == *p) return 1;
	}
	return 0;
}
| 76 | |
/*
 * Local fast paths that shadow the library functions for the rest of
 * this file.
 *
 * getwc(f):     when the stream has unread buffered bytes
 *               (rpos < rend) and the next byte is below 128, take
 *               that byte straight from the buffer; otherwise call
 *               the real getwc().
 * ungetwc(c,f): when rend is set and c is below 128 (compared as
 *               unsigned, so a negative c never qualifies), step rpos
 *               back by one byte without storing c; otherwise call
 *               the real ungetwc().
 *
 * Both macros evaluate f more than once.
 */
#if 1
#undef getwc
#define getwc(f) ( \
	((f)->rpos < (f)->rend && *(f)->rpos < 128) \
		? *(f)->rpos++ \
		: (getwc)(f) )

#undef ungetwc
#define ungetwc(c,f) ( \
	((f)->rend && (c) < 128U) \
		? *--(f)->rpos \
		: (ungetwc)((c),(f)) )
#endif
| 86 | |
Rich Felker | 400c5e5 | 2012-09-06 22:44:55 -0400 | [diff] [blame] | 87 | int vfwscanf(FILE *restrict f, const wchar_t *restrict fmt, va_list ap) |
Rich Felker | 0b44a03 | 2011-02-12 00:22:29 -0500 | [diff] [blame] | 88 | { |
Rich Felker | 73ec1d0 | 2012-04-17 14:19:46 -0400 | [diff] [blame] | 89 | int width; |
| 90 | int size; |
| 91 | int alloc; |
| 92 | const wchar_t *p; |
| 93 | int c, t; |
| 94 | char *s; |
| 95 | wchar_t *wcs; |
| 96 | void *dest=NULL; |
| 97 | int invert; |
| 98 | int matches=0; |
| 99 | off_t pos = 0, cnt; |
| 100 | static const char size_pfx[][3] = { "hh", "h", "", "l", "L", "ll" }; |
| 101 | char tmp[3*sizeof(int)+10]; |
Rich Felker | de80ea9 | 2013-06-05 16:53:26 -0400 | [diff] [blame] | 102 | const wchar_t *set; |
Rich Felker | e039db2 | 2013-06-06 00:26:17 -0400 | [diff] [blame] | 103 | size_t i, k; |
Rich Felker | 0b44a03 | 2011-02-12 00:22:29 -0500 | [diff] [blame] | 104 | |
Rich Felker | 73ec1d0 | 2012-04-17 14:19:46 -0400 | [diff] [blame] | 105 | FLOCK(f); |
Rich Felker | 0b44a03 | 2011-02-12 00:22:29 -0500 | [diff] [blame] | 106 | |
Rich Felker | 536c6d5 | 2015-06-13 05:17:16 +0000 | [diff] [blame] | 107 | fwide(f, 1); |
Rich Felker | 984c25b | 2014-07-02 12:09:48 -0400 | [diff] [blame] | 108 | |
Rich Felker | 73ec1d0 | 2012-04-17 14:19:46 -0400 | [diff] [blame] | 109 | for (p=fmt; *p; p++) { |
| 110 | |
Rich Felker | 1d92cdd | 2013-07-20 00:21:11 -0400 | [diff] [blame] | 111 | alloc = 0; |
| 112 | |
Rich Felker | 73ec1d0 | 2012-04-17 14:19:46 -0400 | [diff] [blame] | 113 | if (iswspace(*p)) { |
| 114 | while (iswspace(p[1])) p++; |
| 115 | while (iswspace((c=getwc(f)))) pos++; |
| 116 | ungetwc(c, f); |
| 117 | continue; |
| 118 | } |
| 119 | if (*p != '%' || p[1] == '%') { |
Bartosz Brachaczek | 9255dad | 2017-07-09 23:00:18 +0200 | [diff] [blame] | 120 | if (*p == '%') { |
| 121 | p++; |
| 122 | while (iswspace((c=getwc(f)))) pos++; |
| 123 | } else { |
| 124 | c = getwc(f); |
| 125 | } |
Rich Felker | 73ec1d0 | 2012-04-17 14:19:46 -0400 | [diff] [blame] | 126 | if (c!=*p) { |
| 127 | ungetwc(c, f); |
| 128 | if (c<0) goto input_fail; |
| 129 | goto match_fail; |
| 130 | } |
| 131 | pos++; |
| 132 | continue; |
| 133 | } |
| 134 | |
| 135 | p++; |
| 136 | if (*p=='*') { |
| 137 | dest = 0; p++; |
| 138 | } else if (iswdigit(*p) && p[1]=='$') { |
| 139 | dest = arg_n(ap, *p-'0'); p+=2; |
| 140 | } else { |
| 141 | dest = va_arg(ap, void *); |
| 142 | } |
| 143 | |
| 144 | for (width=0; iswdigit(*p); p++) { |
| 145 | width = 10*width + *p - '0'; |
| 146 | } |
| 147 | |
| 148 | if (*p=='m') { |
Rich Felker | f0328a5 | 2013-08-31 22:52:41 -0400 | [diff] [blame] | 149 | wcs = 0; |
| 150 | s = 0; |
Rich Felker | e039db2 | 2013-06-06 00:26:17 -0400 | [diff] [blame] | 151 | alloc = !!dest; |
Rich Felker | 73ec1d0 | 2012-04-17 14:19:46 -0400 | [diff] [blame] | 152 | p++; |
| 153 | } else { |
| 154 | alloc = 0; |
| 155 | } |
| 156 | |
| 157 | size = SIZE_def; |
| 158 | switch (*p++) { |
| 159 | case 'h': |
| 160 | if (*p == 'h') p++, size = SIZE_hh; |
| 161 | else size = SIZE_h; |
| 162 | break; |
| 163 | case 'l': |
| 164 | if (*p == 'l') p++, size = SIZE_ll; |
| 165 | else size = SIZE_l; |
| 166 | break; |
| 167 | case 'j': |
| 168 | size = SIZE_ll; |
| 169 | break; |
| 170 | case 'z': |
| 171 | case 't': |
| 172 | size = SIZE_l; |
| 173 | break; |
| 174 | case 'L': |
| 175 | size = SIZE_L; |
| 176 | break; |
| 177 | case 'd': case 'i': case 'o': case 'u': case 'x': |
| 178 | case 'a': case 'e': case 'f': case 'g': |
| 179 | case 'A': case 'E': case 'F': case 'G': case 'X': |
| 180 | case 's': case 'c': case '[': |
| 181 | case 'S': case 'C': |
| 182 | case 'p': case 'n': |
| 183 | p--; |
| 184 | break; |
| 185 | default: |
| 186 | goto fmt_fail; |
| 187 | } |
| 188 | |
| 189 | t = *p; |
| 190 | |
Rich Felker | de80ea9 | 2013-06-05 16:53:26 -0400 | [diff] [blame] | 191 | /* Transform S,C -> ls,lc */ |
| 192 | if ((t&0x2f)==3) { |
| 193 | size = SIZE_l; |
| 194 | t |= 32; |
| 195 | } |
Rich Felker | 73ec1d0 | 2012-04-17 14:19:46 -0400 | [diff] [blame] | 196 | |
Rich Felker | bdeb184 | 2012-04-17 23:35:49 -0400 | [diff] [blame] | 197 | if (t != 'n') { |
| 198 | if (t != '[' && (t|32) != 'c') |
| 199 | while (iswspace((c=getwc(f)))) pos++; |
| 200 | else |
| 201 | c=getwc(f); |
Rich Felker | 73ec1d0 | 2012-04-17 14:19:46 -0400 | [diff] [blame] | 202 | if (c < 0) goto input_fail; |
| 203 | ungetwc(c, f); |
| 204 | } |
| 205 | |
| 206 | switch (t) { |
| 207 | case 'n': |
| 208 | store_int(dest, size, pos); |
| 209 | /* do not increment match count, etc! */ |
| 210 | continue; |
| 211 | |
Rich Felker | 73ec1d0 | 2012-04-17 14:19:46 -0400 | [diff] [blame] | 212 | case 's': |
Rich Felker | de80ea9 | 2013-06-05 16:53:26 -0400 | [diff] [blame] | 213 | case 'c': |
Rich Felker | 73ec1d0 | 2012-04-17 14:19:46 -0400 | [diff] [blame] | 214 | case '[': |
Rich Felker | de80ea9 | 2013-06-05 16:53:26 -0400 | [diff] [blame] | 215 | if (t == 'c') { |
| 216 | if (width<1) width = 1; |
| 217 | invert = 1; |
| 218 | set = L""; |
| 219 | } else if (t == 's') { |
| 220 | invert = 1; |
Rich Felker | 733d1ea | 2017-03-14 15:06:58 -0400 | [diff] [blame] | 221 | static const wchar_t spaces[] = { |
Rich Felker | de80ea9 | 2013-06-05 16:53:26 -0400 | [diff] [blame] | 222 | ' ', '\t', '\n', '\r', 11, 12, 0x0085, |
| 223 | 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, |
| 224 | 0x2006, 0x2008, 0x2009, 0x200a, |
| 225 | 0x2028, 0x2029, 0x205f, 0x3000, 0 }; |
Rich Felker | 733d1ea | 2017-03-14 15:06:58 -0400 | [diff] [blame] | 226 | set = spaces; |
Rich Felker | de80ea9 | 2013-06-05 16:53:26 -0400 | [diff] [blame] | 227 | } else { |
| 228 | if (*++p == '^') p++, invert = 1; |
| 229 | else invert = 0; |
| 230 | set = p; |
| 231 | if (*p==']') p++; |
| 232 | while (*p!=']') { |
| 233 | if (!*p) goto fmt_fail; |
| 234 | p++; |
| 235 | } |
| 236 | } |
| 237 | |
Rich Felker | 73ec1d0 | 2012-04-17 14:19:46 -0400 | [diff] [blame] | 238 | s = (size == SIZE_def) ? dest : 0; |
| 239 | wcs = (size == SIZE_l) ? dest : 0; |
| 240 | |
Rich Felker | 73ec1d0 | 2012-04-17 14:19:46 -0400 | [diff] [blame] | 241 | int gotmatch = 0; |
| 242 | |
Rich Felker | 9ab180f | 2012-04-17 22:15:33 -0400 | [diff] [blame] | 243 | if (width < 1) width = -1; |
| 244 | |
Rich Felker | e039db2 | 2013-06-06 00:26:17 -0400 | [diff] [blame] | 245 | i = 0; |
| 246 | if (alloc) { |
| 247 | k = t=='c' ? width+1U : 31; |
| 248 | if (size == SIZE_l) { |
| 249 | wcs = malloc(k*sizeof(wchar_t)); |
| 250 | if (!wcs) goto alloc_fail; |
| 251 | } else { |
| 252 | s = malloc(k); |
| 253 | if (!s) goto alloc_fail; |
| 254 | } |
| 255 | } |
Rich Felker | 99fbf4c | 2012-04-17 21:17:09 -0400 | [diff] [blame] | 256 | while (width) { |
Rich Felker | 73ec1d0 | 2012-04-17 14:19:46 -0400 | [diff] [blame] | 257 | if ((c=getwc(f))<0) break; |
Rich Felker | de80ea9 | 2013-06-05 16:53:26 -0400 | [diff] [blame] | 258 | if (in_set(set, c) == invert) |
Rich Felker | 73ec1d0 | 2012-04-17 14:19:46 -0400 | [diff] [blame] | 259 | break; |
| 260 | if (wcs) { |
Rich Felker | e039db2 | 2013-06-06 00:26:17 -0400 | [diff] [blame] | 261 | wcs[i++] = c; |
| 262 | if (alloc && i==k) { |
| 263 | k += k+1; |
| 264 | wchar_t *tmp = realloc(wcs, k*sizeof(wchar_t)); |
| 265 | if (!tmp) goto alloc_fail; |
| 266 | wcs = tmp; |
| 267 | } |
Rich Felker | 73ec1d0 | 2012-04-17 14:19:46 -0400 | [diff] [blame] | 268 | } else if (size != SIZE_l) { |
Rich Felker | e039db2 | 2013-06-06 00:26:17 -0400 | [diff] [blame] | 269 | int l = wctomb(s?s+i:tmp, c); |
Rich Felker | 73ec1d0 | 2012-04-17 14:19:46 -0400 | [diff] [blame] | 270 | if (l<0) goto input_fail; |
Rich Felker | e039db2 | 2013-06-06 00:26:17 -0400 | [diff] [blame] | 271 | i += l; |
| 272 | if (alloc && i > k-4) { |
| 273 | k += k+1; |
| 274 | char *tmp = realloc(s, k); |
| 275 | if (!tmp) goto alloc_fail; |
| 276 | s = tmp; |
| 277 | } |
Rich Felker | 73ec1d0 | 2012-04-17 14:19:46 -0400 | [diff] [blame] | 278 | } |
| 279 | pos++; |
Rich Felker | 9ab180f | 2012-04-17 22:15:33 -0400 | [diff] [blame] | 280 | width-=(width>0); |
Rich Felker | 73ec1d0 | 2012-04-17 14:19:46 -0400 | [diff] [blame] | 281 | gotmatch=1; |
| 282 | } |
Rich Felker | de80ea9 | 2013-06-05 16:53:26 -0400 | [diff] [blame] | 283 | if (width) { |
| 284 | ungetwc(c, f); |
| 285 | if (t == 'c' || !gotmatch) goto match_fail; |
Rich Felker | 73ec1d0 | 2012-04-17 14:19:46 -0400 | [diff] [blame] | 286 | } |
| 287 | |
Rich Felker | e039db2 | 2013-06-06 00:26:17 -0400 | [diff] [blame] | 288 | if (alloc) { |
| 289 | if (size == SIZE_l) *(wchar_t **)dest = wcs; |
| 290 | else *(char **)dest = s; |
| 291 | } |
Rich Felker | ef55078 | 2013-06-22 17:23:45 -0400 | [diff] [blame] | 292 | if (t != 'c') { |
| 293 | if (wcs) wcs[i] = 0; |
| 294 | if (s) s[i] = 0; |
| 295 | } |
Rich Felker | 73ec1d0 | 2012-04-17 14:19:46 -0400 | [diff] [blame] | 296 | break; |
| 297 | |
| 298 | case 'd': case 'i': case 'o': case 'u': case 'x': |
| 299 | case 'a': case 'e': case 'f': case 'g': |
| 300 | case 'A': case 'E': case 'F': case 'G': case 'X': |
| 301 | case 'p': |
| 302 | if (width < 1) width = 0; |
| 303 | snprintf(tmp, sizeof tmp, "%.*s%.0d%s%c%%lln", |
| 304 | 1+!dest, "%*", width, size_pfx[size+2], t); |
| 305 | cnt = 0; |
| 306 | if (fscanf(f, tmp, dest?dest:&cnt, &cnt) == -1) |
| 307 | goto input_fail; |
| 308 | else if (!cnt) |
| 309 | goto match_fail; |
| 310 | pos += cnt; |
| 311 | break; |
| 312 | default: |
| 313 | goto fmt_fail; |
| 314 | } |
| 315 | |
| 316 | if (dest) matches++; |
Rich Felker | 0b44a03 | 2011-02-12 00:22:29 -0500 | [diff] [blame] | 317 | } |
Rich Felker | 73ec1d0 | 2012-04-17 14:19:46 -0400 | [diff] [blame] | 318 | if (0) { |
| 319 | fmt_fail: |
Rich Felker | e039db2 | 2013-06-06 00:26:17 -0400 | [diff] [blame] | 320 | alloc_fail: |
Rich Felker | 73ec1d0 | 2012-04-17 14:19:46 -0400 | [diff] [blame] | 321 | input_fail: |
| 322 | if (!matches) matches--; |
Rich Felker | 73ec1d0 | 2012-04-17 14:19:46 -0400 | [diff] [blame] | 323 | match_fail: |
Rich Felker | e039db2 | 2013-06-06 00:26:17 -0400 | [diff] [blame] | 324 | if (alloc) { |
| 325 | free(s); |
| 326 | free(wcs); |
| 327 | } |
| 328 | } |
Rich Felker | 73ec1d0 | 2012-04-17 14:19:46 -0400 | [diff] [blame] | 329 | FUNLOCK(f); |
| 330 | return matches; |
Rich Felker | 0b44a03 | 2011-02-12 00:22:29 -0500 | [diff] [blame] | 331 | } |
Isaac Dunham | 14f0272 | 2013-04-05 23:20:28 -0700 | [diff] [blame] | 332 | |
| 333 | weak_alias(vfwscanf,__isoc99_vfwscanf); |