blob: 18dfcb282407802f8fa9509051cea8df0b54b699 [file] [log] [blame]
Kristian Monsen5ab50182010-05-14 18:53:44 +01001/***************************************************************************
2 * _ _ ____ _
3 * Project ___| | | | _ \| |
4 * / __| | | | |_) | |
5 * | (__| |_| | _ <| |___
6 * \___|\___/|_| \_\_____|
7 *
Elliott Hughescac39802018-04-27 16:19:43 -07008 * Copyright (C) 1998 - 2018, Daniel Stenberg, <daniel@haxx.se>, et al.
Kristian Monsen5ab50182010-05-14 18:53:44 +01009 *
10 * This software is licensed as described in the file COPYING, which
11 * you should have received as part of this distribution. The terms
Alex Deymod15eaac2016-06-28 14:49:26 -070012 * are also available at https://curl.haxx.se/docs/copyright.html.
Kristian Monsen5ab50182010-05-14 18:53:44 +010013 *
14 * You may opt to use, copy, modify, merge, publish, distribute and/or sell
15 * copies of the Software, and permit persons to whom the Software is
16 * furnished to do so, under the terms of the COPYING file.
17 *
18 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
19 * KIND, either express or implied.
20 *
21 ***************************************************************************/
Bertrand SIMONNETe6cd7382015-07-01 15:39:44 -070022
23#include "curl_setup.h"
Kristian Monsen5ab50182010-05-14 18:53:44 +010024
25#ifndef CURL_DISABLE_HTTP
Kristian Monsen5ab50182010-05-14 18:53:44 +010026
27#include "urldata.h" /* it includes http_chunks.h */
28#include "sendf.h" /* for the client write stuff */
29
30#include "content_encoding.h"
31#include "http.h"
Bertrand SIMONNETe6cd7382015-07-01 15:39:44 -070032#include "non-ascii.h" /* for Curl_convert_to_network prototype */
33#include "strtoofft.h"
34#include "warnless.h"
35
36/* The last #include files should be: */
Kristian Monsen5ab50182010-05-14 18:53:44 +010037#include "curl_memory.h"
Kristian Monsen5ab50182010-05-14 18:53:44 +010038#include "memdebug.h"
39
/*
 * Chunk format (simplified):
 *
 * <HEX SIZE>[ chunk extension ] CRLF
 * <DATA> CRLF
 *
 * Highlights from RFC2616 section 3.6 say:

   The chunked encoding modifies the body of a message in order to
   transfer it as a series of chunks, each with its own size indicator,
   followed by an OPTIONAL trailer containing entity-header fields. This
   allows dynamically produced content to be transferred along with the
   information necessary for the recipient to verify that it has
   received the full message.

       Chunked-Body   = *chunk
                        last-chunk
                        trailer
                        CRLF

       chunk          = chunk-size [ chunk-extension ] CRLF
                        chunk-data CRLF
       chunk-size     = 1*HEX
       last-chunk     = 1*("0") [ chunk-extension ] CRLF

       chunk-extension= *( ";" chunk-ext-name [ "=" chunk-ext-val ] )
       chunk-ext-name = token
       chunk-ext-val  = token | quoted-string
       chunk-data     = chunk-size(OCTET)
       trailer        = *(entity-header CRLF)

   The chunk-size field is a string of hex digits indicating the size of
   the chunk. The chunked encoding is ended by any chunk whose size is
   zero, followed by the trailer, which is terminated by an empty line.

 */
76
Elliott Hughes1ef06ba2018-05-30 15:43:58 -070077#ifdef CURL_DOES_CONVERSIONS
/* Tell whether 'digit' is a hexadecimal digit expressed in ASCII.
   The ranges are spelled out as ASCII code points rather than going through
   ISXDIGIT, so that the answer is the same on non-ASCII hosts. */
static bool Curl_isxdigit_ascii(char digit)
{
  const bool is_decimal = (digit >= 0x30) && (digit <= 0x39); /* 0-9 */
  const bool is_upper = (digit >= 0x41) && (digit <= 0x46);   /* A-F */
  const bool is_lower = (digit >= 0x61) && (digit <= 0x66);   /* a-f */

  return is_decimal || is_upper || is_lower;
}
86#else
87#define Curl_isxdigit_ascii(x) Curl_isxdigit(x)
88#endif
89
Kristian Monsen5ab50182010-05-14 18:53:44 +010090void Curl_httpchunk_init(struct connectdata *conn)
91{
92 struct Curl_chunker *chunk = &conn->chunk;
Alex Deymo486467e2017-12-19 19:04:07 +010093 chunk->hexindex = 0; /* start at 0 */
94 chunk->dataleft = 0; /* no data left yet! */
Kristian Monsen5ab50182010-05-14 18:53:44 +010095 chunk->state = CHUNK_HEX; /* we get hex first! */
96}
97
98/*
99 * chunk_read() returns a OK for normal operations, or a positive return code
100 * for errors. STOP means this sequence of chunks is complete. The 'wrote'
101 * argument is set to tell the caller how many bytes we actually passed to the
102 * client (for byte-counting and whatever).
103 *
104 * The states and the state-machine is further explained in the header file.
105 *
106 * This function always uses ASCII hex values to accommodate non-ASCII hosts.
107 * For example, 0x0d and 0x0a are used instead of '\r' and '\n'.
108 */
109CHUNKcode Curl_httpchunk_read(struct connectdata *conn,
110 char *datap,
111 ssize_t datalen,
112 ssize_t *wrotep)
113{
Alex Deymo486467e2017-12-19 19:04:07 +0100114 CURLcode result = CURLE_OK;
Alex Deymoe3149cc2016-10-05 11:18:42 -0700115 struct Curl_easy *data = conn->data;
Kristian Monsen5ab50182010-05-14 18:53:44 +0100116 struct Curl_chunker *ch = &conn->chunk;
117 struct SingleRequest *k = &data->req;
118 size_t piece;
Bertrand SIMONNETe6cd7382015-07-01 15:39:44 -0700119 curl_off_t length = (curl_off_t)datalen;
Kristian Monsen5ab50182010-05-14 18:53:44 +0100120 size_t *wrote = (size_t *)wrotep;
121
122 *wrote = 0; /* nothing's written yet */
123
124 /* the original data is written to the client, but we go on with the
125 chunk read process, to properly calculate the content length*/
126 if(data->set.http_te_skip && !k->ignorebody) {
127 result = Curl_client_write(conn, CLIENTWRITE_BODY, datap, datalen);
128 if(result)
129 return CHUNKE_WRITE_ERROR;
130 }
131
132 while(length) {
133 switch(ch->state) {
134 case CHUNK_HEX:
Elliott Hughes1ef06ba2018-05-30 15:43:58 -0700135 if(Curl_isxdigit_ascii(*datap)) {
Kristian Monsen5ab50182010-05-14 18:53:44 +0100136 if(ch->hexindex < MAXNUM_SIZE) {
137 ch->hexbuffer[ch->hexindex] = *datap;
138 datap++;
139 length--;
140 ch->hexindex++;
141 }
142 else {
143 return CHUNKE_TOO_LONG_HEX; /* longer hex than we support */
144 }
145 }
146 else {
Bertrand SIMONNETe6cd7382015-07-01 15:39:44 -0700147 char *endptr;
148 if(0 == ch->hexindex)
Kristian Monsen5ab50182010-05-14 18:53:44 +0100149 /* This is illegal data, we received junk where we expected
150 a hexadecimal digit. */
151 return CHUNKE_ILLEGAL_HEX;
Bertrand SIMONNETe6cd7382015-07-01 15:39:44 -0700152
Kristian Monsen5ab50182010-05-14 18:53:44 +0100153 /* length and datap are unmodified */
Alex Deymo486467e2017-12-19 19:04:07 +0100154 ch->hexbuffer[ch->hexindex] = 0;
Bertrand SIMONNETe6cd7382015-07-01 15:39:44 -0700155
Kristian Monsen5ab50182010-05-14 18:53:44 +0100156 /* convert to host encoding before calling strtoul */
Bertrand SIMONNETe6cd7382015-07-01 15:39:44 -0700157 result = Curl_convert_from_network(conn->data, ch->hexbuffer,
Kristian Monsen5ab50182010-05-14 18:53:44 +0100158 ch->hexindex);
Bertrand SIMONNETe6cd7382015-07-01 15:39:44 -0700159 if(result) {
Kristian Monsen5ab50182010-05-14 18:53:44 +0100160 /* Curl_convert_from_network calls failf if unsuccessful */
161 /* Treat it as a bad hex character */
Bertrand SIMONNETe6cd7382015-07-01 15:39:44 -0700162 return CHUNKE_ILLEGAL_HEX;
Kristian Monsen5ab50182010-05-14 18:53:44 +0100163 }
Bertrand SIMONNETe6cd7382015-07-01 15:39:44 -0700164
Alex Deymo486467e2017-12-19 19:04:07 +0100165 if(curlx_strtoofft(ch->hexbuffer, &endptr, 16, &ch->datasize))
Bertrand SIMONNETe6cd7382015-07-01 15:39:44 -0700166 return CHUNKE_ILLEGAL_HEX;
167 ch->state = CHUNK_LF; /* now wait for the CRLF */
Kristian Monsen5ab50182010-05-14 18:53:44 +0100168 }
169 break;
170
Bertrand SIMONNETe6cd7382015-07-01 15:39:44 -0700171 case CHUNK_LF:
172 /* waiting for the LF after a chunk size */
Kristian Monsen5ab50182010-05-14 18:53:44 +0100173 if(*datap == 0x0a) {
174 /* we're now expecting data to come, unless size was zero! */
175 if(0 == ch->datasize) {
Lucas Eckels9bd90e62012-08-06 15:07:02 -0700176 ch->state = CHUNK_TRAILER; /* now check for trailers */
Alex Deymo486467e2017-12-19 19:04:07 +0100177 conn->trlPos = 0;
Kristian Monsen5ab50182010-05-14 18:53:44 +0100178 }
Bertrand SIMONNETe6cd7382015-07-01 15:39:44 -0700179 else
Kristian Monsen5ab50182010-05-14 18:53:44 +0100180 ch->state = CHUNK_DATA;
Kristian Monsen5ab50182010-05-14 18:53:44 +0100181 }
Bertrand SIMONNETe6cd7382015-07-01 15:39:44 -0700182
Kristian Monsen5ab50182010-05-14 18:53:44 +0100183 datap++;
184 length--;
185 break;
186
187 case CHUNK_DATA:
Bertrand SIMONNETe6cd7382015-07-01 15:39:44 -0700188 /* We expect 'datasize' of data. We have 'length' right now, it can be
189 more or less than 'datasize'. Get the smallest piece.
Kristian Monsen5ab50182010-05-14 18:53:44 +0100190 */
Bertrand SIMONNETe6cd7382015-07-01 15:39:44 -0700191 piece = curlx_sotouz((ch->datasize >= length)?length:ch->datasize);
Kristian Monsen5ab50182010-05-14 18:53:44 +0100192
193 /* Write the data portion available */
Elliott Hughescac39802018-04-27 16:19:43 -0700194 if(!conn->data->set.http_te_skip && !k->ignorebody) {
195 if(!conn->data->set.http_ce_skip && k->writer_stack)
196 result = Curl_unencode_write(conn, k->writer_stack, datap, piece);
197 else
Alex Deymo486467e2017-12-19 19:04:07 +0100198 result = Curl_client_write(conn, CLIENTWRITE_BODY, datap, piece);
Kristian Monsen5ab50182010-05-14 18:53:44 +0100199
Elliott Hughescac39802018-04-27 16:19:43 -0700200 if(result)
201 return CHUNKE_WRITE_ERROR;
202 }
Kristian Monsen5ab50182010-05-14 18:53:44 +0100203
204 *wrote += piece;
Kristian Monsen5ab50182010-05-14 18:53:44 +0100205 ch->datasize -= piece; /* decrease amount left to expect */
206 datap += piece; /* move read pointer forward */
207 length -= piece; /* decrease space left in this round */
208
209 if(0 == ch->datasize)
210 /* end of data this round, we now expect a trailing CRLF */
Kristian Monsen5ab50182010-05-14 18:53:44 +0100211 ch->state = CHUNK_POSTLF;
Kristian Monsen5ab50182010-05-14 18:53:44 +0100212 break;
213
214 case CHUNK_POSTLF:
215 if(*datap == 0x0a) {
Bertrand SIMONNETe6cd7382015-07-01 15:39:44 -0700216 /* The last one before we go back to hex state and start all over. */
217 Curl_httpchunk_init(conn); /* sets state back to CHUNK_HEX */
Kristian Monsen5ab50182010-05-14 18:53:44 +0100218 }
Bertrand SIMONNETe6cd7382015-07-01 15:39:44 -0700219 else if(*datap != 0x0d)
Kristian Monsen5ab50182010-05-14 18:53:44 +0100220 return CHUNKE_BAD_CHUNK;
Bertrand SIMONNETe6cd7382015-07-01 15:39:44 -0700221 datap++;
222 length--;
Kristian Monsen5ab50182010-05-14 18:53:44 +0100223 break;
224
225 case CHUNK_TRAILER:
Bertrand SIMONNETe6cd7382015-07-01 15:39:44 -0700226 if((*datap == 0x0d) || (*datap == 0x0a)) {
Lucas Eckels9bd90e62012-08-06 15:07:02 -0700227 /* this is the end of a trailer, but if the trailer was zero bytes
228 there was no trailer and we move on */
229
230 if(conn->trlPos) {
231 /* we allocate trailer with 3 bytes extra room to fit this */
Alex Deymo486467e2017-12-19 19:04:07 +0100232 conn->trailer[conn->trlPos++] = 0x0d;
233 conn->trailer[conn->trlPos++] = 0x0a;
234 conn->trailer[conn->trlPos] = 0;
Lucas Eckels9bd90e62012-08-06 15:07:02 -0700235
Lucas Eckels9bd90e62012-08-06 15:07:02 -0700236 /* Convert to host encoding before calling Curl_client_write */
Bertrand SIMONNETe6cd7382015-07-01 15:39:44 -0700237 result = Curl_convert_from_network(conn->data, conn->trailer,
Lucas Eckels9bd90e62012-08-06 15:07:02 -0700238 conn->trlPos);
Bertrand SIMONNETe6cd7382015-07-01 15:39:44 -0700239 if(result)
Lucas Eckels9bd90e62012-08-06 15:07:02 -0700240 /* Curl_convert_from_network calls failf if unsuccessful */
241 /* Treat it as a bad chunk */
242 return CHUNKE_BAD_CHUNK;
243
Lucas Eckels9bd90e62012-08-06 15:07:02 -0700244 if(!data->set.http_te_skip) {
245 result = Curl_client_write(conn, CLIENTWRITE_HEADER,
246 conn->trailer, conn->trlPos);
247 if(result)
248 return CHUNKE_WRITE_ERROR;
249 }
Alex Deymo486467e2017-12-19 19:04:07 +0100250 conn->trlPos = 0;
Lucas Eckels9bd90e62012-08-06 15:07:02 -0700251 ch->state = CHUNK_TRAILER_CR;
Bertrand SIMONNETe6cd7382015-07-01 15:39:44 -0700252 if(*datap == 0x0a)
253 /* already on the LF */
254 break;
Kristian Monsen5ab50182010-05-14 18:53:44 +0100255 }
256 else {
Lucas Eckels9bd90e62012-08-06 15:07:02 -0700257 /* no trailer, we're on the final CRLF pair */
258 ch->state = CHUNK_TRAILER_POSTCR;
259 break; /* don't advance the pointer */
Kristian Monsen5ab50182010-05-14 18:53:44 +0100260 }
Kristian Monsen5ab50182010-05-14 18:53:44 +0100261 }
Kristian Monsen5ab50182010-05-14 18:53:44 +0100262 else {
Lucas Eckels9bd90e62012-08-06 15:07:02 -0700263 /* conn->trailer is assumed to be freed in url.c on a
264 connection basis */
265 if(conn->trlPos >= conn->trlMax) {
266 /* we always allocate three extra bytes, just because when the full
267 header has been received we append CRLF\0 */
268 char *ptr;
269 if(conn->trlMax) {
270 conn->trlMax *= 2;
271 ptr = realloc(conn->trailer, conn->trlMax + 3);
272 }
273 else {
Alex Deymo486467e2017-12-19 19:04:07 +0100274 conn->trlMax = 128;
Lucas Eckels9bd90e62012-08-06 15:07:02 -0700275 ptr = malloc(conn->trlMax + 3);
276 }
277 if(!ptr)
278 return CHUNKE_OUT_OF_MEMORY;
279 conn->trailer = ptr;
280 }
Lucas Eckels9bd90e62012-08-06 15:07:02 -0700281 conn->trailer[conn->trlPos++]=*datap;
Kristian Monsen5ab50182010-05-14 18:53:44 +0100282 }
Lucas Eckels9bd90e62012-08-06 15:07:02 -0700283 datap++;
284 length--;
Kristian Monsen5ab50182010-05-14 18:53:44 +0100285 break;
286
287 case CHUNK_TRAILER_CR:
Lucas Eckels9bd90e62012-08-06 15:07:02 -0700288 if(*datap == 0x0a) {
Kristian Monsen5ab50182010-05-14 18:53:44 +0100289 ch->state = CHUNK_TRAILER_POSTCR;
290 datap++;
291 length--;
292 }
293 else
294 return CHUNKE_BAD_CHUNK;
295 break;
296
297 case CHUNK_TRAILER_POSTCR:
Lucas Eckels9bd90e62012-08-06 15:07:02 -0700298 /* We enter this state when a CR should arrive so we expect to
299 have to first pass a CR before we wait for LF */
Bertrand SIMONNETe6cd7382015-07-01 15:39:44 -0700300 if((*datap != 0x0d) && (*datap != 0x0a)) {
Lucas Eckels9bd90e62012-08-06 15:07:02 -0700301 /* not a CR then it must be another header in the trailer */
Kristian Monsen5ab50182010-05-14 18:53:44 +0100302 ch->state = CHUNK_TRAILER;
Lucas Eckels9bd90e62012-08-06 15:07:02 -0700303 break;
Kristian Monsen5ab50182010-05-14 18:53:44 +0100304 }
Kristian Monsen5ab50182010-05-14 18:53:44 +0100305 if(*datap == 0x0d) {
Bertrand SIMONNETe6cd7382015-07-01 15:39:44 -0700306 /* skip if CR */
Kristian Monsen5ab50182010-05-14 18:53:44 +0100307 datap++;
308 length--;
309 }
Bertrand SIMONNETe6cd7382015-07-01 15:39:44 -0700310 /* now wait for the final LF */
311 ch->state = CHUNK_STOP;
Kristian Monsen5ab50182010-05-14 18:53:44 +0100312 break;
313
314 case CHUNK_STOP:
315 if(*datap == 0x0a) {
316 length--;
317
318 /* Record the length of any data left in the end of the buffer
319 even if there's no more chunks to read */
Bertrand SIMONNETe6cd7382015-07-01 15:39:44 -0700320 ch->dataleft = curlx_sotouz(length);
Kristian Monsen5ab50182010-05-14 18:53:44 +0100321
Kristian Monsen5ab50182010-05-14 18:53:44 +0100322 return CHUNKE_STOP; /* return stop */
323 }
Lucas Eckels9bd90e62012-08-06 15:07:02 -0700324 else
Kristian Monsen5ab50182010-05-14 18:53:44 +0100325 return CHUNKE_BAD_CHUNK;
Kristian Monsen5ab50182010-05-14 18:53:44 +0100326 }
327 }
328 return CHUNKE_OK;
329}
Bertrand SIMONNETe6cd7382015-07-01 15:39:44 -0700330
331const char *Curl_chunked_strerror(CHUNKcode code)
332{
Elliott Hughes82be86d2017-09-20 17:00:17 -0700333 switch(code) {
Bertrand SIMONNETe6cd7382015-07-01 15:39:44 -0700334 default:
335 return "OK";
336 case CHUNKE_TOO_LONG_HEX:
337 return "Too long hexadecimal number";
338 case CHUNKE_ILLEGAL_HEX:
339 return "Illegal or missing hexadecimal sequence";
340 case CHUNKE_BAD_CHUNK:
341 return "Malformed encoding found";
342 case CHUNKE_WRITE_ERROR:
343 return "Write error";
344 case CHUNKE_BAD_ENCODING:
345 return "Bad content-encoding found";
346 case CHUNKE_OUT_OF_MEMORY:
347 return "Out of memory";
348 }
349}
350
Kristian Monsen5ab50182010-05-14 18:53:44 +0100351#endif /* CURL_DISABLE_HTTP */