blob: 8e636bd398a3987d6fa138e91bc145b9c54b6d7c [file] [log] [blame]
Eric Andersen96700832000-09-04 15:15:55 +00001/* vi: set sw=4 ts=4: */
2/*
Eric Andersen79757c92001-04-05 21:45:54 +00003 * wget - retrieve a file using HTTP or FTP
Eric Andersen96700832000-09-04 15:15:55 +00004 *
Eric Andersen4e573f42000-11-14 23:29:24 +00005 * Chip Rosenthal Covad Communications <chip@laserlink.net>
Denys Vlasenko0ef64bd2010-08-16 20:14:46 +02006 * Licensed under GPLv2, see file LICENSE in this source tree.
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +02007 *
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
Denys Vlasenkofb132e42010-10-29 11:46:52 +02009 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
Eric Andersen96700832000-09-04 15:15:55 +000010 */
Denis Vlasenkob6adbf12007-05-26 19:00:18 +000011#include "libbb.h"
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +000012
Eric Andersen79757c92001-04-05 21:45:54 +000013struct host_info {
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +000014 // May be used if we ever will want to free() all xstrdup()s...
15 /* char *allocated; */
Denis Vlasenko818322b2007-09-24 18:27:04 +000016 const char *path;
17 const char *user;
18 char *host;
19 int port;
20 smallint is_ftp;
Eric Andersen79757c92001-04-05 21:45:54 +000021};
22
Denis Vlasenko77105632007-09-24 15:04:00 +000023
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020024/* Globals */
Denis Vlasenko77105632007-09-24 15:04:00 +000025struct globals {
26 off_t content_len; /* Content-length of the file */
27 off_t beg_range; /* Range at which continue begins */
28#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko77105632007-09-24 15:04:00 +000029 off_t transferred; /* Number of bytes transferred so far */
30 const char *curfile; /* Name of current file being transferred */
Magnus Dammf5914992009-11-08 16:34:43 +010031 bb_progress_t pmt;
Denis Vlasenko77105632007-09-24 15:04:00 +000032#endif
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020033#if ENABLE_FEATURE_WGET_TIMEOUT
34 unsigned timeout_seconds;
35#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +020036 smallint chunked; /* chunked transfer encoding */
37 smallint got_clen; /* got content-length: from server */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010038 /* Local downloads do benefit from big buffer.
39 * With 512 byte buffer, it was measured to be
40 * an order of magnitude slower than with big one.
41 */
42 uint64_t just_to_align_next_member;
43 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
Denys Vlasenko98a4c7c2010-02-04 15:00:15 +010044} FIX_ALIASING;
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010045#define G (*ptr_to_globals)
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020046#define INIT_G() do { \
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010047 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020048 IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \
49} while (0)
Denis Vlasenko77105632007-09-24 15:04:00 +000050
51
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020052/* Must match option string! */
53enum {
54 WGET_OPT_CONTINUE = (1 << 0),
Denys Vlasenkofb132e42010-10-29 11:46:52 +020055 WGET_OPT_SPIDER = (1 << 1),
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020056 WGET_OPT_QUIET = (1 << 2),
57 WGET_OPT_OUTNAME = (1 << 3),
58 WGET_OPT_PREFIX = (1 << 4),
59 WGET_OPT_PROXY = (1 << 5),
60 WGET_OPT_USER_AGENT = (1 << 6),
61 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
62 WGET_OPT_RETRIES = (1 << 8),
63 WGET_OPT_PASSIVE = (1 << 9),
64 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
65 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
66};
67
/* Values of the "flag" argument of progress_meter() */
enum {
	PROGRESS_END   = 0,   /* final update; finishes the status line */
	PROGRESS_BUMP  = 1,   /* periodic update */
	PROGRESS_START = -1,  /* first update; initializes the meter */
};
Denis Vlasenko9cade082006-11-21 10:43:02 +000073#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko00d84172008-11-24 07:34:42 +000074static void progress_meter(int flag)
Denis Vlasenko47ddd012007-09-24 18:24:17 +000075{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020076 if (option_mask32 & WGET_OPT_QUIET)
77 return;
Denis Vlasenko47ddd012007-09-24 18:24:17 +000078
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020079 if (flag == PROGRESS_START)
Magnus Dammf5914992009-11-08 16:34:43 +010080 bb_progress_init(&G.pmt);
Denis Vlasenko47ddd012007-09-24 18:24:17 +000081
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +010082 bb_progress_update(&G.pmt, G.curfile, G.beg_range, G.transferred,
Denys Vlasenkoc5bbd5d2010-07-12 03:27:09 +020083 G.chunked ? 0 : G.beg_range + G.transferred + G.content_len);
Denis Vlasenko47ddd012007-09-24 18:24:17 +000084
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020085 if (flag == PROGRESS_END) {
Denys Vlasenko19ced5c2010-06-06 21:53:09 +020086 bb_putchar_stderr('\n');
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +010087 G.transferred = 0;
Denis Vlasenko47ddd012007-09-24 18:24:17 +000088 }
89}
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020090#else
Denis Vlasenko00d84172008-11-24 07:34:42 +000091static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
Eric Andersenb520e082000-10-03 00:21:45 +000092#endif
Eric Andersenc7bda1c2004-03-15 08:29:22 +000093
Denis Vlasenko47ddd012007-09-24 18:24:17 +000094
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Edits HOST in place: "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 * HOST is left untouched if it does not start with '[', contains no '%',
 * is not of the form "[xx]" or "[xx]:nn", or has its '%' after the ']'.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *bracket;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return;

	/* Remove the IPv6 zone identifier from the host address */
	bracket = strchr(host, ']');
	if (!bracket || (bracket[1] != ':' && bracket[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}

	/* A '%' located after the ']' (as in "[::1]:%5") is not a zone id.
	 * Moving the tail "up" to it would make the string longer and
	 * write past the end of the buffer, so leave such hosts alone. */
	if (scope > bracket)
		return;

	/* bracket points to "]...", scope points to "%eth0]...":
	 * slide the tail (including its NUL) down over the zone id */
	memmove(scope, bracket, strlen(bracket) + 1);
}
130
/* Read up to NMEMB bytes from STREAM into PTR, restarting the read
 * whenever it is cut short by an error with errno == EINTR.
 * Returns the number of bytes stored; this is less than NMEMB if eof
 * or any other error was hit. */
static size_t safe_fread(void *ptr, size_t nmemb, FILE *stream)
{
	char *dst = ptr;
	size_t total = 0;

	for (;;) {
		size_t got;

		clearerr(stream);
		errno = 0;
		got = fread(dst + total, 1, nmemb - total, stream);
		total += got;

		if (total == nmemb)
			break; /* got everything we asked for */
		if (!ferror(stream) || errno != EINTR)
			break; /* eof, or an error other than an interrupt */
	}

	return total;
}
148
/* fgets() wrapper: reads a line, or at most SIZE-1 bytes, from STREAM
 * into S and retries for as long as the read fails with errno == EINTR.
 * Returns S, or NULL on eof or on any other error. */
static char *safe_fgets(char *s, int size, FILE *stream)
{
	for (;;) {
		char *line;

		clearerr(stream);
		errno = 0;
		line = fgets(s, size, stream);

		if (line != NULL)
			return line;
		if (!ferror(stream) || errno != EINTR)
			return NULL;
		/* interrupted: try again */
	}
}
163
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/* Base64-encode STR into G.wget_buf and return G.wget_buf.
 * The result is overwritten by the next user of that buffer.
 * Overlong input is silently cut so that the encoding fits. */
static char *base64enc(const char *str)
{
	const size_t max_len = sizeof(G.wget_buf)/4*3 - 10; /* paranoia */
	unsigned len;

	len = strlen(str);
	if (len > max_len)
		len = max_len;
	bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
	return G.wget_buf;
}
#endif
175
/* Cut S at its first byte below ' ' (control characters, including
 * CR, LF and TAB). Bytes are compared as unsigned, so bytes >= 0x80
 * are kept. Modifies S in place and returns it. */
static char* sanitize_string(char *s)
{
	unsigned char *cursor;

	for (cursor = (unsigned char *) s; *cursor >= ' '; cursor++)
		continue;
	*cursor = '\0';

	return s;
}
184
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000185static FILE *open_socket(len_and_sockaddr *lsa)
186{
187 FILE *fp;
188
189 /* glibc 2.4 seems to try seeking on it - ??! */
190 /* hopefully it understands what ESPIPE means... */
191 fp = fdopen(xconnect_stream(lsa), "r+");
192 if (fp == NULL)
193 bb_perror_msg_and_die("fdopen");
194
195 return fp;
196}
197
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100198static int ftpcmd(const char *s1, const char *s2, FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000199{
200 int result;
201 if (s1) {
202 if (!s2) s2 = "";
203 fprintf(fp, "%s%s\r\n", s1, s2);
204 fflush(fp);
205 }
206
207 do {
208 char *buf_ptr;
209
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100210 if (fgets(G.wget_buf, sizeof(G.wget_buf)-2, fp) == NULL) {
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000211 bb_perror_msg_and_die("error getting response");
212 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100213 buf_ptr = strstr(G.wget_buf, "\r\n");
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000214 if (buf_ptr) {
215 *buf_ptr = '\0';
216 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100217 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000218
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100219 G.wget_buf[3] = '\0';
220 result = xatoi_positive(G.wget_buf);
221 G.wget_buf[3] = ' ';
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000222 return result;
223}
224
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000225static void parse_url(char *src_url, struct host_info *h)
226{
227 char *url, *p, *sp;
228
229 /* h->allocated = */ url = xstrdup(src_url);
230
231 if (strncmp(url, "http://", 7) == 0) {
232 h->port = bb_lookup_port("http", "tcp", 80);
233 h->host = url + 7;
234 h->is_ftp = 0;
235 } else if (strncmp(url, "ftp://", 6) == 0) {
236 h->port = bb_lookup_port("ftp", "tcp", 21);
237 h->host = url + 6;
238 h->is_ftp = 1;
239 } else
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200240 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000241
242 // FYI:
243 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
244 // 'GET /?var=a/b HTTP 1.0'
245 // and saves 'index.html?var=a%2Fb' (we save 'b')
246 // wget 'http://busybox.net?login=john@doe':
247 // request: 'GET /?login=john@doe HTTP/1.0'
248 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
249 // wget 'http://busybox.net#test/test':
250 // request: 'GET / HTTP/1.0'
251 // saves: 'index.html' (we save 'test')
252 //
253 // We also don't add unique .N suffix if file exists...
254 sp = strchr(h->host, '/');
255 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
256 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
257 if (!sp) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000258 h->path = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000259 } else if (*sp == '/') {
260 *sp = '\0';
261 h->path = sp + 1;
262 } else { // '#' or '?'
263 // http://busybox.net?login=john@doe is a valid URL
264 // memmove converts to:
265 // http:/busybox.nett?login=john@doe...
Denis Vlasenko818322b2007-09-24 18:27:04 +0000266 memmove(h->host - 1, h->host, sp - h->host);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000267 h->host--;
268 sp[-1] = '\0';
269 h->path = sp;
270 }
271
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200272 // We used to set h->user to NULL here, but this interferes
273 // with handling of code 302 ("object was moved")
274
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000275 sp = strrchr(h->host, '@');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000276 if (sp != NULL) {
277 h->user = h->host;
278 *sp = '\0';
279 h->host = sp + 1;
280 }
281
282 sp = h->host;
283}
284
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100285static char *gethdr(FILE *fp /*, int *istrunc*/)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000286{
287 char *s, *hdrval;
288 int c;
289
290 /* *istrunc = 0; */
291
292 /* retrieve header line */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100293 if (fgets(G.wget_buf, sizeof(G.wget_buf), fp) == NULL)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000294 return NULL;
295
296 /* see if we are at the end of the headers */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100297 for (s = G.wget_buf; *s == '\r'; ++s)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000298 continue;
299 if (*s == '\n')
300 return NULL;
301
302 /* convert the header name to lower case */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100303 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.'; ++s) {
Denys Vlasenko48363312010-04-04 15:29:32 +0200304 /* tolower for "A-Z", no-op for "0-9a-z-." */
305 *s = (*s | 0x20);
306 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000307
308 /* verify we are at the end of the header name */
309 if (*s != ':')
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100310 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000311
312 /* locate the start of the header value */
313 *s++ = '\0';
314 hdrval = skip_whitespace(s);
315
316 /* locate the end of header */
317 while (*s && *s != '\r' && *s != '\n')
318 ++s;
319
320 /* end of header found */
321 if (*s) {
322 *s = '\0';
323 return hdrval;
324 }
325
Denys Vlasenko7f432802009-06-28 01:02:24 +0200326 /* Rats! The buffer isn't big enough to hold the entire header value */
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000327 while (c = getc(fp), c != EOF && c != '\n')
328 continue;
329 /* *istrunc = 1; */
330 return hdrval;
331}
332
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
/* URL-encode STR, see RFC 2396.
 * ASCII letters, digits and the characters !&'()*-.=_~ are copied
 * as is, every other byte becomes %XX (uppercase hex).
 * Returns a newly xmalloc()ed string.
 */
static char *URL_escape(const char *str)
{
	/* worst case: every byte turns into three */
	char *res = xmalloc(strlen(str) * 3 + 1);
	char *dst = res;

	for (;;) {
		unsigned char c = *str++;
		int verbatim;

		switch (c) {
		case '\0':
		case '!':
		case '&':
		case '\'':
		case '(':
		case ')':
		case '*':
		case '-':
		case '.':
		case '=':
		case '_':
		case '~':
			verbatim = 1;
			break;
		default:
			verbatim = (c >= '0' && c <= '9')
				|| ((c|0x20) >= 'a' && (c|0x20) <= 'z');
			break;
		}

		if (!verbatim) {
			*dst++ = '%';
			*dst++ = bb_hexdigits_upcase[c >> 4];
			*dst++ = bb_hexdigits_upcase[c & 0xf];
			continue;
		}

		*dst++ = c;
		if (c == '\0')
			return res; /* the terminating NUL has been copied too */
	}
}
#endif
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000370
Denys Vlasenko7f432802009-06-28 01:02:24 +0200371static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
372{
Denys Vlasenko7f432802009-06-28 01:02:24 +0200373 FILE *sfp;
374 char *str;
375 int port;
376
377 if (!target->user)
378 target->user = xstrdup("anonymous:busybox@");
379
380 sfp = open_socket(lsa);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100381 if (ftpcmd(NULL, NULL, sfp) != 220)
382 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200383
384 /*
385 * Splitting username:password pair,
386 * trying to log in
387 */
388 str = strchr(target->user, ':');
389 if (str)
390 *str++ = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100391 switch (ftpcmd("USER ", target->user, sfp)) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200392 case 230:
393 break;
394 case 331:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100395 if (ftpcmd("PASS ", str, sfp) == 230)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200396 break;
397 /* fall through (failed login) */
398 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100399 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200400 }
401
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100402 ftpcmd("TYPE I", NULL, sfp);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200403
404 /*
405 * Querying file size
406 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100407 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
408 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100409 if (G.content_len < 0 || errno) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200410 bb_error_msg_and_die("SIZE value is garbage");
411 }
412 G.got_clen = 1;
413 }
414
415 /*
416 * Entering passive mode
417 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100418 if (ftpcmd("PASV", NULL, sfp) != 227) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200419 pasv_error:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100420 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200421 }
422 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
423 // Server's IP is N1.N2.N3.N4 (we ignore it)
424 // Server's port for data connection is P1*256+P2
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100425 str = strrchr(G.wget_buf, ')');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200426 if (str) str[0] = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100427 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200428 if (!str) goto pasv_error;
429 port = xatou_range(str+1, 0, 255);
430 *str = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100431 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200432 if (!str) goto pasv_error;
433 port += xatou_range(str+1, 0, 255) * 256;
434 set_nport(lsa, htons(port));
435
436 *dfpp = open_socket(lsa);
437
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100438 if (G.beg_range) {
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100439 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
440 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100441 G.content_len -= G.beg_range;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200442 }
443
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100444 if (ftpcmd("RETR ", target->path, sfp) > 150)
445 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200446
447 return sfp;
448}
449
Denys Vlasenko7f432802009-06-28 01:02:24 +0200450static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd)
451{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200452#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
453# if ENABLE_FEATURE_WGET_TIMEOUT
454 unsigned second_cnt;
455# endif
456 struct pollfd polldata;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200457
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200458 polldata.fd = fileno(dfp);
459 polldata.events = POLLIN | POLLPRI;
Denys Vlasenkoda0df472010-08-08 04:21:50 +0200460 ndelay_on(polldata.fd);
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200461#endif
462 progress_meter(PROGRESS_START);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200463
464 if (G.chunked)
465 goto get_clen;
466
467 /* Loops only if chunked */
468 while (1) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100469 while (1) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200470 int n;
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100471 unsigned rdsz;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200472
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100473 rdsz = sizeof(G.wget_buf);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100474 if (G.got_clen) {
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100475 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100476 if ((int)G.content_len <= 0)
477 break;
478 rdsz = (unsigned)G.content_len;
479 }
480 }
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200481#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
482# if ENABLE_FEATURE_WGET_TIMEOUT
483 second_cnt = G.timeout_seconds;
484# endif
485 while (1) {
486 if (safe_poll(&polldata, 1, 1000) != 0)
487 break; /* error, EOF, or data is available */
488# if ENABLE_FEATURE_WGET_TIMEOUT
489 if (second_cnt != 0 && --second_cnt == 0) {
490 progress_meter(PROGRESS_END);
491 bb_perror_msg_and_die("download timed out");
492 }
493# endif
494 /* Needed for "stalled" indicator */
495 progress_meter(PROGRESS_BUMP);
496 }
497#endif
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100498 n = safe_fread(G.wget_buf, rdsz, dfp);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200499 if (n <= 0) {
500 if (ferror(dfp)) {
501 /* perror will not work: ferror doesn't set errno */
502 bb_error_msg_and_die(bb_msg_read_error);
503 }
504 break;
505 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100506 xwrite(output_fd, G.wget_buf, n);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200507#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100508 G.transferred += n;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200509 progress_meter(PROGRESS_BUMP);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200510#endif
511 if (G.got_clen)
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100512 G.content_len -= n;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200513 }
514
515 if (!G.chunked)
516 break;
517
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100518 safe_fgets(G.wget_buf, sizeof(G.wget_buf), dfp); /* This is a newline */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200519 get_clen:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100520 safe_fgets(G.wget_buf, sizeof(G.wget_buf), dfp);
521 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200522 /* FIXME: error check? */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100523 if (G.content_len == 0)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200524 break; /* all done! */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100525 G.got_clen = 1;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200526 }
527
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200528 progress_meter(PROGRESS_END);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200529}
530
Denis Vlasenko9b49a5e2007-10-11 10:05:36 +0000531int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
Denis Vlasenkoa60f84e2008-07-05 09:18:54 +0000532int wget_main(int argc UNUSED_PARAM, char **argv)
Eric Andersen96700832000-09-04 15:15:55 +0000533{
Eric Andersen79757c92001-04-05 21:45:54 +0000534 struct host_info server, target;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +0000535 len_and_sockaddr *lsa;
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000536 unsigned opt;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200537 int redir_limit;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200538 char *proxy = NULL;
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000539 char *dir_prefix = NULL;
540#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denis Vlasenko5a2ad692009-03-04 14:13:37 +0000541 char *post_data;
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000542 char *extra_headers = NULL;
Glenn L McGrath514aeab2003-12-19 12:08:56 +0000543 llist_t *headers_llist = NULL;
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000544#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +0200545 FILE *sfp; /* socket to web/ftp server */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000546 FILE *dfp; /* socket to ftp server (data) */
547 char *fname_out; /* where to direct output (-O) */
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000548 int output_fd = -1;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200549 bool use_proxy; /* Use proxies if env vars are set */
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000550 const char *proxy_flag = "on"; /* Use proxies if env vars are set */
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000551 const char *user_agent = "Wget";/* "User-Agent" header field */
Denis Vlasenko77105632007-09-24 15:04:00 +0000552
Denis Vlasenko6ca409e2007-08-12 20:58:27 +0000553 static const char keywords[] ALIGN1 =
Denis Vlasenko990d0f62007-07-24 15:54:42 +0000554 "content-length\0""transfer-encoding\0""chunked\0""location\0";
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000555 enum {
556 KEY_content_length = 1, KEY_transfer_encoding, KEY_chunked, KEY_location
557 };
Bernhard Reutner-Fischer289e86a2006-08-20 20:01:24 +0000558#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denis Vlasenko6ca409e2007-08-12 20:58:27 +0000559 static const char wget_longopts[] ALIGN1 =
Denis Vlasenkobdc88fd2007-07-23 17:14:14 +0000560 /* name, has_arg, val */
561 "continue\0" No_argument "c"
562 "spider\0" No_argument "s"
563 "quiet\0" No_argument "q"
564 "output-document\0" Required_argument "O"
565 "directory-prefix\0" Required_argument "P"
566 "proxy\0" Required_argument "Y"
567 "user-agent\0" Required_argument "U"
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200568#if ENABLE_FEATURE_WGET_TIMEOUT
569 "timeout\0" Required_argument "T"
570#endif
Denis Vlasenko50af9262009-03-02 15:08:06 +0000571 /* Ignored: */
572 // "tries\0" Required_argument "t"
Denis Vlasenko50af9262009-03-02 15:08:06 +0000573 /* Ignored (we always use PASV): */
Denis Vlasenkobdc88fd2007-07-23 17:14:14 +0000574 "passive-ftp\0" No_argument "\xff"
575 "header\0" Required_argument "\xfe"
Denis Vlasenko5a2ad692009-03-04 14:13:37 +0000576 "post-data\0" Required_argument "\xfd"
Bernhard Reutner-Fischer3fdba182010-02-10 19:37:29 +0100577 /* Ignored (we don't do ssl) */
578 "no-check-certificate\0" No_argument "\xfc"
Denis Vlasenko990d0f62007-07-24 15:54:42 +0000579 ;
Denis Vlasenko77105632007-09-24 15:04:00 +0000580#endif
581
582 INIT_G();
583
584#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denis Vlasenkobdc88fd2007-07-23 17:14:14 +0000585 applet_long_options = wget_longopts;
Bernhard Reutner-Fischer8d3a6f72006-05-31 14:11:38 +0000586#endif
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000587 /* server.allocated = target.allocated = NULL; */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200588 opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
589 opt = getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000590 &fname_out, &dir_prefix,
Denis Vlasenko540ab702008-06-29 00:32:35 +0000591 &proxy_flag, &user_agent,
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200592 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
593 NULL /* -t RETRIES */
Denis Vlasenko5e34ff22009-04-21 11:09:40 +0000594 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
595 IF_FEATURE_WGET_LONG_OPTIONS(, &post_data)
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000596 );
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000597#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denis Vlasenko7534e082006-10-23 23:21:58 +0000598 if (headers_llist) {
599 int size = 1;
600 char *cp;
Denis Vlasenko8d9f4952007-04-08 15:08:42 +0000601 llist_t *ll = headers_llist;
Denis Vlasenko7534e082006-10-23 23:21:58 +0000602 while (ll) {
603 size += strlen(ll->data) + 2;
604 ll = ll->link;
605 }
606 extra_headers = cp = xmalloc(size);
Glenn L McGrath514aeab2003-12-19 12:08:56 +0000607 while (headers_llist) {
Denis Vlasenkod50dda82008-06-15 05:40:56 +0000608 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
Eric Andersen96700832000-09-04 15:15:55 +0000609 }
610 }
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000611#endif
Tim Rikerc1ef7bd2006-01-25 00:08:53 +0000612
Denys Vlasenko7f432802009-06-28 01:02:24 +0200613 /* TODO: compat issue: should handle "wget URL1 URL2..." */
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200614
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200615 target.user = NULL;
Eric Andersen79757c92001-04-05 21:45:54 +0000616 parse_url(argv[optind], &target);
Eric Andersen79757c92001-04-05 21:45:54 +0000617
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000618 /* Use the proxy if necessary */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200619 use_proxy = (strcmp(proxy_flag, "off") != 0);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000620 if (use_proxy) {
Robert Griebld7760112002-05-14 23:36:45 +0000621 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200622 if (proxy && proxy[0]) {
Denys Vlasenko81fe2b12010-02-11 04:23:43 +0100623 server.user = NULL;
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000624 parse_url(proxy, &server);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000625 } else {
626 use_proxy = 0;
627 }
Robert Griebld7760112002-05-14 23:36:45 +0000628 }
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200629 if (!use_proxy) {
630 server.port = target.port;
631 if (ENABLE_FEATURE_IPV6) {
632 server.host = xstrdup(target.host);
633 } else {
634 server.host = target.host;
635 }
636 }
637
638 if (ENABLE_FEATURE_IPV6)
639 strip_ipv6_scope_id(target.host);
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000640
Denis Vlasenko818322b2007-09-24 18:27:04 +0000641 /* Guess an output filename, if there was no -O FILE */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000642 if (!(opt & WGET_OPT_OUTNAME)) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000643 fname_out = bb_get_last_path_component_nostrip(target.path);
644 /* handle "wget http://kernel.org//" */
645 if (fname_out[0] == '/' || !fname_out[0])
Denis Vlasenkob6aae0f2007-01-29 22:51:25 +0000646 fname_out = (char*)"index.html";
Denis Vlasenko818322b2007-09-24 18:27:04 +0000647 /* -P DIR is considered only if there was no -O FILE */
648 if (dir_prefix)
Matt Kraai0382eb82001-07-19 19:13:55 +0000649 fname_out = concat_path_file(dir_prefix, fname_out);
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000650 } else {
651 if (LONE_DASH(fname_out)) {
652 /* -O - */
653 output_fd = 1;
654 opt &= ~WGET_OPT_CONTINUE;
655 }
Eric Andersen29edd002000-12-09 16:55:35 +0000656 }
Denis Vlasenko818322b2007-09-24 18:27:04 +0000657#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100658 G.curfile = bb_get_last_path_component_nostrip(fname_out);
Denis Vlasenko818322b2007-09-24 18:27:04 +0000659#endif
660
Denis Vlasenko4e4662c2006-11-23 13:10:23 +0000661 /* Impossible?
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +0000662 if ((opt & WGET_OPT_CONTINUE) && !fname_out)
Denys Vlasenko6331cf02009-11-13 09:08:27 +0100663 bb_error_msg_and_die("can't specify continue (-c) without a filename (-O)");
Denys Vlasenko7f432802009-06-28 01:02:24 +0200664 */
Eric Andersen29edd002000-12-09 16:55:35 +0000665
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000666 /* Determine where to start transfer */
Denis Vlasenko4e4662c2006-11-23 13:10:23 +0000667 if (opt & WGET_OPT_CONTINUE) {
Denis Vlasenko7039a662006-10-08 17:54:47 +0000668 output_fd = open(fname_out, O_WRONLY);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000669 if (output_fd >= 0) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100670 G.beg_range = xlseek(output_fd, 0, SEEK_END);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000671 }
672 /* File doesn't exist. We do not create file here yet.
Denys Vlasenko7f432802009-06-28 01:02:24 +0200673 * We are not sure it exists on remove side */
Eric Andersen96700832000-09-04 15:15:55 +0000674 }
675
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200676 redir_limit = 5;
677 resolve_lsa:
Denis Vlasenko42823d52007-02-04 02:39:08 +0000678 lsa = xhost2sockaddr(server.host, server.port);
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +0000679 if (!(opt & WGET_OPT_QUIET)) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200680 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
681 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
682 free(s);
Eric Andersene6dc4392003-10-31 09:31:46 +0000683 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200684 establish_session:
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000685 if (use_proxy || !target.is_ftp) {
Eric Andersen79757c92001-04-05 21:45:54 +0000686 /*
687 * HTTP session
688 */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200689 char *str;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200690 int status;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200691
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200692 /* Open socket to http server */
693 sfp = open_socket(lsa);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200694
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200695 /* Send HTTP request */
696 if (use_proxy) {
697 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
698 target.is_ftp ? "f" : "ht", target.host,
699 target.path);
700 } else {
701 if (opt & WGET_OPT_POST_DATA)
702 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
703 else
704 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
705 }
Glenn L McGrathe7bdfcc2003-08-28 22:03:19 +0000706
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200707 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
708 target.host, user_agent);
Eric Andersen79757c92001-04-05 21:45:54 +0000709
Denis Vlasenko9cade082006-11-21 10:43:02 +0000710#if ENABLE_FEATURE_WGET_AUTHENTICATION
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200711 if (target.user) {
712 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100713 base64enc(target.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200714 }
715 if (use_proxy && server.user) {
716 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100717 base64enc(server.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200718 }
Eric Andersen79757c92001-04-05 21:45:54 +0000719#endif
720
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100721 if (G.beg_range)
722 fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000723#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200724 if (extra_headers)
725 fputs(extra_headers, sfp);
Denis Vlasenko5a2ad692009-03-04 14:13:37 +0000726
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200727 if (opt & WGET_OPT_POST_DATA) {
728 char *estr = URL_escape(post_data);
729 fprintf(sfp, "Content-Type: application/x-www-form-urlencoded\r\n");
730 fprintf(sfp, "Content-Length: %u\r\n" "\r\n" "%s",
731 (int) strlen(estr), estr);
732 /*fprintf(sfp, "Connection: Keep-Alive\r\n\r\n");*/
733 /*fprintf(sfp, "%s\r\n", estr);*/
734 free(estr);
735 } else
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000736#endif
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200737 { /* If "Connection:" is needed, document why */
738 fprintf(sfp, /* "Connection: close\r\n" */ "\r\n");
739 }
Eric Andersen79757c92001-04-05 21:45:54 +0000740
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +0200741 fflush(sfp);
742
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200743 /*
744 * Retrieve HTTP response line and check for "200" status code.
745 */
Denis Vlasenko023b57d2006-10-15 17:05:55 +0000746 read_response:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100747 if (fgets(G.wget_buf, sizeof(G.wget_buf), sfp) == NULL)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200748 bb_error_msg_and_die("no response from server");
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000749
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100750 str = G.wget_buf;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200751 str = skip_non_whitespace(str);
752 str = skip_whitespace(str);
753 // FIXME: no error check
754 // xatou wouldn't work: "200 OK"
755 status = atoi(str);
756 switch (status) {
757 case 0:
758 case 100:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100759 while (gethdr(sfp /*, &n*/) != NULL)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200760 /* eat all remaining headers */;
761 goto read_response;
762 case 200:
Denis Vlasenko50b5cac2008-06-22 16:28:02 +0000763/*
764Response 204 doesn't say "null file", it says "metadata
765has changed but data didn't":
766
767"10.2.5 204 No Content
768The server has fulfilled the request but does not need to return
769an entity-body, and might want to return updated metainformation.
770The response MAY include new or updated metainformation in the form
771of entity-headers, which if present SHOULD be associated with
772the requested variant.
773
774If the client is a user agent, it SHOULD NOT change its document
775view from that which caused the request to be sent. This response
776is primarily intended to allow input for actions to take place
777without causing a change to the user agent's active document view,
778although any new or updated metainformation SHOULD be applied
779to the document currently in the user agent's active view.
780
781The 204 response MUST NOT include a message-body, and thus
782is always terminated by the first empty line after the header fields."
783
784However, in real world it was observed that some web servers
785(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
786*/
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200787 case 204:
788 break;
Denys Vlasenkofb132e42010-10-29 11:46:52 +0200789 case 300: /* redirection */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200790 case 301:
791 case 302:
792 case 303:
793 break;
794 case 206:
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100795 if (G.beg_range)
Denis Vlasenko023b57d2006-10-15 17:05:55 +0000796 break;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200797 /* fall through */
798 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100799 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200800 }
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000801
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200802 /*
803 * Retrieve HTTP headers.
804 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100805 while ((str = gethdr(sfp /*, &n*/)) != NULL) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200806 /* gethdr converted "FOO:" string to lowercase */
Matthijs van de Water0d586662009-08-22 20:19:48 +0200807 smalluint key;
808 /* strip trailing whitespace */
809 char *s = strchrnul(str, '\0') - 1;
810 while (s >= str && (*s == ' ' || *s == '\t')) {
811 *s = '\0';
812 s--;
813 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100814 key = index_in_strings(keywords, G.wget_buf) + 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200815 if (key == KEY_content_length) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100816 G.content_len = BB_STRTOOFF(str, NULL, 10);
817 if (G.content_len < 0 || errno) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200818 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
Eric Andersen79757c92001-04-05 21:45:54 +0000819 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200820 G.got_clen = 1;
821 continue;
822 }
823 if (key == KEY_transfer_encoding) {
824 if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
825 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
826 G.chunked = G.got_clen = 1;
827 }
828 if (key == KEY_location && status >= 300) {
829 if (--redir_limit == 0)
830 bb_error_msg_and_die("too many redirections");
831 fclose(sfp);
832 G.got_clen = 0;
833 G.chunked = 0;
834 if (str[0] == '/')
835 /* free(target.allocated); */
836 target.path = /* target.allocated = */ xstrdup(str+1);
837 /* lsa stays the same: it's on the same server */
838 else {
839 parse_url(str, &target);
840 if (!use_proxy) {
841 server.host = target.host;
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200842 /* strip_ipv6_scope_id(target.host); - no! */
843 /* we assume remote never gives us IPv6 addr with scope id */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200844 server.port = target.port;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +0000845 free(lsa);
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200846 goto resolve_lsa;
847 } /* else: lsa stays the same: we use proxy */
Eric Andersen79757c92001-04-05 21:45:54 +0000848 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200849 goto establish_session;
Eric Andersen79757c92001-04-05 21:45:54 +0000850 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200851 }
852// if (status >= 300)
853// bb_error_msg_and_die("bad redirection (no Location: header from server)");
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000854
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200855 /* For HTTP, data is pumped over the same connection */
Eric Andersen79757c92001-04-05 21:45:54 +0000856 dfp = sfp;
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000857
858 } else {
Eric Andersen79757c92001-04-05 21:45:54 +0000859 /*
860 * FTP session
861 */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200862 sfp = prepare_ftp_session(&dfp, &target, lsa);
Eric Andersen96700832000-09-04 15:15:55 +0000863 }
Denis Vlasenko77105632007-09-24 15:04:00 +0000864
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +0000865 if (opt & WGET_OPT_SPIDER) {
866 if (ENABLE_FEATURE_CLEAN_UP)
867 fclose(sfp);
Denis Vlasenko77105632007-09-24 15:04:00 +0000868 return EXIT_SUCCESS;
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +0000869 }
Eric Andersen79757c92001-04-05 21:45:54 +0000870
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000871 if (output_fd < 0) {
872 int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
873 /* compat with wget: -O FILE can overwrite */
874 if (opt & WGET_OPT_OUTNAME)
875 o_flags = O_WRONLY | O_CREAT | O_TRUNC;
876 output_fd = xopen(fname_out, o_flags);
877 }
Denis Vlasenkof8aa1092006-10-01 10:58:54 +0000878
Denys Vlasenko7f432802009-06-28 01:02:24 +0200879 retrieve_file_data(dfp, output_fd);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100880 xclose(output_fd);
Rob Landley19a39402006-06-13 17:10:26 +0000881
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200882 if (dfp != sfp) {
883 /* It's ftp. Close it properly */
Eric Andersen79757c92001-04-05 21:45:54 +0000884 fclose(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100885 if (ftpcmd(NULL, NULL, sfp) != 226)
886 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
887 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
Eric Andersen79757c92001-04-05 21:45:54 +0000888 }
Denis Vlasenko77105632007-09-24 15:04:00 +0000889
890 return EXIT_SUCCESS;
Eric Andersen96700832000-09-04 15:15:55 +0000891}