blob: 74d90040f6a8c23d970d5bcb1e4ceb40ffb150c1 [file] [log] [blame]
/* vi: set sw=4 ts=4: */
/*
 * wget - retrieve a file using HTTP or FTP
 *
 * Chip Rosenthal Covad Communications <chip@laserlink.net>
 * Licensed under GPLv2, see file LICENSE in this source tree.
 *
 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
 */
Denis Vlasenkob6adbf12007-05-26 19:00:18 +000011#include "libbb.h"
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +000012
Eric Andersen79757c92001-04-05 21:45:54 +000013struct host_info {
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +000014 // May be used if we ever will want to free() all xstrdup()s...
15 /* char *allocated; */
Denis Vlasenko818322b2007-09-24 18:27:04 +000016 const char *path;
17 const char *user;
18 char *host;
19 int port;
20 smallint is_ftp;
Eric Andersen79757c92001-04-05 21:45:54 +000021};
22
Denis Vlasenko77105632007-09-24 15:04:00 +000023
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020024/* Globals */
Denis Vlasenko77105632007-09-24 15:04:00 +000025struct globals {
26 off_t content_len; /* Content-length of the file */
27 off_t beg_range; /* Range at which continue begins */
28#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko77105632007-09-24 15:04:00 +000029 off_t transferred; /* Number of bytes transferred so far */
30 const char *curfile; /* Name of current file being transferred */
Magnus Dammf5914992009-11-08 16:34:43 +010031 bb_progress_t pmt;
Denis Vlasenko77105632007-09-24 15:04:00 +000032#endif
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020033#if ENABLE_FEATURE_WGET_TIMEOUT
34 unsigned timeout_seconds;
35#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +020036 smallint chunked; /* chunked transfer encoding */
37 smallint got_clen; /* got content-length: from server */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010038 /* Local downloads do benefit from big buffer.
39 * With 512 byte buffer, it was measured to be
40 * an order of magnitude slower than with big one.
41 */
42 uint64_t just_to_align_next_member;
43 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
Denys Vlasenko98a4c7c2010-02-04 15:00:15 +010044} FIX_ALIASING;
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010045#define G (*ptr_to_globals)
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020046#define INIT_G() do { \
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010047 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020048 IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \
49} while (0)
Denis Vlasenko77105632007-09-24 15:04:00 +000050
51
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020052/* Must match option string! */
53enum {
54 WGET_OPT_CONTINUE = (1 << 0),
Denys Vlasenkofb132e42010-10-29 11:46:52 +020055 WGET_OPT_SPIDER = (1 << 1),
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020056 WGET_OPT_QUIET = (1 << 2),
57 WGET_OPT_OUTNAME = (1 << 3),
58 WGET_OPT_PREFIX = (1 << 4),
59 WGET_OPT_PROXY = (1 << 5),
60 WGET_OPT_USER_AGENT = (1 << 6),
61 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
62 WGET_OPT_RETRIES = (1 << 8),
63 WGET_OPT_PASSIVE = (1 << 9),
64 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
65 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
66};
67
/* Values accepted by progress_meter() */
enum {
	PROGRESS_START = -1,    /* initialize the meter, then draw it */
	PROGRESS_END   = 0,     /* draw one last time and finish the line */
	PROGRESS_BUMP  = 1,     /* plain redraw */
};
Denis Vlasenko9cade082006-11-21 10:43:02 +000073#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko00d84172008-11-24 07:34:42 +000074static void progress_meter(int flag)
Denis Vlasenko47ddd012007-09-24 18:24:17 +000075{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020076 if (option_mask32 & WGET_OPT_QUIET)
77 return;
Denis Vlasenko47ddd012007-09-24 18:24:17 +000078
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020079 if (flag == PROGRESS_START)
Magnus Dammf5914992009-11-08 16:34:43 +010080 bb_progress_init(&G.pmt);
Denis Vlasenko47ddd012007-09-24 18:24:17 +000081
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +010082 bb_progress_update(&G.pmt, G.curfile, G.beg_range, G.transferred,
Denys Vlasenkoc5bbd5d2010-07-12 03:27:09 +020083 G.chunked ? 0 : G.beg_range + G.transferred + G.content_len);
Denis Vlasenko47ddd012007-09-24 18:24:17 +000084
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020085 if (flag == PROGRESS_END) {
Denys Vlasenko19ced5c2010-06-06 21:53:09 +020086 bb_putchar_stderr('\n');
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +010087 G.transferred = 0;
Denis Vlasenko47ddd012007-09-24 18:24:17 +000088 }
89}
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020090#else
Denis Vlasenko00d84172008-11-24 07:34:42 +000091static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
Eric Andersenb520e082000-10-03 00:21:45 +000092#endif
Eric Andersenc7bda1c2004-03-15 08:29:22 +000093
Denis Vlasenko47ddd012007-09-24 18:24:17 +000094
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Edits host in place: "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 * Strings which are not of the form "[addr%zone]" or "[addr%zone]:port"
 * are left untouched.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return;

	/* Remove the IPv6 zone identifier from the host address */
	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}
	if (scope > cp) {
		/* '%' is not inside the brackets (e.g. "[::1]:%"):
		 * there is no zone id to strip, and copying "]..." to a
		 * higher address would grow the string instead of shrinking it */
		return;
	}

	/* cp points to "]...", scope points to "%eth0]..." */
	/* regions overlap: move the tail (including NUL) down over the zone id */
	memmove(scope, cp, strlen(cp) + 1);
}
130
#if 0 /* were needed when we used signal-driven progress bar */
/* Read NMEMB bytes into PTR from STREAM.  Returns the number of bytes read,
 * and a short count if an eof or non-interrupt error is encountered.
 * Reads interrupted by a signal (EINTR) are transparently restarted. */
static size_t safe_fread(void *ptr, size_t nmemb, FILE *stream)
{
	size_t ret;
	char *p = (char*)ptr;

	do {
		/* reset error state so that ferror/errno below describe this attempt only */
		clearerr(stream);
		errno = 0;
		ret = fread(p, 1, nmemb, stream);
		p += ret;
		nmemb -= ret;
	} while (nmemb && ferror(stream) && errno == EINTR);

	return p - (char*)ptr;
}

/* Read a line or SIZE-1 bytes into S, whichever is less, from STREAM.
 * Returns S, or NULL if an eof or non-interrupt error is encountered.
 * Reads interrupted by a signal (EINTR) are transparently restarted. */
static char *safe_fgets(char *s, int size, FILE *stream)
{
	char *ret;

	do {
		clearerr(stream);
		errno = 0;
		ret = fgets(s, size, stream);
	} while (ret == NULL && ferror(stream) && errno == EINTR);

	return ret;
}
#endif
Matt Kraai854125f2001-05-09 19:15:46 +0000165
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/* Base64-encode character string.
 * The result is written to, and returned as, G.wget_buf: it is overwritten
 * by the next user of that buffer and must not be free()d.
 * Input too long to fit once encoded is silently truncated.
 */
static char *base64enc(const char *str)
{
	unsigned len = strlen(str);

	/* base64 turns 3 input bytes into 4 output bytes */
	if (len > sizeof(G.wget_buf)/4*3 - 10) /* paranoia */
		len = sizeof(G.wget_buf)/4*3 - 10;
	bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
	return G.wget_buf;
}
#endif
177
/* Truncate s, in place, at its first control character (any byte below
 * ' ', which includes CR, LF and TAB). Used before echoing text received
 * from the network or the command line in error messages.
 * Bytes >= 0x80 are kept: the comparison is done on unsigned char.
 * Returns s.
 */
static char* sanitize_string(char *s)
{
	unsigned char *p = (void *) s;

	/* the terminating NUL is below ' ' too, so the loop always stops */
	while (*p >= ' ')
		p++;
	*p = '\0';
	return s;
}
186
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000187static FILE *open_socket(len_and_sockaddr *lsa)
188{
189 FILE *fp;
190
191 /* glibc 2.4 seems to try seeking on it - ??! */
192 /* hopefully it understands what ESPIPE means... */
193 fp = fdopen(xconnect_stream(lsa), "r+");
194 if (fp == NULL)
Denys Vlasenkodee0fc92011-02-10 10:01:49 +0100195 bb_perror_msg_and_die(bb_msg_memory_exhausted);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000196
197 return fp;
198}
199
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100200static int ftpcmd(const char *s1, const char *s2, FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000201{
202 int result;
203 if (s1) {
204 if (!s2) s2 = "";
205 fprintf(fp, "%s%s\r\n", s1, s2);
206 fflush(fp);
207 }
208
209 do {
210 char *buf_ptr;
211
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100212 if (fgets(G.wget_buf, sizeof(G.wget_buf)-2, fp) == NULL) {
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000213 bb_perror_msg_and_die("error getting response");
214 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100215 buf_ptr = strstr(G.wget_buf, "\r\n");
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000216 if (buf_ptr) {
217 *buf_ptr = '\0';
218 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100219 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000220
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100221 G.wget_buf[3] = '\0';
222 result = xatoi_positive(G.wget_buf);
223 G.wget_buf[3] = ' ';
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000224 return result;
225}
226
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000227static void parse_url(char *src_url, struct host_info *h)
228{
229 char *url, *p, *sp;
230
231 /* h->allocated = */ url = xstrdup(src_url);
232
233 if (strncmp(url, "http://", 7) == 0) {
234 h->port = bb_lookup_port("http", "tcp", 80);
235 h->host = url + 7;
236 h->is_ftp = 0;
237 } else if (strncmp(url, "ftp://", 6) == 0) {
238 h->port = bb_lookup_port("ftp", "tcp", 21);
239 h->host = url + 6;
240 h->is_ftp = 1;
241 } else
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200242 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000243
244 // FYI:
245 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
246 // 'GET /?var=a/b HTTP 1.0'
247 // and saves 'index.html?var=a%2Fb' (we save 'b')
248 // wget 'http://busybox.net?login=john@doe':
249 // request: 'GET /?login=john@doe HTTP/1.0'
250 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
251 // wget 'http://busybox.net#test/test':
252 // request: 'GET / HTTP/1.0'
253 // saves: 'index.html' (we save 'test')
254 //
255 // We also don't add unique .N suffix if file exists...
256 sp = strchr(h->host, '/');
257 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
258 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
259 if (!sp) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000260 h->path = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000261 } else if (*sp == '/') {
262 *sp = '\0';
263 h->path = sp + 1;
264 } else { // '#' or '?'
265 // http://busybox.net?login=john@doe is a valid URL
266 // memmove converts to:
267 // http:/busybox.nett?login=john@doe...
Denis Vlasenko818322b2007-09-24 18:27:04 +0000268 memmove(h->host - 1, h->host, sp - h->host);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000269 h->host--;
270 sp[-1] = '\0';
271 h->path = sp;
272 }
273
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200274 // We used to set h->user to NULL here, but this interferes
275 // with handling of code 302 ("object was moved")
276
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000277 sp = strrchr(h->host, '@');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000278 if (sp != NULL) {
279 h->user = h->host;
280 *sp = '\0';
281 h->host = sp + 1;
282 }
283
284 sp = h->host;
285}
286
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100287static char *gethdr(FILE *fp /*, int *istrunc*/)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000288{
289 char *s, *hdrval;
290 int c;
291
292 /* *istrunc = 0; */
293
294 /* retrieve header line */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100295 if (fgets(G.wget_buf, sizeof(G.wget_buf), fp) == NULL)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000296 return NULL;
297
298 /* see if we are at the end of the headers */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100299 for (s = G.wget_buf; *s == '\r'; ++s)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000300 continue;
301 if (*s == '\n')
302 return NULL;
303
304 /* convert the header name to lower case */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100305 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.'; ++s) {
Denys Vlasenko48363312010-04-04 15:29:32 +0200306 /* tolower for "A-Z", no-op for "0-9a-z-." */
307 *s = (*s | 0x20);
308 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000309
310 /* verify we are at the end of the header name */
311 if (*s != ':')
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100312 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000313
314 /* locate the start of the header value */
315 *s++ = '\0';
316 hdrval = skip_whitespace(s);
317
318 /* locate the end of header */
319 while (*s && *s != '\r' && *s != '\n')
320 ++s;
321
322 /* end of header found */
323 if (*s) {
324 *s = '\0';
325 return hdrval;
326 }
327
Denys Vlasenko7f432802009-06-28 01:02:24 +0200328 /* Rats! The buffer isn't big enough to hold the entire header value */
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000329 while (c = getc(fp), c != EOF && c != '\n')
330 continue;
331 /* *istrunc = 1; */
332 return hdrval;
333}
334
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
/* Return a newly allocated, percent-encoded copy of str.
 * ASCII letters, digits and the characters !&'()*-.=_~ are copied as is,
 * every other byte becomes "%XX" with upper case hex digits.
 * The caller owns the result.
 */
static char *URL_escape(const char *str)
{
	/* URL encode, see RFC 2396 */
	char *dst;
	/* worst case: every input byte expands to three output bytes */
	char *res = dst = xmalloc(strlen(str) * 3 + 1);
	unsigned char c;

	while (1) {
		c = *str++;
		if (c == '\0'
		/* || strchr("!&'()*-.=_~", c) - more code */
		 || c == '!'
		 || c == '&'
		 || c == '\''
		 || c == '('
		 || c == ')'
		 || c == '*'
		 || c == '-'
		 || c == '.'
		 || c == '='
		 || c == '_'
		 || c == '~'
		 || (c >= '0' && c <= '9')
		 || ((c|0x20) >= 'a' && (c|0x20) <= 'z')
		) {
			/* the terminating NUL is copied through this branch too */
			*dst++ = c;
			if (c == '\0')
				return res;
		} else {
			*dst++ = '%';
			*dst++ = bb_hexdigits_upcase[c >> 4];
			*dst++ = bb_hexdigits_upcase[c & 0xf];
		}
	}
}
#endif
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000372
Denys Vlasenko7f432802009-06-28 01:02:24 +0200373static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
374{
Denys Vlasenko7f432802009-06-28 01:02:24 +0200375 FILE *sfp;
376 char *str;
377 int port;
378
379 if (!target->user)
380 target->user = xstrdup("anonymous:busybox@");
381
382 sfp = open_socket(lsa);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100383 if (ftpcmd(NULL, NULL, sfp) != 220)
384 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200385
386 /*
387 * Splitting username:password pair,
388 * trying to log in
389 */
390 str = strchr(target->user, ':');
391 if (str)
392 *str++ = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100393 switch (ftpcmd("USER ", target->user, sfp)) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200394 case 230:
395 break;
396 case 331:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100397 if (ftpcmd("PASS ", str, sfp) == 230)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200398 break;
399 /* fall through (failed login) */
400 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100401 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200402 }
403
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100404 ftpcmd("TYPE I", NULL, sfp);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200405
406 /*
407 * Querying file size
408 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100409 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
410 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100411 if (G.content_len < 0 || errno) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200412 bb_error_msg_and_die("SIZE value is garbage");
413 }
414 G.got_clen = 1;
415 }
416
417 /*
418 * Entering passive mode
419 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100420 if (ftpcmd("PASV", NULL, sfp) != 227) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200421 pasv_error:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100422 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200423 }
424 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
425 // Server's IP is N1.N2.N3.N4 (we ignore it)
426 // Server's port for data connection is P1*256+P2
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100427 str = strrchr(G.wget_buf, ')');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200428 if (str) str[0] = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100429 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200430 if (!str) goto pasv_error;
431 port = xatou_range(str+1, 0, 255);
432 *str = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100433 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200434 if (!str) goto pasv_error;
435 port += xatou_range(str+1, 0, 255) * 256;
436 set_nport(lsa, htons(port));
437
438 *dfpp = open_socket(lsa);
439
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100440 if (G.beg_range) {
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100441 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
442 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100443 G.content_len -= G.beg_range;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200444 }
445
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100446 if (ftpcmd("RETR ", target->path, sfp) > 150)
447 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200448
449 return sfp;
450}
451
Denys Vlasenko7f432802009-06-28 01:02:24 +0200452static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd)
453{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200454#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
455# if ENABLE_FEATURE_WGET_TIMEOUT
456 unsigned second_cnt;
457# endif
458 struct pollfd polldata;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200459
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200460 polldata.fd = fileno(dfp);
461 polldata.events = POLLIN | POLLPRI;
Denys Vlasenkoda0df472010-08-08 04:21:50 +0200462 ndelay_on(polldata.fd);
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200463#endif
464 progress_meter(PROGRESS_START);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200465
466 if (G.chunked)
467 goto get_clen;
468
469 /* Loops only if chunked */
470 while (1) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100471 while (1) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200472 int n;
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100473 unsigned rdsz;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200474
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100475 rdsz = sizeof(G.wget_buf);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100476 if (G.got_clen) {
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100477 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100478 if ((int)G.content_len <= 0)
479 break;
480 rdsz = (unsigned)G.content_len;
481 }
482 }
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200483#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
484# if ENABLE_FEATURE_WGET_TIMEOUT
485 second_cnt = G.timeout_seconds;
486# endif
487 while (1) {
488 if (safe_poll(&polldata, 1, 1000) != 0)
489 break; /* error, EOF, or data is available */
490# if ENABLE_FEATURE_WGET_TIMEOUT
491 if (second_cnt != 0 && --second_cnt == 0) {
492 progress_meter(PROGRESS_END);
493 bb_perror_msg_and_die("download timed out");
494 }
495# endif
496 /* Needed for "stalled" indicator */
497 progress_meter(PROGRESS_BUMP);
498 }
499#endif
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100500 n = fread(G.wget_buf, 1, rdsz, dfp);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200501 if (n <= 0) {
502 if (ferror(dfp)) {
503 /* perror will not work: ferror doesn't set errno */
504 bb_error_msg_and_die(bb_msg_read_error);
505 }
506 break;
507 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100508 xwrite(output_fd, G.wget_buf, n);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200509#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100510 G.transferred += n;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200511 progress_meter(PROGRESS_BUMP);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200512#endif
513 if (G.got_clen)
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100514 G.content_len -= n;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200515 }
516
517 if (!G.chunked)
518 break;
519
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100520 fgets(G.wget_buf, sizeof(G.wget_buf), dfp); /* This is a newline */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200521 get_clen:
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100522 fgets(G.wget_buf, sizeof(G.wget_buf), dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100523 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200524 /* FIXME: error check? */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100525 if (G.content_len == 0)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200526 break; /* all done! */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100527 G.got_clen = 1;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200528 }
529
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200530 progress_meter(PROGRESS_END);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200531}
532
Denis Vlasenko9b49a5e2007-10-11 10:05:36 +0000533int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
Denis Vlasenkoa60f84e2008-07-05 09:18:54 +0000534int wget_main(int argc UNUSED_PARAM, char **argv)
Eric Andersen96700832000-09-04 15:15:55 +0000535{
Eric Andersen79757c92001-04-05 21:45:54 +0000536 struct host_info server, target;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +0000537 len_and_sockaddr *lsa;
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000538 unsigned opt;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200539 int redir_limit;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200540 char *proxy = NULL;
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000541 char *dir_prefix = NULL;
542#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denis Vlasenko5a2ad692009-03-04 14:13:37 +0000543 char *post_data;
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000544 char *extra_headers = NULL;
Glenn L McGrath514aeab2003-12-19 12:08:56 +0000545 llist_t *headers_llist = NULL;
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000546#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +0200547 FILE *sfp; /* socket to web/ftp server */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000548 FILE *dfp; /* socket to ftp server (data) */
549 char *fname_out; /* where to direct output (-O) */
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000550 int output_fd = -1;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200551 bool use_proxy; /* Use proxies if env vars are set */
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000552 const char *proxy_flag = "on"; /* Use proxies if env vars are set */
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000553 const char *user_agent = "Wget";/* "User-Agent" header field */
Denis Vlasenko77105632007-09-24 15:04:00 +0000554
Denis Vlasenko6ca409e2007-08-12 20:58:27 +0000555 static const char keywords[] ALIGN1 =
Denis Vlasenko990d0f62007-07-24 15:54:42 +0000556 "content-length\0""transfer-encoding\0""chunked\0""location\0";
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000557 enum {
558 KEY_content_length = 1, KEY_transfer_encoding, KEY_chunked, KEY_location
559 };
Bernhard Reutner-Fischer289e86a2006-08-20 20:01:24 +0000560#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denis Vlasenko6ca409e2007-08-12 20:58:27 +0000561 static const char wget_longopts[] ALIGN1 =
Denis Vlasenkobdc88fd2007-07-23 17:14:14 +0000562 /* name, has_arg, val */
563 "continue\0" No_argument "c"
564 "spider\0" No_argument "s"
565 "quiet\0" No_argument "q"
566 "output-document\0" Required_argument "O"
567 "directory-prefix\0" Required_argument "P"
568 "proxy\0" Required_argument "Y"
569 "user-agent\0" Required_argument "U"
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200570#if ENABLE_FEATURE_WGET_TIMEOUT
571 "timeout\0" Required_argument "T"
572#endif
Denis Vlasenko50af9262009-03-02 15:08:06 +0000573 /* Ignored: */
574 // "tries\0" Required_argument "t"
Denis Vlasenko50af9262009-03-02 15:08:06 +0000575 /* Ignored (we always use PASV): */
Denis Vlasenkobdc88fd2007-07-23 17:14:14 +0000576 "passive-ftp\0" No_argument "\xff"
577 "header\0" Required_argument "\xfe"
Denis Vlasenko5a2ad692009-03-04 14:13:37 +0000578 "post-data\0" Required_argument "\xfd"
Bernhard Reutner-Fischer3fdba182010-02-10 19:37:29 +0100579 /* Ignored (we don't do ssl) */
580 "no-check-certificate\0" No_argument "\xfc"
Denis Vlasenko990d0f62007-07-24 15:54:42 +0000581 ;
Denis Vlasenko77105632007-09-24 15:04:00 +0000582#endif
583
584 INIT_G();
585
586#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denis Vlasenkobdc88fd2007-07-23 17:14:14 +0000587 applet_long_options = wget_longopts;
Bernhard Reutner-Fischer8d3a6f72006-05-31 14:11:38 +0000588#endif
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000589 /* server.allocated = target.allocated = NULL; */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200590 opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
591 opt = getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000592 &fname_out, &dir_prefix,
Denis Vlasenko540ab702008-06-29 00:32:35 +0000593 &proxy_flag, &user_agent,
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200594 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
595 NULL /* -t RETRIES */
Denis Vlasenko5e34ff22009-04-21 11:09:40 +0000596 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
597 IF_FEATURE_WGET_LONG_OPTIONS(, &post_data)
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000598 );
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000599#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denis Vlasenko7534e082006-10-23 23:21:58 +0000600 if (headers_llist) {
601 int size = 1;
602 char *cp;
Denis Vlasenko8d9f4952007-04-08 15:08:42 +0000603 llist_t *ll = headers_llist;
Denis Vlasenko7534e082006-10-23 23:21:58 +0000604 while (ll) {
605 size += strlen(ll->data) + 2;
606 ll = ll->link;
607 }
608 extra_headers = cp = xmalloc(size);
Glenn L McGrath514aeab2003-12-19 12:08:56 +0000609 while (headers_llist) {
Denis Vlasenkod50dda82008-06-15 05:40:56 +0000610 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
Eric Andersen96700832000-09-04 15:15:55 +0000611 }
612 }
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000613#endif
Tim Rikerc1ef7bd2006-01-25 00:08:53 +0000614
Denys Vlasenko7f432802009-06-28 01:02:24 +0200615 /* TODO: compat issue: should handle "wget URL1 URL2..." */
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200616
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200617 target.user = NULL;
Eric Andersen79757c92001-04-05 21:45:54 +0000618 parse_url(argv[optind], &target);
Eric Andersen79757c92001-04-05 21:45:54 +0000619
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000620 /* Use the proxy if necessary */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200621 use_proxy = (strcmp(proxy_flag, "off") != 0);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000622 if (use_proxy) {
Robert Griebld7760112002-05-14 23:36:45 +0000623 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200624 if (proxy && proxy[0]) {
Denys Vlasenko81fe2b12010-02-11 04:23:43 +0100625 server.user = NULL;
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000626 parse_url(proxy, &server);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000627 } else {
628 use_proxy = 0;
629 }
Robert Griebld7760112002-05-14 23:36:45 +0000630 }
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200631 if (!use_proxy) {
632 server.port = target.port;
633 if (ENABLE_FEATURE_IPV6) {
634 server.host = xstrdup(target.host);
635 } else {
636 server.host = target.host;
637 }
638 }
639
640 if (ENABLE_FEATURE_IPV6)
641 strip_ipv6_scope_id(target.host);
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000642
Denis Vlasenko818322b2007-09-24 18:27:04 +0000643 /* Guess an output filename, if there was no -O FILE */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000644 if (!(opt & WGET_OPT_OUTNAME)) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000645 fname_out = bb_get_last_path_component_nostrip(target.path);
646 /* handle "wget http://kernel.org//" */
647 if (fname_out[0] == '/' || !fname_out[0])
Denis Vlasenkob6aae0f2007-01-29 22:51:25 +0000648 fname_out = (char*)"index.html";
Denis Vlasenko818322b2007-09-24 18:27:04 +0000649 /* -P DIR is considered only if there was no -O FILE */
650 if (dir_prefix)
Matt Kraai0382eb82001-07-19 19:13:55 +0000651 fname_out = concat_path_file(dir_prefix, fname_out);
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000652 } else {
653 if (LONE_DASH(fname_out)) {
654 /* -O - */
655 output_fd = 1;
656 opt &= ~WGET_OPT_CONTINUE;
657 }
Eric Andersen29edd002000-12-09 16:55:35 +0000658 }
Denis Vlasenko818322b2007-09-24 18:27:04 +0000659#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100660 G.curfile = bb_get_last_path_component_nostrip(fname_out);
Denis Vlasenko818322b2007-09-24 18:27:04 +0000661#endif
662
Denis Vlasenko4e4662c2006-11-23 13:10:23 +0000663 /* Impossible?
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +0000664 if ((opt & WGET_OPT_CONTINUE) && !fname_out)
Denys Vlasenko6331cf02009-11-13 09:08:27 +0100665 bb_error_msg_and_die("can't specify continue (-c) without a filename (-O)");
Denys Vlasenko7f432802009-06-28 01:02:24 +0200666 */
Eric Andersen29edd002000-12-09 16:55:35 +0000667
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000668 /* Determine where to start transfer */
Denis Vlasenko4e4662c2006-11-23 13:10:23 +0000669 if (opt & WGET_OPT_CONTINUE) {
Denis Vlasenko7039a662006-10-08 17:54:47 +0000670 output_fd = open(fname_out, O_WRONLY);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000671 if (output_fd >= 0) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100672 G.beg_range = xlseek(output_fd, 0, SEEK_END);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000673 }
674 /* File doesn't exist. We do not create file here yet.
Denys Vlasenko7f432802009-06-28 01:02:24 +0200675 * We are not sure it exists on remove side */
Eric Andersen96700832000-09-04 15:15:55 +0000676 }
677
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200678 redir_limit = 5;
679 resolve_lsa:
Denis Vlasenko42823d52007-02-04 02:39:08 +0000680 lsa = xhost2sockaddr(server.host, server.port);
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +0000681 if (!(opt & WGET_OPT_QUIET)) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200682 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
683 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
684 free(s);
Eric Andersene6dc4392003-10-31 09:31:46 +0000685 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200686 establish_session:
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000687 if (use_proxy || !target.is_ftp) {
Eric Andersen79757c92001-04-05 21:45:54 +0000688 /*
689 * HTTP session
690 */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200691 char *str;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200692 int status;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200693
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200694 /* Open socket to http server */
695 sfp = open_socket(lsa);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200696
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200697 /* Send HTTP request */
698 if (use_proxy) {
699 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
700 target.is_ftp ? "f" : "ht", target.host,
701 target.path);
702 } else {
703 if (opt & WGET_OPT_POST_DATA)
704 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
705 else
706 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
707 }
Glenn L McGrathe7bdfcc2003-08-28 22:03:19 +0000708
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200709 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
710 target.host, user_agent);
Eric Andersen79757c92001-04-05 21:45:54 +0000711
Denis Vlasenko9cade082006-11-21 10:43:02 +0000712#if ENABLE_FEATURE_WGET_AUTHENTICATION
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200713 if (target.user) {
714 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100715 base64enc(target.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200716 }
717 if (use_proxy && server.user) {
718 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100719 base64enc(server.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200720 }
Eric Andersen79757c92001-04-05 21:45:54 +0000721#endif
722
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100723 if (G.beg_range)
724 fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000725#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200726 if (extra_headers)
727 fputs(extra_headers, sfp);
Denis Vlasenko5a2ad692009-03-04 14:13:37 +0000728
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200729 if (opt & WGET_OPT_POST_DATA) {
730 char *estr = URL_escape(post_data);
731 fprintf(sfp, "Content-Type: application/x-www-form-urlencoded\r\n");
732 fprintf(sfp, "Content-Length: %u\r\n" "\r\n" "%s",
733 (int) strlen(estr), estr);
734 /*fprintf(sfp, "Connection: Keep-Alive\r\n\r\n");*/
735 /*fprintf(sfp, "%s\r\n", estr);*/
736 free(estr);
737 } else
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000738#endif
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200739 { /* If "Connection:" is needed, document why */
740 fprintf(sfp, /* "Connection: close\r\n" */ "\r\n");
741 }
Eric Andersen79757c92001-04-05 21:45:54 +0000742
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +0200743 fflush(sfp);
744
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200745 /*
746 * Retrieve HTTP response line and check for "200" status code.
747 */
Denis Vlasenko023b57d2006-10-15 17:05:55 +0000748 read_response:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100749 if (fgets(G.wget_buf, sizeof(G.wget_buf), sfp) == NULL)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200750 bb_error_msg_and_die("no response from server");
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000751
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100752 str = G.wget_buf;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200753 str = skip_non_whitespace(str);
754 str = skip_whitespace(str);
755 // FIXME: no error check
756 // xatou wouldn't work: "200 OK"
757 status = atoi(str);
758 switch (status) {
759 case 0:
760 case 100:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100761 while (gethdr(sfp /*, &n*/) != NULL)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200762 /* eat all remaining headers */;
763 goto read_response;
764 case 200:
Denis Vlasenko50b5cac2008-06-22 16:28:02 +0000765/*
766Response 204 doesn't say "null file", it says "metadata
767has changed but data didn't":
768
769"10.2.5 204 No Content
770The server has fulfilled the request but does not need to return
771an entity-body, and might want to return updated metainformation.
772The response MAY include new or updated metainformation in the form
773of entity-headers, which if present SHOULD be associated with
774the requested variant.
775
776If the client is a user agent, it SHOULD NOT change its document
777view from that which caused the request to be sent. This response
778is primarily intended to allow input for actions to take place
779without causing a change to the user agent's active document view,
780although any new or updated metainformation SHOULD be applied
781to the document currently in the user agent's active view.
782
783The 204 response MUST NOT include a message-body, and thus
784is always terminated by the first empty line after the header fields."
785
786However, in real world it was observed that some web servers
787(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
788*/
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200789 case 204:
790 break;
Denys Vlasenkofb132e42010-10-29 11:46:52 +0200791 case 300: /* redirection */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200792 case 301:
793 case 302:
794 case 303:
795 break;
796 case 206:
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100797 if (G.beg_range)
Denis Vlasenko023b57d2006-10-15 17:05:55 +0000798 break;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200799 /* fall through */
800 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100801 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200802 }
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000803
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200804 /*
805 * Retrieve HTTP headers.
806 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100807 while ((str = gethdr(sfp /*, &n*/)) != NULL) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200808 /* gethdr converted "FOO:" string to lowercase */
Matthijs van de Water0d586662009-08-22 20:19:48 +0200809 smalluint key;
810 /* strip trailing whitespace */
811 char *s = strchrnul(str, '\0') - 1;
812 while (s >= str && (*s == ' ' || *s == '\t')) {
813 *s = '\0';
814 s--;
815 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100816 key = index_in_strings(keywords, G.wget_buf) + 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200817 if (key == KEY_content_length) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100818 G.content_len = BB_STRTOOFF(str, NULL, 10);
819 if (G.content_len < 0 || errno) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200820 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
Eric Andersen79757c92001-04-05 21:45:54 +0000821 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200822 G.got_clen = 1;
823 continue;
824 }
825 if (key == KEY_transfer_encoding) {
826 if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
827 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
828 G.chunked = G.got_clen = 1;
829 }
830 if (key == KEY_location && status >= 300) {
831 if (--redir_limit == 0)
832 bb_error_msg_and_die("too many redirections");
833 fclose(sfp);
834 G.got_clen = 0;
835 G.chunked = 0;
836 if (str[0] == '/')
837 /* free(target.allocated); */
838 target.path = /* target.allocated = */ xstrdup(str+1);
839 /* lsa stays the same: it's on the same server */
840 else {
841 parse_url(str, &target);
842 if (!use_proxy) {
843 server.host = target.host;
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200844 /* strip_ipv6_scope_id(target.host); - no! */
845 /* we assume remote never gives us IPv6 addr with scope id */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200846 server.port = target.port;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +0000847 free(lsa);
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200848 goto resolve_lsa;
849 } /* else: lsa stays the same: we use proxy */
Eric Andersen79757c92001-04-05 21:45:54 +0000850 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200851 goto establish_session;
Eric Andersen79757c92001-04-05 21:45:54 +0000852 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200853 }
854// if (status >= 300)
855// bb_error_msg_and_die("bad redirection (no Location: header from server)");
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000856
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200857 /* For HTTP, data is pumped over the same connection */
Eric Andersen79757c92001-04-05 21:45:54 +0000858 dfp = sfp;
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000859
860 } else {
Eric Andersen79757c92001-04-05 21:45:54 +0000861 /*
862 * FTP session
863 */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200864 sfp = prepare_ftp_session(&dfp, &target, lsa);
Eric Andersen96700832000-09-04 15:15:55 +0000865 }
Denis Vlasenko77105632007-09-24 15:04:00 +0000866
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +0000867 if (opt & WGET_OPT_SPIDER) {
868 if (ENABLE_FEATURE_CLEAN_UP)
869 fclose(sfp);
Denis Vlasenko77105632007-09-24 15:04:00 +0000870 return EXIT_SUCCESS;
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +0000871 }
Eric Andersen79757c92001-04-05 21:45:54 +0000872
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000873 if (output_fd < 0) {
874 int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
875 /* compat with wget: -O FILE can overwrite */
876 if (opt & WGET_OPT_OUTNAME)
877 o_flags = O_WRONLY | O_CREAT | O_TRUNC;
878 output_fd = xopen(fname_out, o_flags);
879 }
Denis Vlasenkof8aa1092006-10-01 10:58:54 +0000880
Denys Vlasenko7f432802009-06-28 01:02:24 +0200881 retrieve_file_data(dfp, output_fd);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100882 xclose(output_fd);
Rob Landley19a39402006-06-13 17:10:26 +0000883
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200884 if (dfp != sfp) {
885 /* It's ftp. Close it properly */
Eric Andersen79757c92001-04-05 21:45:54 +0000886 fclose(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100887 if (ftpcmd(NULL, NULL, sfp) != 226)
888 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
889 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
Eric Andersen79757c92001-04-05 21:45:54 +0000890 }
Denis Vlasenko77105632007-09-24 15:04:00 +0000891
892 return EXIT_SUCCESS;
Eric Andersen96700832000-09-04 15:15:55 +0000893}