blob: 76bd5e2609769e940d51df632062ecc2d64b788d [file] [log] [blame]
/* vi: set sw=4 ts=4: */
/*
 * wget - retrieve a file using HTTP or FTP
 *
 * Chip Rosenthal Covad Communications <chip@laserlink.net>
 * Licensed under GPLv2, see file LICENSE in this source tree.
 *
 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
 * Kuhn's copyrights are licensed GPLv2-or-later.  File as a whole remains GPLv2.
 */
Denis Vlasenkob6adbf12007-05-26 19:00:18 +000011#include "libbb.h"
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +000012
/* Protocol trace hook. It expands to nothing by default; to trace every
 * line sent and received, enable the bb_error_msg variant instead. */
//#define log_io(...) bb_error_msg(__VA_ARGS__)
#define log_io(...) ((void)0)
15
16
Eric Andersen79757c92001-04-05 21:45:54 +000017struct host_info {
Denys Vlasenkoa3661092011-02-13 02:33:11 +010018 char *allocated;
Denis Vlasenko818322b2007-09-24 18:27:04 +000019 const char *path;
20 const char *user;
21 char *host;
22 int port;
23 smallint is_ftp;
Eric Andersen79757c92001-04-05 21:45:54 +000024};
25
Denis Vlasenko77105632007-09-24 15:04:00 +000026
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020027/* Globals */
Denis Vlasenko77105632007-09-24 15:04:00 +000028struct globals {
29 off_t content_len; /* Content-length of the file */
30 off_t beg_range; /* Range at which continue begins */
31#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko77105632007-09-24 15:04:00 +000032 off_t transferred; /* Number of bytes transferred so far */
33 const char *curfile; /* Name of current file being transferred */
Magnus Dammf5914992009-11-08 16:34:43 +010034 bb_progress_t pmt;
Denis Vlasenko77105632007-09-24 15:04:00 +000035#endif
Denys Vlasenkoa3661092011-02-13 02:33:11 +010036 char *dir_prefix;
37#if ENABLE_FEATURE_WGET_LONG_OPTIONS
38 char *post_data;
39 char *extra_headers;
40#endif
41 char *fname_out; /* where to direct output (-O) */
42 const char *proxy_flag; /* Use proxies if env vars are set */
43 const char *user_agent; /* "User-Agent" header field */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020044#if ENABLE_FEATURE_WGET_TIMEOUT
45 unsigned timeout_seconds;
46#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +020047 smallint chunked; /* chunked transfer encoding */
48 smallint got_clen; /* got content-length: from server */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010049 /* Local downloads do benefit from big buffer.
50 * With 512 byte buffer, it was measured to be
51 * an order of magnitude slower than with big one.
52 */
53 uint64_t just_to_align_next_member;
54 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
Denys Vlasenko98a4c7c2010-02-04 15:00:15 +010055} FIX_ALIASING;
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010056#define G (*ptr_to_globals)
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020057#define INIT_G() do { \
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010058 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020059 IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \
60} while (0)
Denis Vlasenko77105632007-09-24 15:04:00 +000061
62
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020063/* Must match option string! */
64enum {
65 WGET_OPT_CONTINUE = (1 << 0),
Denys Vlasenkofb132e42010-10-29 11:46:52 +020066 WGET_OPT_SPIDER = (1 << 1),
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020067 WGET_OPT_QUIET = (1 << 2),
68 WGET_OPT_OUTNAME = (1 << 3),
69 WGET_OPT_PREFIX = (1 << 4),
70 WGET_OPT_PROXY = (1 << 5),
71 WGET_OPT_USER_AGENT = (1 << 6),
72 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
73 WGET_OPT_RETRIES = (1 << 8),
74 WGET_OPT_PASSIVE = (1 << 9),
75 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
76 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
77};
78
/* Phase argument for progress_meter() */
enum {
	PROGRESS_START = -1, /* set up the meter before the first update */
	PROGRESS_END,        /* 0: final update, then tear the meter down */
	PROGRESS_BUMP,       /* 1: ordinary update */
};
Denis Vlasenko9cade082006-11-21 10:43:02 +000084#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko00d84172008-11-24 07:34:42 +000085static void progress_meter(int flag)
Denis Vlasenko47ddd012007-09-24 18:24:17 +000086{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020087 if (option_mask32 & WGET_OPT_QUIET)
88 return;
Denis Vlasenko47ddd012007-09-24 18:24:17 +000089
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020090 if (flag == PROGRESS_START)
Denys Vlasenkod55e1392011-02-11 18:56:13 +010091 bb_progress_init(&G.pmt, G.curfile);
Denis Vlasenko47ddd012007-09-24 18:24:17 +000092
Denys Vlasenkod55e1392011-02-11 18:56:13 +010093 bb_progress_update(&G.pmt, G.beg_range, G.transferred,
Denys Vlasenkoc5bbd5d2010-07-12 03:27:09 +020094 G.chunked ? 0 : G.beg_range + G.transferred + G.content_len);
Denis Vlasenko47ddd012007-09-24 18:24:17 +000095
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020096 if (flag == PROGRESS_END) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +010097 bb_progress_free(&G.pmt);
Denys Vlasenko19ced5c2010-06-06 21:53:09 +020098 bb_putchar_stderr('\n');
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +010099 G.transferred = 0;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000100 }
101}
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200102#else
Denis Vlasenko00d84172008-11-24 07:34:42 +0000103static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
Eric Andersenb520e082000-10-03 00:21:45 +0000104#endif
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000105
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000106
/* IPv6 has scoped address types, i.e. link-local and site-local addresses.
 * A link-local address may carry a scope identifier naming the
 * interface/link it is valid on (e.g. fe80::1%eth0). That identifier
 * only has meaning on a single node.
 *
 * RFC 4007 says the scope identifier MUST NOT be sent across the wire
 * unless all nodes agree on its semantics. Apache, for one, treats zone
 * identifiers in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Edits host in place: "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 * Anything that is not "[addr%zone]" or "[addr%zone]:port" is left alone.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *zone;
	char *bracket;

	/* bbox wget does accept IPv6 addresses without [], as in
	 * wget "http://::1/xxx", but that is not standard.
	 * To save code, it is not supported _here_. */
	if (host[0] != '[')
		return; /* not IPv6 */

	zone = strchr(host, '%');
	if (zone == NULL)
		return; /* no zone identifier present */

	bracket = strchr(host, ']');
	if (bracket == NULL)
		return; /* malformed: no closing bracket */
	if (bracket[1] != ':' && bracket[1] != '\0')
		return; /* malformed: neither "[xx]:nn" nor "[xx]" */

	/* bracket points to "]...", zone points to "%eth0]...":
	 * slide the tail down over the zone identifier */
	overlapping_strcpy(zone, bracket);
}
142
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100143#if 0 /* were needed when we used signal-driven progress bar */
/* Read up to NMEMB bytes from STREAM into PTR, restarting after reads
 * interrupted by a signal (EINTR). Returns the number of bytes stored;
 * the count is short when EOF or a non-interrupt error is hit. */
static size_t safe_fread(void *ptr, size_t nmemb, FILE *stream)
{
	char *dst = (char*)ptr;
	size_t left = nmemb;

	for (;;) {
		size_t got;

		/* start each attempt with clean error state so that
		 * ferror/errno below describe this fread only */
		clearerr(stream);
		errno = 0;
		got = fread(dst, 1, left, stream);
		dst += got;
		left -= got;

		if (left == 0)
			break; /* got everything */
		if (!ferror(stream) || errno != EINTR)
			break; /* EOF or a real error */
	}

	return dst - (char*)ptr;
}
161
/* Read one line, or at most SIZE-1 bytes, from STREAM into S, restarting
 * after reads interrupted by a signal (EINTR).
 * Returns S, or NULL on EOF or a non-interrupt error. */
static char *safe_fgets(char *s, int size, FILE *stream)
{
	for (;;) {
		char *line;

		clearerr(stream);
		errno = 0;
		line = fgets(s, size, stream);

		/* retry only when fgets failed because of EINTR */
		if (line != NULL || !ferror(stream) || errno != EINTR)
			return line;
	}
}
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100176#endif
Matt Kraai854125f2001-05-09 19:15:46 +0000177
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/* Base64-encode a character string.
 * The result is written to, and returned as, the shared G.wget_buf,
 * so it is only valid until the buffer is reused. Overlong input is
 * silently truncated so the encoded form fits the buffer.
 */
static char *base64enc(const char *str)
{
	/* paranoia: base64 turns 3 input bytes into 4 output bytes */
	const size_t limit = sizeof(G.wget_buf)/4*3 - 10;
	unsigned len = strlen(str);

	if (len > limit)
		len = limit;
	bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
	return G.wget_buf;
}
#endif
189
/* Cut s off at its first control character (any byte below ' ').
 * Bytes are compared as unsigned, so bytes 0x80..0xff are kept.
 * Modifies s in place and returns it.
 */
static char* sanitize_string(char *s)
{
	unsigned char *end;

	for (end = (unsigned char *) s; *end >= ' '; end++)
		continue;
	*end = '\0';
	return s;
}
198
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000199static FILE *open_socket(len_and_sockaddr *lsa)
200{
201 FILE *fp;
202
203 /* glibc 2.4 seems to try seeking on it - ??! */
204 /* hopefully it understands what ESPIPE means... */
205 fp = fdopen(xconnect_stream(lsa), "r+");
206 if (fp == NULL)
Denys Vlasenkodee0fc92011-02-10 10:01:49 +0100207 bb_perror_msg_and_die(bb_msg_memory_exhausted);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000208
209 return fp;
210}
211
Denys Vlasenkof836f012011-02-10 23:02:28 +0100212/* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
213static char fgets_and_trim(FILE *fp)
214{
215 char c;
216 char *buf_ptr;
217
218 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
219 bb_perror_msg_and_die("error getting response");
220
221 buf_ptr = strchrnul(G.wget_buf, '\n');
222 c = *buf_ptr;
223 *buf_ptr = '\0';
224 buf_ptr = strchrnul(G.wget_buf, '\r');
225 *buf_ptr = '\0';
226
227 log_io("< %s", G.wget_buf);
228
229 return c;
230}
231
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100232static int ftpcmd(const char *s1, const char *s2, FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000233{
234 int result;
235 if (s1) {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100236 if (!s2)
237 s2 = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000238 fprintf(fp, "%s%s\r\n", s1, s2);
239 fflush(fp);
Denys Vlasenkof836f012011-02-10 23:02:28 +0100240 log_io("> %s%s", s1, s2);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000241 }
242
243 do {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100244 fgets_and_trim(fp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100245 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000246
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100247 G.wget_buf[3] = '\0';
248 result = xatoi_positive(G.wget_buf);
249 G.wget_buf[3] = ' ';
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000250 return result;
251}
252
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100253static void parse_url(const char *src_url, struct host_info *h)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000254{
255 char *url, *p, *sp;
256
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100257 free(h->allocated);
258 h->allocated = url = xstrdup(src_url);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000259
260 if (strncmp(url, "http://", 7) == 0) {
261 h->port = bb_lookup_port("http", "tcp", 80);
262 h->host = url + 7;
263 h->is_ftp = 0;
264 } else if (strncmp(url, "ftp://", 6) == 0) {
265 h->port = bb_lookup_port("ftp", "tcp", 21);
266 h->host = url + 6;
267 h->is_ftp = 1;
268 } else
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200269 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000270
271 // FYI:
272 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
273 // 'GET /?var=a/b HTTP 1.0'
274 // and saves 'index.html?var=a%2Fb' (we save 'b')
275 // wget 'http://busybox.net?login=john@doe':
276 // request: 'GET /?login=john@doe HTTP/1.0'
277 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
278 // wget 'http://busybox.net#test/test':
279 // request: 'GET / HTTP/1.0'
280 // saves: 'index.html' (we save 'test')
281 //
282 // We also don't add unique .N suffix if file exists...
283 sp = strchr(h->host, '/');
284 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
285 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
286 if (!sp) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000287 h->path = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000288 } else if (*sp == '/') {
289 *sp = '\0';
290 h->path = sp + 1;
291 } else { // '#' or '?'
292 // http://busybox.net?login=john@doe is a valid URL
293 // memmove converts to:
294 // http:/busybox.nett?login=john@doe...
Denis Vlasenko818322b2007-09-24 18:27:04 +0000295 memmove(h->host - 1, h->host, sp - h->host);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000296 h->host--;
297 sp[-1] = '\0';
298 h->path = sp;
299 }
300
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200301 // We used to set h->user to NULL here, but this interferes
302 // with handling of code 302 ("object was moved")
303
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000304 sp = strrchr(h->host, '@');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000305 if (sp != NULL) {
306 h->user = h->host;
307 *sp = '\0';
308 h->host = sp + 1;
309 }
310
311 sp = h->host;
312}
313
Denys Vlasenkof836f012011-02-10 23:02:28 +0100314static char *gethdr(FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000315{
316 char *s, *hdrval;
317 int c;
318
319 /* *istrunc = 0; */
320
321 /* retrieve header line */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100322 c = fgets_and_trim(fp);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000323
Denys Vlasenkof836f012011-02-10 23:02:28 +0100324 /* end of the headers? */
325 if (G.wget_buf[0] == '\0')
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000326 return NULL;
327
328 /* convert the header name to lower case */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100329 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.'; ++s) {
Denys Vlasenko48363312010-04-04 15:29:32 +0200330 /* tolower for "A-Z", no-op for "0-9a-z-." */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100331 *s |= 0x20;
Denys Vlasenko48363312010-04-04 15:29:32 +0200332 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000333
334 /* verify we are at the end of the header name */
335 if (*s != ':')
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100336 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000337
338 /* locate the start of the header value */
339 *s++ = '\0';
340 hdrval = skip_whitespace(s);
341
Denys Vlasenkof836f012011-02-10 23:02:28 +0100342 if (c != '\n') {
343 /* Rats! The buffer isn't big enough to hold the entire header value */
344 while (c = getc(fp), c != EOF && c != '\n')
345 continue;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000346 }
347
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000348 return hdrval;
349}
350
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
/* URL-encode str, see RFC 2396.
 * ASCII letters, digits and the characters !&'()*-.=_~ are copied as-is;
 * every other byte becomes %XX with uppercase hex digits.
 * Returns a newly allocated string.
 */
static char *URL_escape(const char *str)
{
	/* worst case: every byte expands to three, plus the NUL */
	char *res = xmalloc(strlen(str) * 3 + 1);
	char *out = res;

	for (;;) {
		unsigned char c = *str++;
		unsigned char lower = c | 0x20;
		int literal;

		literal = (c == '\0')
			|| (c >= '0' && c <= '9')
			|| (lower >= 'a' && lower <= 'z');
		switch (c) {
		case '!': case '&': case '\'': case '(': case ')': case '*':
		case '-': case '.': case '=': case '_': case '~':
			literal = 1;
			break;
		default:
			break;
		}

		if (!literal) {
			*out++ = '%';
			*out++ = bb_hexdigits_upcase[c >> 4];
			*out++ = bb_hexdigits_upcase[c & 0xf];
			continue;
		}

		*out++ = c;
		if (c == '\0')
			return res; /* terminator copied: done */
	}
}
#endif
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000388
Denys Vlasenko7f432802009-06-28 01:02:24 +0200389static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
390{
Denys Vlasenko7f432802009-06-28 01:02:24 +0200391 FILE *sfp;
392 char *str;
393 int port;
394
395 if (!target->user)
396 target->user = xstrdup("anonymous:busybox@");
397
398 sfp = open_socket(lsa);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100399 if (ftpcmd(NULL, NULL, sfp) != 220)
400 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200401
402 /*
403 * Splitting username:password pair,
404 * trying to log in
405 */
406 str = strchr(target->user, ':');
407 if (str)
408 *str++ = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100409 switch (ftpcmd("USER ", target->user, sfp)) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200410 case 230:
411 break;
412 case 331:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100413 if (ftpcmd("PASS ", str, sfp) == 230)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200414 break;
415 /* fall through (failed login) */
416 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100417 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200418 }
419
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100420 ftpcmd("TYPE I", NULL, sfp);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200421
422 /*
423 * Querying file size
424 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100425 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
426 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100427 if (G.content_len < 0 || errno) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200428 bb_error_msg_and_die("SIZE value is garbage");
429 }
430 G.got_clen = 1;
431 }
432
433 /*
434 * Entering passive mode
435 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100436 if (ftpcmd("PASV", NULL, sfp) != 227) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200437 pasv_error:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100438 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200439 }
440 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
441 // Server's IP is N1.N2.N3.N4 (we ignore it)
442 // Server's port for data connection is P1*256+P2
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100443 str = strrchr(G.wget_buf, ')');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200444 if (str) str[0] = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100445 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200446 if (!str) goto pasv_error;
447 port = xatou_range(str+1, 0, 255);
448 *str = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100449 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200450 if (!str) goto pasv_error;
451 port += xatou_range(str+1, 0, 255) * 256;
452 set_nport(lsa, htons(port));
453
454 *dfpp = open_socket(lsa);
455
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100456 if (G.beg_range) {
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100457 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
458 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100459 G.content_len -= G.beg_range;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200460 }
461
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100462 if (ftpcmd("RETR ", target->path, sfp) > 150)
463 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200464
465 return sfp;
466}
467
Denys Vlasenko7f432802009-06-28 01:02:24 +0200468static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd)
469{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200470#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
471# if ENABLE_FEATURE_WGET_TIMEOUT
472 unsigned second_cnt;
473# endif
474 struct pollfd polldata;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200475
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200476 polldata.fd = fileno(dfp);
477 polldata.events = POLLIN | POLLPRI;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200478#endif
479 progress_meter(PROGRESS_START);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200480
481 if (G.chunked)
482 goto get_clen;
483
484 /* Loops only if chunked */
485 while (1) {
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100486
487#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
488 /* Must use nonblocking I/O, otherwise fread will loop
489 * and *block* until it reads full buffer,
490 * which messes up progress bar and/or timeout logic.
491 * Because of nonblocking I/O, we need to dance
492 * very carefully around EAGAIN. See explanation at
493 * clearerr() call.
494 */
495 ndelay_on(polldata.fd);
496#endif
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100497 while (1) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200498 int n;
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100499 unsigned rdsz;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200500
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100501 rdsz = sizeof(G.wget_buf);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100502 if (G.got_clen) {
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100503 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100504 if ((int)G.content_len <= 0)
505 break;
506 rdsz = (unsigned)G.content_len;
507 }
508 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100509
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200510#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
511# if ENABLE_FEATURE_WGET_TIMEOUT
512 second_cnt = G.timeout_seconds;
513# endif
514 while (1) {
515 if (safe_poll(&polldata, 1, 1000) != 0)
516 break; /* error, EOF, or data is available */
517# if ENABLE_FEATURE_WGET_TIMEOUT
518 if (second_cnt != 0 && --second_cnt == 0) {
519 progress_meter(PROGRESS_END);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100520 bb_error_msg_and_die("download timed out");
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200521 }
522# endif
523 /* Needed for "stalled" indicator */
524 progress_meter(PROGRESS_BUMP);
525 }
Denys Vlasenkof9af3752011-02-11 22:01:33 +0100526
Denys Vlasenko8766a792011-02-11 21:42:00 +0100527 /* fread internally uses read loop, which in our case
528 * is usually exited when we get EAGAIN.
529 * In this case, libc sets error marker on the stream.
530 * Need to clear it before next fread to avoid possible
531 * rare false positive ferror below. Rare because usually
532 * fread gets more than zero bytes, and we don't fall
533 * into if (n <= 0) ...
534 */
535 clearerr(dfp);
536 errno = 0;
Denys Vlasenkof9af3752011-02-11 22:01:33 +0100537#endif
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100538 n = fread(G.wget_buf, 1, rdsz, dfp);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100539 /* man fread:
540 * If error occurs, or EOF is reached, the return value
541 * is a short item count (or zero).
542 * fread does not distinguish between EOF and error.
543 */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200544 if (n <= 0) {
Denys Vlasenko8766a792011-02-11 21:42:00 +0100545#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
546 if (errno == EAGAIN) /* poll lied, there is no data? */
547 continue; /* yes */
548#endif
549 if (ferror(dfp))
550 bb_perror_msg_and_die(bb_msg_read_error);
551 break; /* EOF, not error */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200552 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100553
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100554 xwrite(output_fd, G.wget_buf, n);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100555
Denys Vlasenko7f432802009-06-28 01:02:24 +0200556#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100557 G.transferred += n;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200558 progress_meter(PROGRESS_BUMP);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200559#endif
Denys Vlasenko9213a552011-02-10 13:23:45 +0100560 if (G.got_clen) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100561 G.content_len -= n;
Denys Vlasenko9213a552011-02-10 13:23:45 +0100562 if (G.content_len == 0)
563 break;
564 }
Denys Vlasenko7f432802009-06-28 01:02:24 +0200565 }
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100566#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
567 clearerr(dfp);
Denys Vlasenko88ad9da2011-02-11 23:06:21 +0100568 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100569#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +0200570 if (!G.chunked)
571 break;
572
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100573 fgets_and_trim(dfp); /* Eat empty line */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200574 get_clen:
Denys Vlasenkof836f012011-02-10 23:02:28 +0100575 fgets_and_trim(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100576 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200577 /* FIXME: error check? */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100578 if (G.content_len == 0)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200579 break; /* all done! */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100580 G.got_clen = 1;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200581 }
582
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100583 /* Draw full bar and free its resources */
584 G.chunked = 0; /* makes it show 100% even for chunked download */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200585 progress_meter(PROGRESS_END);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200586}
587
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100588static int download_one_url(const char *url)
Eric Andersen96700832000-09-04 15:15:55 +0000589{
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100590 bool use_proxy; /* Use proxies if env vars are set */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200591 int redir_limit;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100592 int output_fd;
593 len_and_sockaddr *lsa;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200594 FILE *sfp; /* socket to web/ftp server */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000595 FILE *dfp; /* socket to ftp server (data) */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100596 char *proxy = NULL;
597 char *fname_out_alloc;
598 struct host_info server;
599 struct host_info target;
Denis Vlasenko77105632007-09-24 15:04:00 +0000600
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100601 server.allocated = NULL;
602 target.allocated = NULL;
603 server.user = NULL;
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200604 target.user = NULL;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100605
606 parse_url(url, &target);
Eric Andersen79757c92001-04-05 21:45:54 +0000607
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000608 /* Use the proxy if necessary */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100609 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000610 if (use_proxy) {
Robert Griebld7760112002-05-14 23:36:45 +0000611 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200612 if (proxy && proxy[0]) {
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000613 parse_url(proxy, &server);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000614 } else {
615 use_proxy = 0;
616 }
Robert Griebld7760112002-05-14 23:36:45 +0000617 }
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200618 if (!use_proxy) {
619 server.port = target.port;
620 if (ENABLE_FEATURE_IPV6) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100621 //free(server.allocated); - can't be non-NULL
622 server.host = server.allocated = xstrdup(target.host);
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200623 } else {
624 server.host = target.host;
625 }
626 }
627
628 if (ENABLE_FEATURE_IPV6)
629 strip_ipv6_scope_id(target.host);
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000630
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100631 /* If there was no -O FILE, guess output filename */
632 output_fd = -1;
633 fname_out_alloc = NULL;
634 if (!G.fname_out) {
635 G.fname_out = bb_get_last_path_component_nostrip(target.path);
Denis Vlasenko818322b2007-09-24 18:27:04 +0000636 /* handle "wget http://kernel.org//" */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100637 if (G.fname_out[0] == '/' || !G.fname_out[0])
638 G.fname_out = (char*)"index.html";
Denis Vlasenko818322b2007-09-24 18:27:04 +0000639 /* -P DIR is considered only if there was no -O FILE */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100640 if (G.dir_prefix)
641 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000642 } else {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100643 if (LONE_DASH(G.fname_out)) {
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000644 /* -O - */
645 output_fd = 1;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100646 option_mask32 &= ~WGET_OPT_CONTINUE;
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000647 }
Eric Andersen29edd002000-12-09 16:55:35 +0000648 }
Denis Vlasenko818322b2007-09-24 18:27:04 +0000649#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100650 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
Denis Vlasenko818322b2007-09-24 18:27:04 +0000651#endif
652
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000653 /* Determine where to start transfer */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100654 if (option_mask32 & WGET_OPT_CONTINUE) {
655 output_fd = open(G.fname_out, O_WRONLY);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000656 if (output_fd >= 0) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100657 G.beg_range = xlseek(output_fd, 0, SEEK_END);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000658 }
659 /* File doesn't exist. We do not create file here yet.
Denys Vlasenkoa84eadf2011-02-12 23:40:31 +0100660 * We are not sure it exists on remote side */
Eric Andersen96700832000-09-04 15:15:55 +0000661 }
662
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200663 redir_limit = 5;
664 resolve_lsa:
Denis Vlasenko42823d52007-02-04 02:39:08 +0000665 lsa = xhost2sockaddr(server.host, server.port);
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100666 if (!(option_mask32 & WGET_OPT_QUIET)) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200667 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
668 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
669 free(s);
Eric Andersene6dc4392003-10-31 09:31:46 +0000670 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200671 establish_session:
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100672 G.chunked = G.got_clen = 0;
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000673 if (use_proxy || !target.is_ftp) {
Eric Andersen79757c92001-04-05 21:45:54 +0000674 /*
675 * HTTP session
676 */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200677 char *str;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200678 int status;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200679
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100680
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200681 /* Open socket to http server */
682 sfp = open_socket(lsa);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200683
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200684 /* Send HTTP request */
685 if (use_proxy) {
686 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
687 target.is_ftp ? "f" : "ht", target.host,
688 target.path);
689 } else {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100690 if (option_mask32 & WGET_OPT_POST_DATA)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200691 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
692 else
693 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
694 }
Glenn L McGrathe7bdfcc2003-08-28 22:03:19 +0000695
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200696 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100697 target.host, G.user_agent);
Eric Andersen79757c92001-04-05 21:45:54 +0000698
Denys Vlasenko9213a552011-02-10 13:23:45 +0100699 /* Ask server to close the connection as soon as we are done
700 * (IOW: we do not intend to send more requests)
701 */
702 fprintf(sfp, "Connection: close\r\n");
703
Denis Vlasenko9cade082006-11-21 10:43:02 +0000704#if ENABLE_FEATURE_WGET_AUTHENTICATION
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200705 if (target.user) {
706 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100707 base64enc(target.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200708 }
709 if (use_proxy && server.user) {
710 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100711 base64enc(server.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200712 }
Eric Andersen79757c92001-04-05 21:45:54 +0000713#endif
714
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100715 if (G.beg_range)
716 fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
Denys Vlasenko9213a552011-02-10 13:23:45 +0100717
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000718#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100719 if (G.extra_headers)
720 fputs(G.extra_headers, sfp);
Denis Vlasenko5a2ad692009-03-04 14:13:37 +0000721
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100722 if (option_mask32 & WGET_OPT_POST_DATA) {
723 char *estr = URL_escape(G.post_data);
Denys Vlasenko9213a552011-02-10 13:23:45 +0100724 fprintf(sfp,
725 "Content-Type: application/x-www-form-urlencoded\r\n"
726 "Content-Length: %u\r\n"
727 "\r\n"
728 "%s",
729 (int) strlen(estr), estr
730 );
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200731 free(estr);
732 } else
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000733#endif
Denys Vlasenko9213a552011-02-10 13:23:45 +0100734 {
735 fprintf(sfp, "\r\n");
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200736 }
Eric Andersen79757c92001-04-05 21:45:54 +0000737
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +0200738 fflush(sfp);
739
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200740 /*
741 * Retrieve HTTP response line and check for "200" status code.
742 */
Denis Vlasenko023b57d2006-10-15 17:05:55 +0000743 read_response:
Denys Vlasenkof836f012011-02-10 23:02:28 +0100744 fgets_and_trim(sfp);
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000745
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100746 str = G.wget_buf;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200747 str = skip_non_whitespace(str);
748 str = skip_whitespace(str);
749 // FIXME: no error check
750 // xatou wouldn't work: "200 OK"
751 status = atoi(str);
752 switch (status) {
753 case 0:
754 case 100:
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100755 while (gethdr(sfp) != NULL)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200756 /* eat all remaining headers */;
757 goto read_response;
758 case 200:
Denis Vlasenko50b5cac2008-06-22 16:28:02 +0000759/*
760Response 204 doesn't say "null file", it says "metadata
761has changed but data didn't":
762
763"10.2.5 204 No Content
764The server has fulfilled the request but does not need to return
765an entity-body, and might want to return updated metainformation.
766The response MAY include new or updated metainformation in the form
767of entity-headers, which if present SHOULD be associated with
768the requested variant.
769
770If the client is a user agent, it SHOULD NOT change its document
771view from that which caused the request to be sent. This response
772is primarily intended to allow input for actions to take place
773without causing a change to the user agent's active document view,
774although any new or updated metainformation SHOULD be applied
775to the document currently in the user agent's active view.
776
777The 204 response MUST NOT include a message-body, and thus
778is always terminated by the first empty line after the header fields."
779
780However, in real world it was observed that some web servers
781(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
782*/
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200783 case 204:
784 break;
Denys Vlasenkofb132e42010-10-29 11:46:52 +0200785 case 300: /* redirection */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200786 case 301:
787 case 302:
788 case 303:
789 break;
790 case 206:
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100791 if (G.beg_range)
Denis Vlasenko023b57d2006-10-15 17:05:55 +0000792 break;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200793 /* fall through */
794 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100795 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200796 }
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000797
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200798 /*
799 * Retrieve HTTP headers.
800 */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100801 while ((str = gethdr(sfp)) != NULL) {
802 static const char keywords[] ALIGN1 =
803 "content-length\0""transfer-encoding\0""location\0";
804 enum {
805 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
806 };
Matthijs van de Water0d586662009-08-22 20:19:48 +0200807 smalluint key;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100808
809 /* gethdr converted "FOO:" string to lowercase */
810
Matthijs van de Water0d586662009-08-22 20:19:48 +0200811 /* strip trailing whitespace */
812 char *s = strchrnul(str, '\0') - 1;
813 while (s >= str && (*s == ' ' || *s == '\t')) {
814 *s = '\0';
815 s--;
816 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100817 key = index_in_strings(keywords, G.wget_buf) + 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200818 if (key == KEY_content_length) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100819 G.content_len = BB_STRTOOFF(str, NULL, 10);
820 if (G.content_len < 0 || errno) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200821 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
Eric Andersen79757c92001-04-05 21:45:54 +0000822 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200823 G.got_clen = 1;
824 continue;
825 }
826 if (key == KEY_transfer_encoding) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100827 if (strcmp(str_tolower(str), "chunked") != 0)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200828 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100829 G.chunked = 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200830 }
831 if (key == KEY_location && status >= 300) {
832 if (--redir_limit == 0)
833 bb_error_msg_and_die("too many redirections");
834 fclose(sfp);
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100835 if (str[0] == '/') {
836 free(target.allocated);
837 target.path = target.allocated = xstrdup(str+1);
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200838 /* lsa stays the same: it's on the same server */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100839 } else {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200840 parse_url(str, &target);
841 if (!use_proxy) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100842 free(server.allocated);
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200843 server.host = target.host;
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200844 /* strip_ipv6_scope_id(target.host); - no! */
845 /* we assume remote never gives us IPv6 addr with scope id */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200846 server.port = target.port;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +0000847 free(lsa);
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200848 goto resolve_lsa;
849 } /* else: lsa stays the same: we use proxy */
Eric Andersen79757c92001-04-05 21:45:54 +0000850 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200851 goto establish_session;
Eric Andersen79757c92001-04-05 21:45:54 +0000852 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200853 }
854// if (status >= 300)
855// bb_error_msg_and_die("bad redirection (no Location: header from server)");
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000856
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200857 /* For HTTP, data is pumped over the same connection */
Eric Andersen79757c92001-04-05 21:45:54 +0000858 dfp = sfp;
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000859
860 } else {
Eric Andersen79757c92001-04-05 21:45:54 +0000861 /*
862 * FTP session
863 */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200864 sfp = prepare_ftp_session(&dfp, &target, lsa);
Eric Andersen96700832000-09-04 15:15:55 +0000865 }
Denis Vlasenko77105632007-09-24 15:04:00 +0000866
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100867 free(lsa);
868 free(server.allocated);
869 free(target.allocated);
870
871 if (option_mask32 & WGET_OPT_SPIDER) {
872 free(fname_out_alloc);
873 fclose(sfp);
Denis Vlasenko77105632007-09-24 15:04:00 +0000874 return EXIT_SUCCESS;
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +0000875 }
Eric Andersen79757c92001-04-05 21:45:54 +0000876
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000877 if (output_fd < 0) {
878 int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
879 /* compat with wget: -O FILE can overwrite */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100880 if (option_mask32 & WGET_OPT_OUTNAME)
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000881 o_flags = O_WRONLY | O_CREAT | O_TRUNC;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100882 output_fd = xopen(G.fname_out, o_flags);
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000883 }
Denis Vlasenkof8aa1092006-10-01 10:58:54 +0000884
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100885 free(fname_out_alloc);
886
Denys Vlasenko7f432802009-06-28 01:02:24 +0200887 retrieve_file_data(dfp, output_fd);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100888 xclose(output_fd);
Rob Landley19a39402006-06-13 17:10:26 +0000889
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200890 if (dfp != sfp) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100891 /* It's ftp. Close data connection properly */
Eric Andersen79757c92001-04-05 21:45:54 +0000892 fclose(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100893 if (ftpcmd(NULL, NULL, sfp) != 226)
894 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
895 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
Eric Andersen79757c92001-04-05 21:45:54 +0000896 }
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100897 fclose(sfp);
Denis Vlasenko77105632007-09-24 15:04:00 +0000898
899 return EXIT_SUCCESS;
Eric Andersen96700832000-09-04 15:15:55 +0000900}
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100901
902int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
903int wget_main(int argc UNUSED_PARAM, char **argv)
904{
905#if ENABLE_FEATURE_WGET_LONG_OPTIONS
906 static const char wget_longopts[] ALIGN1 =
907 /* name, has_arg, val */
908 "continue\0" No_argument "c"
909//FIXME: -s isn't --spider, it's --save-headers!
910 "spider\0" No_argument "s"
911 "quiet\0" No_argument "q"
912 "output-document\0" Required_argument "O"
913 "directory-prefix\0" Required_argument "P"
914 "proxy\0" Required_argument "Y"
915 "user-agent\0" Required_argument "U"
916#if ENABLE_FEATURE_WGET_TIMEOUT
917 "timeout\0" Required_argument "T"
918#endif
919 /* Ignored: */
920 // "tries\0" Required_argument "t"
921 /* Ignored (we always use PASV): */
922 "passive-ftp\0" No_argument "\xff"
923 "header\0" Required_argument "\xfe"
924 "post-data\0" Required_argument "\xfd"
925 /* Ignored (we don't do ssl) */
926 "no-check-certificate\0" No_argument "\xfc"
927 ;
928#endif
929
930 int exitcode;
931#if ENABLE_FEATURE_WGET_LONG_OPTIONS
932 llist_t *headers_llist = NULL;
933#endif
934
935 INIT_G();
936
937 IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;)
938 G.proxy_flag = "on"; /* use proxies if env vars are set */
939 G.user_agent = "Wget"; /* "User-Agent" header field */
940
941#if ENABLE_FEATURE_WGET_LONG_OPTIONS
942 applet_long_options = wget_longopts;
943#endif
944 opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
945 getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
946 &G.fname_out, &G.dir_prefix,
947 &G.proxy_flag, &G.user_agent,
948 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
949 NULL /* -t RETRIES */
950 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
951 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
952 );
953 argv += optind;
954
955#if ENABLE_FEATURE_WGET_LONG_OPTIONS
956 if (headers_llist) {
957 int size = 1;
958 char *cp;
959 llist_t *ll = headers_llist;
960 while (ll) {
961 size += strlen(ll->data) + 2;
962 ll = ll->link;
963 }
964 G.extra_headers = cp = xmalloc(size);
965 while (headers_llist) {
966 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
967 }
968 }
969#endif
970
971 exitcode = 0;
972 while (*argv)
973 exitcode |= download_one_url(*argv++);
974
975 return exitcode;
976}