blob: 6c015dccca43f51a3b84d7094bab612de273f32d [file] [log] [blame]
/* vi: set sw=4 ts=4: */
/*
 * wget - retrieve a file using HTTP or FTP
 *
 * Chip Rosenthal Covad Communications <chip@laserlink.net>
 * Licensed under GPLv2, see file LICENSE in this source tree.
 *
 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
 * Kuhn's copyrights are licensed GPLv2-or-later.  File as a whole remains GPLv2.
 */
Denis Vlasenkob6adbf12007-05-26 19:00:18 +000011#include "libbb.h"
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +000012
/* Protocol trace hook: change "#if 0" to "#if 1" to print every
 * line sent to or received from the server via bb_error_msg(). */
#if 0
# define log_io(...) bb_error_msg(__VA_ARGS__)
#else
# define log_io(...) ((void)0)
#endif
15
16
Eric Andersen79757c92001-04-05 21:45:54 +000017struct host_info {
Denys Vlasenkoa3661092011-02-13 02:33:11 +010018 char *allocated;
Denis Vlasenko818322b2007-09-24 18:27:04 +000019 const char *path;
20 const char *user;
21 char *host;
22 int port;
23 smallint is_ftp;
Eric Andersen79757c92001-04-05 21:45:54 +000024};
25
Denis Vlasenko77105632007-09-24 15:04:00 +000026
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020027/* Globals */
Denis Vlasenko77105632007-09-24 15:04:00 +000028struct globals {
29 off_t content_len; /* Content-length of the file */
30 off_t beg_range; /* Range at which continue begins */
31#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko77105632007-09-24 15:04:00 +000032 off_t transferred; /* Number of bytes transferred so far */
33 const char *curfile; /* Name of current file being transferred */
Magnus Dammf5914992009-11-08 16:34:43 +010034 bb_progress_t pmt;
Denis Vlasenko77105632007-09-24 15:04:00 +000035#endif
Denys Vlasenkoa3661092011-02-13 02:33:11 +010036 char *dir_prefix;
37#if ENABLE_FEATURE_WGET_LONG_OPTIONS
38 char *post_data;
39 char *extra_headers;
40#endif
41 char *fname_out; /* where to direct output (-O) */
42 const char *proxy_flag; /* Use proxies if env vars are set */
43 const char *user_agent; /* "User-Agent" header field */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020044#if ENABLE_FEATURE_WGET_TIMEOUT
45 unsigned timeout_seconds;
46#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +020047 smallint chunked; /* chunked transfer encoding */
48 smallint got_clen; /* got content-length: from server */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010049 /* Local downloads do benefit from big buffer.
50 * With 512 byte buffer, it was measured to be
51 * an order of magnitude slower than with big one.
52 */
53 uint64_t just_to_align_next_member;
54 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
Denys Vlasenko98a4c7c2010-02-04 15:00:15 +010055} FIX_ALIASING;
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010056#define G (*ptr_to_globals)
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020057#define INIT_G() do { \
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010058 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020059 IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \
60} while (0)
Denis Vlasenko77105632007-09-24 15:04:00 +000061
62
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020063/* Must match option string! */
64enum {
65 WGET_OPT_CONTINUE = (1 << 0),
Denys Vlasenkofb132e42010-10-29 11:46:52 +020066 WGET_OPT_SPIDER = (1 << 1),
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020067 WGET_OPT_QUIET = (1 << 2),
68 WGET_OPT_OUTNAME = (1 << 3),
69 WGET_OPT_PREFIX = (1 << 4),
70 WGET_OPT_PROXY = (1 << 5),
71 WGET_OPT_USER_AGENT = (1 << 6),
72 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
73 WGET_OPT_RETRIES = (1 << 8),
74 WGET_OPT_PASSIVE = (1 << 9),
75 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
76 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
77};
78
/* Values accepted by progress_meter() */
enum {
	PROGRESS_START = -1, /* set up the bar, then draw it */
	PROGRESS_END,        /*  0: draw one last time and release the bar */
	PROGRESS_BUMP,       /*  1: just redraw */
};
Denis Vlasenko9cade082006-11-21 10:43:02 +000084#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko00d84172008-11-24 07:34:42 +000085static void progress_meter(int flag)
Denis Vlasenko47ddd012007-09-24 18:24:17 +000086{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020087 if (option_mask32 & WGET_OPT_QUIET)
88 return;
Denis Vlasenko47ddd012007-09-24 18:24:17 +000089
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020090 if (flag == PROGRESS_START)
Denys Vlasenkod55e1392011-02-11 18:56:13 +010091 bb_progress_init(&G.pmt, G.curfile);
Denis Vlasenko47ddd012007-09-24 18:24:17 +000092
Denys Vlasenkod55e1392011-02-11 18:56:13 +010093 bb_progress_update(&G.pmt, G.beg_range, G.transferred,
Denys Vlasenkoc5bbd5d2010-07-12 03:27:09 +020094 G.chunked ? 0 : G.beg_range + G.transferred + G.content_len);
Denis Vlasenko47ddd012007-09-24 18:24:17 +000095
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020096 if (flag == PROGRESS_END) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +010097 bb_progress_free(&G.pmt);
Denys Vlasenko19ced5c2010-06-06 21:53:09 +020098 bb_putchar_stderr('\n');
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +010099 G.transferred = 0;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000100 }
101}
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200102#else
Denis Vlasenko00d84172008-11-24 07:34:42 +0000103static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
Eric Andersenb520e082000-10-03 00:21:45 +0000104#endif
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000105
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000106
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Removes the "%zone" part from a bracketed IPv6 host in place:
 * "[fe80::1%eth0]:80" becomes "[fe80::1]:80". Anything that is not of
 * the form "[addr%zone]" or "[addr%zone]:port" is left untouched.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return;

	/* Remove the IPv6 zone identifier from the host address */
	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}

	/* A '%' located after the closing bracket (e.g. "[::1]:8%0") is not
	 * a zone identifier. Copying "]..." to a position after itself
	 * would write past the end of the string, so leave it alone.
	 */
	if (scope > cp)
		return;

	/* cp points to "]...", scope points to "%eth0]...":
	 * slide the tail (including its NUL) down over the zone id */
	memmove(scope, cp, strlen(cp) + 1);
}
142
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/*
 * Base64-encode str into G.wget_buf and return G.wget_buf.
 * The result is overwritten by the next user of the buffer.
 * Over-long input is silently cut so the output fits the buffer.
 */
static char *base64enc(const char *str)
{
	/* paranoia: input limit derived from the buffer size */
	const size_t limit = sizeof(G.wget_buf)/4*3 - 10;
	unsigned len;

	len = strlen(str);
	if (len > limit)
		len = limit;
	bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
	return G.wget_buf;
}
#endif
154
/*
 * Cut s, in place, at its first byte below ' ' (any control character).
 * Returns s so the call can be used directly as a printf argument.
 */
static char* sanitize_string(char *s)
{
	size_t i = 0;

	/* Compare as unsigned so that bytes >= 0x80 are kept;
	 * the terminating NUL is below ' ' and ends the scan. */
	while ((unsigned char)s[i] >= ' ')
		i++;
	s[i] = '\0';
	return s;
}
163
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000164static FILE *open_socket(len_and_sockaddr *lsa)
165{
166 FILE *fp;
167
168 /* glibc 2.4 seems to try seeking on it - ??! */
169 /* hopefully it understands what ESPIPE means... */
170 fp = fdopen(xconnect_stream(lsa), "r+");
171 if (fp == NULL)
Denys Vlasenkodee0fc92011-02-10 10:01:49 +0100172 bb_perror_msg_and_die(bb_msg_memory_exhausted);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000173
174 return fp;
175}
176
Denys Vlasenkof836f012011-02-10 23:02:28 +0100177/* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
178static char fgets_and_trim(FILE *fp)
179{
180 char c;
181 char *buf_ptr;
182
183 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
184 bb_perror_msg_and_die("error getting response");
185
186 buf_ptr = strchrnul(G.wget_buf, '\n');
187 c = *buf_ptr;
188 *buf_ptr = '\0';
189 buf_ptr = strchrnul(G.wget_buf, '\r');
190 *buf_ptr = '\0';
191
192 log_io("< %s", G.wget_buf);
193
194 return c;
195}
196
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100197static int ftpcmd(const char *s1, const char *s2, FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000198{
199 int result;
200 if (s1) {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100201 if (!s2)
202 s2 = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000203 fprintf(fp, "%s%s\r\n", s1, s2);
204 fflush(fp);
Denys Vlasenkof836f012011-02-10 23:02:28 +0100205 log_io("> %s%s", s1, s2);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000206 }
207
208 do {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100209 fgets_and_trim(fp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100210 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000211
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100212 G.wget_buf[3] = '\0';
213 result = xatoi_positive(G.wget_buf);
214 G.wget_buf[3] = ' ';
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000215 return result;
216}
217
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100218static void parse_url(const char *src_url, struct host_info *h)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000219{
220 char *url, *p, *sp;
221
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100222 free(h->allocated);
223 h->allocated = url = xstrdup(src_url);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000224
225 if (strncmp(url, "http://", 7) == 0) {
226 h->port = bb_lookup_port("http", "tcp", 80);
227 h->host = url + 7;
228 h->is_ftp = 0;
229 } else if (strncmp(url, "ftp://", 6) == 0) {
230 h->port = bb_lookup_port("ftp", "tcp", 21);
231 h->host = url + 6;
232 h->is_ftp = 1;
233 } else
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200234 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000235
236 // FYI:
237 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
238 // 'GET /?var=a/b HTTP 1.0'
239 // and saves 'index.html?var=a%2Fb' (we save 'b')
240 // wget 'http://busybox.net?login=john@doe':
241 // request: 'GET /?login=john@doe HTTP/1.0'
242 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
243 // wget 'http://busybox.net#test/test':
244 // request: 'GET / HTTP/1.0'
245 // saves: 'index.html' (we save 'test')
246 //
247 // We also don't add unique .N suffix if file exists...
248 sp = strchr(h->host, '/');
249 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
250 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
251 if (!sp) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000252 h->path = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000253 } else if (*sp == '/') {
254 *sp = '\0';
255 h->path = sp + 1;
256 } else { // '#' or '?'
257 // http://busybox.net?login=john@doe is a valid URL
258 // memmove converts to:
259 // http:/busybox.nett?login=john@doe...
Denis Vlasenko818322b2007-09-24 18:27:04 +0000260 memmove(h->host - 1, h->host, sp - h->host);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000261 h->host--;
262 sp[-1] = '\0';
263 h->path = sp;
264 }
265
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200266 // We used to set h->user to NULL here, but this interferes
267 // with handling of code 302 ("object was moved")
268
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000269 sp = strrchr(h->host, '@');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000270 if (sp != NULL) {
271 h->user = h->host;
272 *sp = '\0';
273 h->host = sp + 1;
274 }
275
276 sp = h->host;
277}
278
Denys Vlasenkof836f012011-02-10 23:02:28 +0100279static char *gethdr(FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000280{
281 char *s, *hdrval;
282 int c;
283
284 /* *istrunc = 0; */
285
286 /* retrieve header line */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100287 c = fgets_and_trim(fp);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000288
Denys Vlasenkof836f012011-02-10 23:02:28 +0100289 /* end of the headers? */
290 if (G.wget_buf[0] == '\0')
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000291 return NULL;
292
293 /* convert the header name to lower case */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100294 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.'; ++s) {
Denys Vlasenko48363312010-04-04 15:29:32 +0200295 /* tolower for "A-Z", no-op for "0-9a-z-." */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100296 *s |= 0x20;
Denys Vlasenko48363312010-04-04 15:29:32 +0200297 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000298
299 /* verify we are at the end of the header name */
300 if (*s != ':')
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100301 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000302
303 /* locate the start of the header value */
304 *s++ = '\0';
305 hdrval = skip_whitespace(s);
306
Denys Vlasenkof836f012011-02-10 23:02:28 +0100307 if (c != '\n') {
308 /* Rats! The buffer isn't big enough to hold the entire header value */
309 while (c = getc(fp), c != EOF && c != '\n')
310 continue;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000311 }
312
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000313 return hdrval;
314}
315
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
/*
 * URL encode, see RFC 2396.
 * Returns a newly allocated copy of str in which every byte except
 * ASCII letters, digits and !&'()*-.=_~ is replaced by %XX
 * (upper-case hex). The caller frees the result.
 */
static char *URL_escape(const char *str)
{
	/* Worst case: each input byte expands to three output bytes */
	char *res = xmalloc(strlen(str) * 3 + 1);
	char *out = res;
	unsigned char c;

	for (; (c = *str) != '\0'; str++) {
		int literal;

		switch (c) {
		case '!': case '&': case '\'': case '(': case ')': case '*':
		case '-': case '.': case '=': case '_': case '~':
			literal = 1;
			break;
		default:
			literal = (c >= '0' && c <= '9')
				|| ((c|0x20) >= 'a' && (c|0x20) <= 'z');
			break;
		}

		if (literal) {
			*out++ = c;
		} else {
			*out++ = '%';
			*out++ = bb_hexdigits_upcase[c >> 4];
			*out++ = bb_hexdigits_upcase[c & 0xf];
		}
	}
	*out = '\0';
	return res;
}
#endif
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000353
Denys Vlasenko7f432802009-06-28 01:02:24 +0200354static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
355{
Denys Vlasenko7f432802009-06-28 01:02:24 +0200356 FILE *sfp;
357 char *str;
358 int port;
359
360 if (!target->user)
361 target->user = xstrdup("anonymous:busybox@");
362
363 sfp = open_socket(lsa);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100364 if (ftpcmd(NULL, NULL, sfp) != 220)
365 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200366
367 /*
368 * Splitting username:password pair,
369 * trying to log in
370 */
371 str = strchr(target->user, ':');
372 if (str)
373 *str++ = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100374 switch (ftpcmd("USER ", target->user, sfp)) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200375 case 230:
376 break;
377 case 331:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100378 if (ftpcmd("PASS ", str, sfp) == 230)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200379 break;
380 /* fall through (failed login) */
381 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100382 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200383 }
384
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100385 ftpcmd("TYPE I", NULL, sfp);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200386
387 /*
388 * Querying file size
389 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100390 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
391 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100392 if (G.content_len < 0 || errno) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200393 bb_error_msg_and_die("SIZE value is garbage");
394 }
395 G.got_clen = 1;
396 }
397
398 /*
399 * Entering passive mode
400 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100401 if (ftpcmd("PASV", NULL, sfp) != 227) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200402 pasv_error:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100403 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200404 }
405 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
406 // Server's IP is N1.N2.N3.N4 (we ignore it)
407 // Server's port for data connection is P1*256+P2
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100408 str = strrchr(G.wget_buf, ')');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200409 if (str) str[0] = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100410 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200411 if (!str) goto pasv_error;
412 port = xatou_range(str+1, 0, 255);
413 *str = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100414 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200415 if (!str) goto pasv_error;
416 port += xatou_range(str+1, 0, 255) * 256;
417 set_nport(lsa, htons(port));
418
419 *dfpp = open_socket(lsa);
420
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100421 if (G.beg_range) {
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100422 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
423 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100424 G.content_len -= G.beg_range;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200425 }
426
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100427 if (ftpcmd("RETR ", target->path, sfp) > 150)
428 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200429
430 return sfp;
431}
432
Denys Vlasenko7f432802009-06-28 01:02:24 +0200433static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd)
434{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200435#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
436# if ENABLE_FEATURE_WGET_TIMEOUT
437 unsigned second_cnt;
438# endif
439 struct pollfd polldata;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200440
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200441 polldata.fd = fileno(dfp);
442 polldata.events = POLLIN | POLLPRI;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200443#endif
444 progress_meter(PROGRESS_START);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200445
446 if (G.chunked)
447 goto get_clen;
448
449 /* Loops only if chunked */
450 while (1) {
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100451
452#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
453 /* Must use nonblocking I/O, otherwise fread will loop
454 * and *block* until it reads full buffer,
455 * which messes up progress bar and/or timeout logic.
456 * Because of nonblocking I/O, we need to dance
457 * very carefully around EAGAIN. See explanation at
458 * clearerr() call.
459 */
460 ndelay_on(polldata.fd);
461#endif
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100462 while (1) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200463 int n;
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100464 unsigned rdsz;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200465
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100466 rdsz = sizeof(G.wget_buf);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100467 if (G.got_clen) {
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100468 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100469 if ((int)G.content_len <= 0)
470 break;
471 rdsz = (unsigned)G.content_len;
472 }
473 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100474
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200475#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
476# if ENABLE_FEATURE_WGET_TIMEOUT
477 second_cnt = G.timeout_seconds;
478# endif
479 while (1) {
480 if (safe_poll(&polldata, 1, 1000) != 0)
481 break; /* error, EOF, or data is available */
482# if ENABLE_FEATURE_WGET_TIMEOUT
483 if (second_cnt != 0 && --second_cnt == 0) {
484 progress_meter(PROGRESS_END);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100485 bb_error_msg_and_die("download timed out");
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200486 }
487# endif
488 /* Needed for "stalled" indicator */
489 progress_meter(PROGRESS_BUMP);
490 }
Denys Vlasenkof9af3752011-02-11 22:01:33 +0100491
Denys Vlasenko8766a792011-02-11 21:42:00 +0100492 /* fread internally uses read loop, which in our case
493 * is usually exited when we get EAGAIN.
494 * In this case, libc sets error marker on the stream.
495 * Need to clear it before next fread to avoid possible
496 * rare false positive ferror below. Rare because usually
497 * fread gets more than zero bytes, and we don't fall
498 * into if (n <= 0) ...
499 */
500 clearerr(dfp);
501 errno = 0;
Denys Vlasenkof9af3752011-02-11 22:01:33 +0100502#endif
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100503 n = fread(G.wget_buf, 1, rdsz, dfp);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100504 /* man fread:
505 * If error occurs, or EOF is reached, the return value
506 * is a short item count (or zero).
507 * fread does not distinguish between EOF and error.
508 */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200509 if (n <= 0) {
Denys Vlasenko8766a792011-02-11 21:42:00 +0100510#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
511 if (errno == EAGAIN) /* poll lied, there is no data? */
512 continue; /* yes */
513#endif
514 if (ferror(dfp))
515 bb_perror_msg_and_die(bb_msg_read_error);
516 break; /* EOF, not error */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200517 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100518
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100519 xwrite(output_fd, G.wget_buf, n);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100520
Denys Vlasenko7f432802009-06-28 01:02:24 +0200521#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100522 G.transferred += n;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200523 progress_meter(PROGRESS_BUMP);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200524#endif
Denys Vlasenko9213a552011-02-10 13:23:45 +0100525 if (G.got_clen) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100526 G.content_len -= n;
Denys Vlasenko9213a552011-02-10 13:23:45 +0100527 if (G.content_len == 0)
528 break;
529 }
Denys Vlasenko7f432802009-06-28 01:02:24 +0200530 }
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100531#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
532 clearerr(dfp);
Denys Vlasenko88ad9da2011-02-11 23:06:21 +0100533 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100534#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +0200535 if (!G.chunked)
536 break;
537
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100538 fgets_and_trim(dfp); /* Eat empty line */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200539 get_clen:
Denys Vlasenkof836f012011-02-10 23:02:28 +0100540 fgets_and_trim(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100541 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200542 /* FIXME: error check? */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100543 if (G.content_len == 0)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200544 break; /* all done! */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100545 G.got_clen = 1;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200546 }
547
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100548 /* Draw full bar and free its resources */
549 G.chunked = 0; /* makes it show 100% even for chunked download */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200550 progress_meter(PROGRESS_END);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200551}
552
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100553static int download_one_url(const char *url)
Eric Andersen96700832000-09-04 15:15:55 +0000554{
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100555 bool use_proxy; /* Use proxies if env vars are set */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200556 int redir_limit;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100557 int output_fd;
558 len_and_sockaddr *lsa;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200559 FILE *sfp; /* socket to web/ftp server */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000560 FILE *dfp; /* socket to ftp server (data) */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100561 char *proxy = NULL;
562 char *fname_out_alloc;
563 struct host_info server;
564 struct host_info target;
Denis Vlasenko77105632007-09-24 15:04:00 +0000565
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100566 server.allocated = NULL;
567 target.allocated = NULL;
568 server.user = NULL;
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200569 target.user = NULL;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100570
571 parse_url(url, &target);
Eric Andersen79757c92001-04-05 21:45:54 +0000572
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000573 /* Use the proxy if necessary */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100574 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000575 if (use_proxy) {
Robert Griebld7760112002-05-14 23:36:45 +0000576 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200577 if (proxy && proxy[0]) {
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000578 parse_url(proxy, &server);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000579 } else {
580 use_proxy = 0;
581 }
Robert Griebld7760112002-05-14 23:36:45 +0000582 }
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200583 if (!use_proxy) {
584 server.port = target.port;
585 if (ENABLE_FEATURE_IPV6) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100586 //free(server.allocated); - can't be non-NULL
587 server.host = server.allocated = xstrdup(target.host);
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200588 } else {
589 server.host = target.host;
590 }
591 }
592
593 if (ENABLE_FEATURE_IPV6)
594 strip_ipv6_scope_id(target.host);
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000595
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100596 /* If there was no -O FILE, guess output filename */
597 output_fd = -1;
598 fname_out_alloc = NULL;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +0100599 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100600 G.fname_out = bb_get_last_path_component_nostrip(target.path);
Denis Vlasenko818322b2007-09-24 18:27:04 +0000601 /* handle "wget http://kernel.org//" */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100602 if (G.fname_out[0] == '/' || !G.fname_out[0])
603 G.fname_out = (char*)"index.html";
Denis Vlasenko818322b2007-09-24 18:27:04 +0000604 /* -P DIR is considered only if there was no -O FILE */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100605 if (G.dir_prefix)
606 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000607 } else {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100608 if (LONE_DASH(G.fname_out)) {
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000609 /* -O - */
610 output_fd = 1;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100611 option_mask32 &= ~WGET_OPT_CONTINUE;
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000612 }
Eric Andersen29edd002000-12-09 16:55:35 +0000613 }
Denis Vlasenko818322b2007-09-24 18:27:04 +0000614#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100615 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
Denis Vlasenko818322b2007-09-24 18:27:04 +0000616#endif
617
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000618 /* Determine where to start transfer */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100619 if (option_mask32 & WGET_OPT_CONTINUE) {
620 output_fd = open(G.fname_out, O_WRONLY);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000621 if (output_fd >= 0) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100622 G.beg_range = xlseek(output_fd, 0, SEEK_END);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000623 }
624 /* File doesn't exist. We do not create file here yet.
Denys Vlasenkoa84eadf2011-02-12 23:40:31 +0100625 * We are not sure it exists on remote side */
Eric Andersen96700832000-09-04 15:15:55 +0000626 }
627
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200628 redir_limit = 5;
629 resolve_lsa:
Denis Vlasenko42823d52007-02-04 02:39:08 +0000630 lsa = xhost2sockaddr(server.host, server.port);
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100631 if (!(option_mask32 & WGET_OPT_QUIET)) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200632 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
633 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
634 free(s);
Eric Andersene6dc4392003-10-31 09:31:46 +0000635 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200636 establish_session:
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100637 G.chunked = G.got_clen = 0;
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000638 if (use_proxy || !target.is_ftp) {
Eric Andersen79757c92001-04-05 21:45:54 +0000639 /*
640 * HTTP session
641 */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200642 char *str;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200643 int status;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200644
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100645
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200646 /* Open socket to http server */
647 sfp = open_socket(lsa);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200648
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200649 /* Send HTTP request */
650 if (use_proxy) {
651 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
652 target.is_ftp ? "f" : "ht", target.host,
653 target.path);
654 } else {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100655 if (option_mask32 & WGET_OPT_POST_DATA)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200656 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
657 else
658 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
659 }
Glenn L McGrathe7bdfcc2003-08-28 22:03:19 +0000660
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200661 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100662 target.host, G.user_agent);
Eric Andersen79757c92001-04-05 21:45:54 +0000663
Denys Vlasenko9213a552011-02-10 13:23:45 +0100664 /* Ask server to close the connection as soon as we are done
665 * (IOW: we do not intend to send more requests)
666 */
667 fprintf(sfp, "Connection: close\r\n");
668
Denis Vlasenko9cade082006-11-21 10:43:02 +0000669#if ENABLE_FEATURE_WGET_AUTHENTICATION
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200670 if (target.user) {
671 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100672 base64enc(target.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200673 }
674 if (use_proxy && server.user) {
675 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100676 base64enc(server.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200677 }
Eric Andersen79757c92001-04-05 21:45:54 +0000678#endif
679
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100680 if (G.beg_range)
681 fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
Denys Vlasenko9213a552011-02-10 13:23:45 +0100682
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000683#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100684 if (G.extra_headers)
685 fputs(G.extra_headers, sfp);
Denis Vlasenko5a2ad692009-03-04 14:13:37 +0000686
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100687 if (option_mask32 & WGET_OPT_POST_DATA) {
688 char *estr = URL_escape(G.post_data);
Denys Vlasenko9213a552011-02-10 13:23:45 +0100689 fprintf(sfp,
690 "Content-Type: application/x-www-form-urlencoded\r\n"
691 "Content-Length: %u\r\n"
692 "\r\n"
693 "%s",
694 (int) strlen(estr), estr
695 );
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200696 free(estr);
697 } else
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000698#endif
Denys Vlasenko9213a552011-02-10 13:23:45 +0100699 {
700 fprintf(sfp, "\r\n");
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200701 }
Eric Andersen79757c92001-04-05 21:45:54 +0000702
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +0200703 fflush(sfp);
704
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200705 /*
706 * Retrieve HTTP response line and check for "200" status code.
707 */
Denis Vlasenko023b57d2006-10-15 17:05:55 +0000708 read_response:
Denys Vlasenkof836f012011-02-10 23:02:28 +0100709 fgets_and_trim(sfp);
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000710
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100711 str = G.wget_buf;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200712 str = skip_non_whitespace(str);
713 str = skip_whitespace(str);
714 // FIXME: no error check
715 // xatou wouldn't work: "200 OK"
716 status = atoi(str);
717 switch (status) {
718 case 0:
719 case 100:
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100720 while (gethdr(sfp) != NULL)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200721 /* eat all remaining headers */;
722 goto read_response;
723 case 200:
Denis Vlasenko50b5cac2008-06-22 16:28:02 +0000724/*
725Response 204 doesn't say "null file", it says "metadata
726has changed but data didn't":
727
728"10.2.5 204 No Content
729The server has fulfilled the request but does not need to return
730an entity-body, and might want to return updated metainformation.
731The response MAY include new or updated metainformation in the form
732of entity-headers, which if present SHOULD be associated with
733the requested variant.
734
735If the client is a user agent, it SHOULD NOT change its document
736view from that which caused the request to be sent. This response
737is primarily intended to allow input for actions to take place
738without causing a change to the user agent's active document view,
739although any new or updated metainformation SHOULD be applied
740to the document currently in the user agent's active view.
741
742The 204 response MUST NOT include a message-body, and thus
743is always terminated by the first empty line after the header fields."
744
745However, in real world it was observed that some web servers
746(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
747*/
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200748 case 204:
749 break;
Denys Vlasenkofb132e42010-10-29 11:46:52 +0200750 case 300: /* redirection */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200751 case 301:
752 case 302:
753 case 303:
754 break;
755 case 206:
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100756 if (G.beg_range)
Denis Vlasenko023b57d2006-10-15 17:05:55 +0000757 break;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200758 /* fall through */
759 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100760 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200761 }
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000762
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200763 /*
764 * Retrieve HTTP headers.
765 */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100766 while ((str = gethdr(sfp)) != NULL) {
767 static const char keywords[] ALIGN1 =
768 "content-length\0""transfer-encoding\0""location\0";
769 enum {
770 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
771 };
Matthijs van de Water0d586662009-08-22 20:19:48 +0200772 smalluint key;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100773
774 /* gethdr converted "FOO:" string to lowercase */
775
Matthijs van de Water0d586662009-08-22 20:19:48 +0200776 /* strip trailing whitespace */
777 char *s = strchrnul(str, '\0') - 1;
778 while (s >= str && (*s == ' ' || *s == '\t')) {
779 *s = '\0';
780 s--;
781 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100782 key = index_in_strings(keywords, G.wget_buf) + 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200783 if (key == KEY_content_length) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100784 G.content_len = BB_STRTOOFF(str, NULL, 10);
785 if (G.content_len < 0 || errno) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200786 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
Eric Andersen79757c92001-04-05 21:45:54 +0000787 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200788 G.got_clen = 1;
789 continue;
790 }
791 if (key == KEY_transfer_encoding) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100792 if (strcmp(str_tolower(str), "chunked") != 0)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200793 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100794 G.chunked = 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200795 }
796 if (key == KEY_location && status >= 300) {
797 if (--redir_limit == 0)
798 bb_error_msg_and_die("too many redirections");
799 fclose(sfp);
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100800 if (str[0] == '/') {
801 free(target.allocated);
802 target.path = target.allocated = xstrdup(str+1);
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200803 /* lsa stays the same: it's on the same server */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100804 } else {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200805 parse_url(str, &target);
806 if (!use_proxy) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100807 free(server.allocated);
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200808 server.host = target.host;
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200809 /* strip_ipv6_scope_id(target.host); - no! */
810 /* we assume remote never gives us IPv6 addr with scope id */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200811 server.port = target.port;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +0000812 free(lsa);
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200813 goto resolve_lsa;
814 } /* else: lsa stays the same: we use proxy */
Eric Andersen79757c92001-04-05 21:45:54 +0000815 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200816 goto establish_session;
Eric Andersen79757c92001-04-05 21:45:54 +0000817 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200818 }
819// if (status >= 300)
820// bb_error_msg_and_die("bad redirection (no Location: header from server)");
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000821
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200822 /* For HTTP, data is pumped over the same connection */
Eric Andersen79757c92001-04-05 21:45:54 +0000823 dfp = sfp;
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000824
825 } else {
Eric Andersen79757c92001-04-05 21:45:54 +0000826 /*
827 * FTP session
828 */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200829 sfp = prepare_ftp_session(&dfp, &target, lsa);
Eric Andersen96700832000-09-04 15:15:55 +0000830 }
Denis Vlasenko77105632007-09-24 15:04:00 +0000831
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100832 free(lsa);
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100833
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +0100834 if (!(option_mask32 & WGET_OPT_SPIDER)) {
835 if (output_fd < 0) {
836 int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
837 /* compat with wget: -O FILE can overwrite */
838 if (option_mask32 & WGET_OPT_OUTNAME)
839 o_flags = O_WRONLY | O_CREAT | O_TRUNC;
840 output_fd = xopen(G.fname_out, o_flags);
841 }
842 retrieve_file_data(dfp, output_fd);
843 xclose(output_fd);
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +0000844 }
Eric Andersen79757c92001-04-05 21:45:54 +0000845
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200846 if (dfp != sfp) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100847 /* It's ftp. Close data connection properly */
Eric Andersen79757c92001-04-05 21:45:54 +0000848 fclose(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100849 if (ftpcmd(NULL, NULL, sfp) != 226)
850 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
851 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
Eric Andersen79757c92001-04-05 21:45:54 +0000852 }
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100853 fclose(sfp);
Denis Vlasenko77105632007-09-24 15:04:00 +0000854
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +0100855 free(server.allocated);
856 free(target.allocated);
857 free(fname_out_alloc);
858
Denis Vlasenko77105632007-09-24 15:04:00 +0000859 return EXIT_SUCCESS;
Eric Andersen96700832000-09-04 15:15:55 +0000860}
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100861
862int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
863int wget_main(int argc UNUSED_PARAM, char **argv)
864{
865#if ENABLE_FEATURE_WGET_LONG_OPTIONS
866 static const char wget_longopts[] ALIGN1 =
867 /* name, has_arg, val */
868 "continue\0" No_argument "c"
869//FIXME: -s isn't --spider, it's --save-headers!
870 "spider\0" No_argument "s"
871 "quiet\0" No_argument "q"
872 "output-document\0" Required_argument "O"
873 "directory-prefix\0" Required_argument "P"
874 "proxy\0" Required_argument "Y"
875 "user-agent\0" Required_argument "U"
876#if ENABLE_FEATURE_WGET_TIMEOUT
877 "timeout\0" Required_argument "T"
878#endif
879 /* Ignored: */
880 // "tries\0" Required_argument "t"
881 /* Ignored (we always use PASV): */
882 "passive-ftp\0" No_argument "\xff"
883 "header\0" Required_argument "\xfe"
884 "post-data\0" Required_argument "\xfd"
885 /* Ignored (we don't do ssl) */
886 "no-check-certificate\0" No_argument "\xfc"
887 ;
888#endif
889
890 int exitcode;
891#if ENABLE_FEATURE_WGET_LONG_OPTIONS
892 llist_t *headers_llist = NULL;
893#endif
894
895 INIT_G();
896
897 IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;)
898 G.proxy_flag = "on"; /* use proxies if env vars are set */
899 G.user_agent = "Wget"; /* "User-Agent" header field */
900
901#if ENABLE_FEATURE_WGET_LONG_OPTIONS
902 applet_long_options = wget_longopts;
903#endif
904 opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
905 getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
906 &G.fname_out, &G.dir_prefix,
907 &G.proxy_flag, &G.user_agent,
908 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
909 NULL /* -t RETRIES */
910 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
911 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
912 );
913 argv += optind;
914
915#if ENABLE_FEATURE_WGET_LONG_OPTIONS
916 if (headers_llist) {
917 int size = 1;
918 char *cp;
919 llist_t *ll = headers_llist;
920 while (ll) {
921 size += strlen(ll->data) + 2;
922 ll = ll->link;
923 }
924 G.extra_headers = cp = xmalloc(size);
925 while (headers_llist) {
926 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
927 }
928 }
929#endif
930
931 exitcode = 0;
932 while (*argv)
933 exitcode |= download_one_url(*argv++);
934
935 return exitcode;
936}