blob: b5c808df46e6ecc8a2c9b64293ec919a24bd583b [file] [log] [blame]
/* vi: set sw=4 ts=4: */
/*
 * wget - retrieve a file using HTTP or FTP
 *
 * Chip Rosenthal Covad Communications <chip@laserlink.net>
 * Licensed under GPLv2, see file LICENSE in this source tree.
 *
 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
 */

//usage:#define wget_trivial_usage
//usage: IF_FEATURE_WGET_LONG_OPTIONS(
//usage: "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
//usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
//usage: " [--no-check-certificate] [-U|--user-agent AGENT]"
//usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
//usage: )
//usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
//usage: "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
//usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
//usage: )
//usage:#define wget_full_usage "\n\n"
//usage: "Retrieve files via HTTP or FTP\n"
//usage: "\n -s Spider mode - only check file existence"
//usage: "\n -c Continue retrieval of aborted transfer"
//usage: "\n -q Quiet"
//usage: "\n -P DIR Save to DIR (default .)"
//usage: IF_FEATURE_WGET_TIMEOUT(
//usage: "\n -T SEC Network read timeout is SEC seconds"
//usage: )
//usage: "\n -O FILE Save to FILE ('-' for stdout)"
//usage: "\n -U STR Use STR for User-Agent header"
//usage: "\n -Y Use proxy ('on' or 'off')"

Denis Vlasenkob6adbf12007-05-26 19:00:18 +000036#include "libbb.h"
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +000037
/* Protocol I/O tracing hook. It expands to nothing by default;
 * enable the commented-out definition instead to log every line
 * sent to / received from the server.
 */
//#define log_io(...) bb_error_msg(__VA_ARGS__)
#define log_io(...) ((void)0)
40
41
Eric Andersen79757c92001-04-05 21:45:54 +000042struct host_info {
Denys Vlasenkoa3661092011-02-13 02:33:11 +010043 char *allocated;
Denis Vlasenko818322b2007-09-24 18:27:04 +000044 const char *path;
45 const char *user;
46 char *host;
47 int port;
48 smallint is_ftp;
Eric Andersen79757c92001-04-05 21:45:54 +000049};
50
Denis Vlasenko77105632007-09-24 15:04:00 +000051
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020052/* Globals */
Denis Vlasenko77105632007-09-24 15:04:00 +000053struct globals {
54 off_t content_len; /* Content-length of the file */
55 off_t beg_range; /* Range at which continue begins */
56#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko77105632007-09-24 15:04:00 +000057 off_t transferred; /* Number of bytes transferred so far */
58 const char *curfile; /* Name of current file being transferred */
Magnus Dammf5914992009-11-08 16:34:43 +010059 bb_progress_t pmt;
Denis Vlasenko77105632007-09-24 15:04:00 +000060#endif
Denys Vlasenkoa3661092011-02-13 02:33:11 +010061 char *dir_prefix;
62#if ENABLE_FEATURE_WGET_LONG_OPTIONS
63 char *post_data;
64 char *extra_headers;
65#endif
66 char *fname_out; /* where to direct output (-O) */
67 const char *proxy_flag; /* Use proxies if env vars are set */
68 const char *user_agent; /* "User-Agent" header field */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020069#if ENABLE_FEATURE_WGET_TIMEOUT
70 unsigned timeout_seconds;
71#endif
Denys Vlasenko2384a352011-02-15 00:58:36 +010072 int output_fd;
73 int o_flags;
Denys Vlasenko7f432802009-06-28 01:02:24 +020074 smallint chunked; /* chunked transfer encoding */
75 smallint got_clen; /* got content-length: from server */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010076 /* Local downloads do benefit from big buffer.
77 * With 512 byte buffer, it was measured to be
78 * an order of magnitude slower than with big one.
79 */
80 uint64_t just_to_align_next_member;
81 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
Denys Vlasenko98a4c7c2010-02-04 15:00:15 +010082} FIX_ALIASING;
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010083#define G (*ptr_to_globals)
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020084#define INIT_G() do { \
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010085 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020086 IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \
87} while (0)
Denis Vlasenko77105632007-09-24 15:04:00 +000088
89
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020090/* Must match option string! */
91enum {
92 WGET_OPT_CONTINUE = (1 << 0),
Denys Vlasenkofb132e42010-10-29 11:46:52 +020093 WGET_OPT_SPIDER = (1 << 1),
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020094 WGET_OPT_QUIET = (1 << 2),
95 WGET_OPT_OUTNAME = (1 << 3),
96 WGET_OPT_PREFIX = (1 << 4),
97 WGET_OPT_PROXY = (1 << 5),
98 WGET_OPT_USER_AGENT = (1 << 6),
99 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
100 WGET_OPT_RETRIES = (1 << 8),
101 WGET_OPT_PASSIVE = (1 << 9),
102 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
103 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
104};
105
/* Values for the 'flag' argument of progress_meter() */
enum {
	PROGRESS_START = -1, /* initialize the bar, then draw it */
	PROGRESS_END   = 0,  /* draw the bar one last time and release it */
	PROGRESS_BUMP  = 1,  /* just redraw */
};
Denis Vlasenko9cade082006-11-21 10:43:02 +0000111#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko00d84172008-11-24 07:34:42 +0000112static void progress_meter(int flag)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000113{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200114 if (option_mask32 & WGET_OPT_QUIET)
115 return;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000116
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200117 if (flag == PROGRESS_START)
Denys Vlasenkod55e1392011-02-11 18:56:13 +0100118 bb_progress_init(&G.pmt, G.curfile);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000119
Denys Vlasenko2384a352011-02-15 00:58:36 +0100120 bb_progress_update(&G.pmt,
121 G.beg_range,
122 G.transferred,
123 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
124 );
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000125
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200126 if (flag == PROGRESS_END) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100127 bb_progress_free(&G.pmt);
Denys Vlasenko19ced5c2010-06-06 21:53:09 +0200128 bb_putchar_stderr('\n');
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100129 G.transferred = 0;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000130 }
131}
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200132#else
Denis Vlasenko00d84172008-11-24 07:34:42 +0000133static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
Eric Andersenb520e082000-10-03 00:21:45 +0000134#endif
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000135
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000136
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * This function edits 'host' in place: "[fe80::1%eth0]:80" becomes
 * "[fe80::1]:80". Anything that is not a bracketed address with a '%'
 * in it, or is malformed, is left untouched.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return;

	/* Remove the IPv6 zone identifier from the host address */
	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}

	/* cp points to "]...", scope points to "%eth0]..." */
	overlapping_strcpy(scope, cp);
}
172
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/* Base64-encode character string.
 * The result is written to (and returned as) G.wget_buf, so it must be
 * consumed before anything else reuses that buffer. Input longer than
 * what fits after encoding is silently truncated.
 */
static char *base64enc(const char *str)
{
	unsigned len = strlen(str);
	if (len > sizeof(G.wget_buf)/4*3 - 10) /* paranoia */
		len = sizeof(G.wget_buf)/4*3 - 10;
	bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
	return G.wget_buf;
}
#endif
184
/*
 * Cut 's' at the first byte below 0x20 (control characters, and the
 * terminating NUL itself), so that server- or user-supplied text can be
 * printed in error messages. Bytes >= 0x80 are kept: the scan uses
 * unsigned char. Modifies 's' in place and returns it.
 */
static char* sanitize_string(char *s)
{
	unsigned char *p = (void *) s;
	while (*p >= ' ')
		p++;
	*p = '\0';
	return s;
}
193
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000194static FILE *open_socket(len_and_sockaddr *lsa)
195{
196 FILE *fp;
197
198 /* glibc 2.4 seems to try seeking on it - ??! */
199 /* hopefully it understands what ESPIPE means... */
200 fp = fdopen(xconnect_stream(lsa), "r+");
201 if (fp == NULL)
Tanguy Pruvot8aeb3712011-06-30 08:59:26 +0200202 bb_perror_msg_and_die("%s", bb_msg_memory_exhausted);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000203
204 return fp;
205}
206
Denys Vlasenkof836f012011-02-10 23:02:28 +0100207/* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
208static char fgets_and_trim(FILE *fp)
209{
210 char c;
211 char *buf_ptr;
212
213 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
214 bb_perror_msg_and_die("error getting response");
215
216 buf_ptr = strchrnul(G.wget_buf, '\n');
217 c = *buf_ptr;
218 *buf_ptr = '\0';
219 buf_ptr = strchrnul(G.wget_buf, '\r');
220 *buf_ptr = '\0';
221
222 log_io("< %s", G.wget_buf);
223
224 return c;
225}
226
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100227static int ftpcmd(const char *s1, const char *s2, FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000228{
229 int result;
230 if (s1) {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100231 if (!s2)
232 s2 = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000233 fprintf(fp, "%s%s\r\n", s1, s2);
234 fflush(fp);
Denys Vlasenkof836f012011-02-10 23:02:28 +0100235 log_io("> %s%s", s1, s2);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000236 }
237
238 do {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100239 fgets_and_trim(fp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100240 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000241
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100242 G.wget_buf[3] = '\0';
243 result = xatoi_positive(G.wget_buf);
244 G.wget_buf[3] = ' ';
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000245 return result;
246}
247
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100248static void parse_url(const char *src_url, struct host_info *h)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000249{
250 char *url, *p, *sp;
251
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100252 free(h->allocated);
253 h->allocated = url = xstrdup(src_url);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000254
255 if (strncmp(url, "http://", 7) == 0) {
256 h->port = bb_lookup_port("http", "tcp", 80);
257 h->host = url + 7;
258 h->is_ftp = 0;
259 } else if (strncmp(url, "ftp://", 6) == 0) {
260 h->port = bb_lookup_port("ftp", "tcp", 21);
261 h->host = url + 6;
262 h->is_ftp = 1;
263 } else
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200264 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000265
266 // FYI:
267 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
268 // 'GET /?var=a/b HTTP 1.0'
269 // and saves 'index.html?var=a%2Fb' (we save 'b')
270 // wget 'http://busybox.net?login=john@doe':
271 // request: 'GET /?login=john@doe HTTP/1.0'
272 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
273 // wget 'http://busybox.net#test/test':
274 // request: 'GET / HTTP/1.0'
275 // saves: 'index.html' (we save 'test')
276 //
277 // We also don't add unique .N suffix if file exists...
278 sp = strchr(h->host, '/');
279 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
280 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
281 if (!sp) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000282 h->path = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000283 } else if (*sp == '/') {
284 *sp = '\0';
285 h->path = sp + 1;
286 } else { // '#' or '?'
287 // http://busybox.net?login=john@doe is a valid URL
288 // memmove converts to:
289 // http:/busybox.nett?login=john@doe...
Denis Vlasenko818322b2007-09-24 18:27:04 +0000290 memmove(h->host - 1, h->host, sp - h->host);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000291 h->host--;
292 sp[-1] = '\0';
293 h->path = sp;
294 }
295
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200296 // We used to set h->user to NULL here, but this interferes
297 // with handling of code 302 ("object was moved")
298
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000299 sp = strrchr(h->host, '@');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000300 if (sp != NULL) {
Tanguy Pruvot8a6c2c22012-04-28 00:24:09 +0200301 // URL-decode "user:password" string before base64-encoding:
302 // wget http://test:my%20pass@example.com should send
303 // Authorization: Basic dGVzdDpteSBwYXNz
304 // which decodes to "test:my pass".
305 // Standard wget and curl do this too.
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000306 *sp = '\0';
Tanguy Pruvot8a6c2c22012-04-28 00:24:09 +0200307 h->user = percent_decode_in_place(h->host, /*strict:*/ 0);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000308 h->host = sp + 1;
309 }
310
311 sp = h->host;
312}
313
Denys Vlasenkof836f012011-02-10 23:02:28 +0100314static char *gethdr(FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000315{
316 char *s, *hdrval;
317 int c;
318
319 /* *istrunc = 0; */
320
321 /* retrieve header line */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100322 c = fgets_and_trim(fp);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000323
Denys Vlasenkof836f012011-02-10 23:02:28 +0100324 /* end of the headers? */
325 if (G.wget_buf[0] == '\0')
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000326 return NULL;
327
328 /* convert the header name to lower case */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100329 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.'; ++s) {
Denys Vlasenko48363312010-04-04 15:29:32 +0200330 /* tolower for "A-Z", no-op for "0-9a-z-." */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100331 *s |= 0x20;
Denys Vlasenko48363312010-04-04 15:29:32 +0200332 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000333
334 /* verify we are at the end of the header name */
335 if (*s != ':')
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100336 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000337
338 /* locate the start of the header value */
339 *s++ = '\0';
340 hdrval = skip_whitespace(s);
341
Denys Vlasenkof836f012011-02-10 23:02:28 +0100342 if (c != '\n') {
343 /* Rats! The buffer isn't big enough to hold the entire header value */
344 while (c = getc(fp), c != EOF && c != '\n')
345 continue;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000346 }
347
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000348 return hdrval;
349}
350
Denys Vlasenko6030af92012-06-13 17:31:07 +0200351static void reset_beg_range_to_zero(void)
352{
353 //bb_error_msg("restart failed");
354 G.beg_range = 0;
355 xlseek(G.output_fd, 0, SEEK_SET);
356 ftruncate(G.output_fd, 0);
357}
358
Denys Vlasenko7f432802009-06-28 01:02:24 +0200359static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
360{
Denys Vlasenko7f432802009-06-28 01:02:24 +0200361 FILE *sfp;
362 char *str;
363 int port;
364
365 if (!target->user)
366 target->user = xstrdup("anonymous:busybox@");
367
368 sfp = open_socket(lsa);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100369 if (ftpcmd(NULL, NULL, sfp) != 220)
370 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200371
372 /*
373 * Splitting username:password pair,
374 * trying to log in
375 */
376 str = strchr(target->user, ':');
377 if (str)
378 *str++ = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100379 switch (ftpcmd("USER ", target->user, sfp)) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200380 case 230:
381 break;
382 case 331:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100383 if (ftpcmd("PASS ", str, sfp) == 230)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200384 break;
385 /* fall through (failed login) */
386 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100387 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200388 }
389
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100390 ftpcmd("TYPE I", NULL, sfp);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200391
392 /*
393 * Querying file size
394 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100395 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
396 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100397 if (G.content_len < 0 || errno) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200398 bb_error_msg_and_die("SIZE value is garbage");
399 }
400 G.got_clen = 1;
401 }
402
403 /*
404 * Entering passive mode
405 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100406 if (ftpcmd("PASV", NULL, sfp) != 227) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200407 pasv_error:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100408 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200409 }
410 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
411 // Server's IP is N1.N2.N3.N4 (we ignore it)
412 // Server's port for data connection is P1*256+P2
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100413 str = strrchr(G.wget_buf, ')');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200414 if (str) str[0] = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100415 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200416 if (!str) goto pasv_error;
417 port = xatou_range(str+1, 0, 255);
418 *str = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100419 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200420 if (!str) goto pasv_error;
421 port += xatou_range(str+1, 0, 255) * 256;
Denys Vlasenkoca183112011-04-07 17:52:20 +0200422 set_nport(&lsa->u.sa, htons(port));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200423
424 *dfpp = open_socket(lsa);
425
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100426 if (G.beg_range) {
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100427 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
428 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100429 G.content_len -= G.beg_range;
Denys Vlasenko6030af92012-06-13 17:31:07 +0200430 else
431 reset_beg_range_to_zero();
Denys Vlasenko7f432802009-06-28 01:02:24 +0200432 }
433
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100434 if (ftpcmd("RETR ", target->path, sfp) > 150)
435 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200436
437 return sfp;
438}
439
Denys Vlasenko2384a352011-02-15 00:58:36 +0100440static void NOINLINE retrieve_file_data(FILE *dfp)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200441{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200442#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
443# if ENABLE_FEATURE_WGET_TIMEOUT
444 unsigned second_cnt;
445# endif
446 struct pollfd polldata;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200447
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200448 polldata.fd = fileno(dfp);
449 polldata.events = POLLIN | POLLPRI;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200450#endif
451 progress_meter(PROGRESS_START);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200452
453 if (G.chunked)
454 goto get_clen;
455
456 /* Loops only if chunked */
457 while (1) {
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100458
459#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
460 /* Must use nonblocking I/O, otherwise fread will loop
461 * and *block* until it reads full buffer,
462 * which messes up progress bar and/or timeout logic.
463 * Because of nonblocking I/O, we need to dance
464 * very carefully around EAGAIN. See explanation at
465 * clearerr() call.
466 */
467 ndelay_on(polldata.fd);
468#endif
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100469 while (1) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200470 int n;
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100471 unsigned rdsz;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200472
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100473 rdsz = sizeof(G.wget_buf);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100474 if (G.got_clen) {
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100475 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100476 if ((int)G.content_len <= 0)
477 break;
478 rdsz = (unsigned)G.content_len;
479 }
480 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100481
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200482#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
483# if ENABLE_FEATURE_WGET_TIMEOUT
484 second_cnt = G.timeout_seconds;
485# endif
486 while (1) {
487 if (safe_poll(&polldata, 1, 1000) != 0)
488 break; /* error, EOF, or data is available */
489# if ENABLE_FEATURE_WGET_TIMEOUT
490 if (second_cnt != 0 && --second_cnt == 0) {
491 progress_meter(PROGRESS_END);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100492 bb_error_msg_and_die("download timed out");
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200493 }
494# endif
495 /* Needed for "stalled" indicator */
496 progress_meter(PROGRESS_BUMP);
497 }
Denys Vlasenkof9af3752011-02-11 22:01:33 +0100498
Denys Vlasenko8766a792011-02-11 21:42:00 +0100499 /* fread internally uses read loop, which in our case
500 * is usually exited when we get EAGAIN.
501 * In this case, libc sets error marker on the stream.
502 * Need to clear it before next fread to avoid possible
503 * rare false positive ferror below. Rare because usually
504 * fread gets more than zero bytes, and we don't fall
505 * into if (n <= 0) ...
506 */
507 clearerr(dfp);
508 errno = 0;
Denys Vlasenkof9af3752011-02-11 22:01:33 +0100509#endif
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100510 n = fread(G.wget_buf, 1, rdsz, dfp);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100511 /* man fread:
512 * If error occurs, or EOF is reached, the return value
513 * is a short item count (or zero).
514 * fread does not distinguish between EOF and error.
515 */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200516 if (n <= 0) {
Denys Vlasenko8766a792011-02-11 21:42:00 +0100517#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
518 if (errno == EAGAIN) /* poll lied, there is no data? */
519 continue; /* yes */
520#endif
521 if (ferror(dfp))
Tanguy Pruvotf7ae0a22011-07-04 05:30:48 +0200522 bb_perror_msg_and_die(bb_msg_read_error);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100523 break; /* EOF, not error */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200524 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100525
Denys Vlasenko2384a352011-02-15 00:58:36 +0100526 xwrite(G.output_fd, G.wget_buf, n);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100527
Denys Vlasenko7f432802009-06-28 01:02:24 +0200528#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100529 G.transferred += n;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200530 progress_meter(PROGRESS_BUMP);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200531#endif
Denys Vlasenko9213a552011-02-10 13:23:45 +0100532 if (G.got_clen) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100533 G.content_len -= n;
Denys Vlasenko9213a552011-02-10 13:23:45 +0100534 if (G.content_len == 0)
535 break;
536 }
Denys Vlasenko7f432802009-06-28 01:02:24 +0200537 }
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100538#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
539 clearerr(dfp);
Denys Vlasenko88ad9da2011-02-11 23:06:21 +0100540 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100541#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +0200542 if (!G.chunked)
543 break;
544
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100545 fgets_and_trim(dfp); /* Eat empty line */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200546 get_clen:
Denys Vlasenkof836f012011-02-10 23:02:28 +0100547 fgets_and_trim(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100548 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200549 /* FIXME: error check? */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100550 if (G.content_len == 0)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200551 break; /* all done! */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100552 G.got_clen = 1;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200553 }
554
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100555 /* Draw full bar and free its resources */
Denys Vlasenko2384a352011-02-15 00:58:36 +0100556 G.chunked = 0; /* makes it show 100% even for chunked download */
557 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200558 progress_meter(PROGRESS_END);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200559}
560
Pere Orga53695632011-02-16 20:09:36 +0100561static void download_one_url(const char *url)
Eric Andersen96700832000-09-04 15:15:55 +0000562{
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100563 bool use_proxy; /* Use proxies if env vars are set */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200564 int redir_limit;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100565 len_and_sockaddr *lsa;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200566 FILE *sfp; /* socket to web/ftp server */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000567 FILE *dfp; /* socket to ftp server (data) */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100568 char *proxy = NULL;
569 char *fname_out_alloc;
Denys Vlasenko8a90e612012-02-04 19:55:27 +0100570 char *redirected_path = NULL;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100571 struct host_info server;
572 struct host_info target;
Denis Vlasenko77105632007-09-24 15:04:00 +0000573
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100574 server.allocated = NULL;
575 target.allocated = NULL;
576 server.user = NULL;
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200577 target.user = NULL;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100578
579 parse_url(url, &target);
Eric Andersen79757c92001-04-05 21:45:54 +0000580
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000581 /* Use the proxy if necessary */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100582 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000583 if (use_proxy) {
Robert Griebld7760112002-05-14 23:36:45 +0000584 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
Denys Vlasenko2384a352011-02-15 00:58:36 +0100585 use_proxy = (proxy && proxy[0]);
586 if (use_proxy)
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000587 parse_url(proxy, &server);
Robert Griebld7760112002-05-14 23:36:45 +0000588 }
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200589 if (!use_proxy) {
590 server.port = target.port;
591 if (ENABLE_FEATURE_IPV6) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100592 //free(server.allocated); - can't be non-NULL
593 server.host = server.allocated = xstrdup(target.host);
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200594 } else {
595 server.host = target.host;
596 }
597 }
598
599 if (ENABLE_FEATURE_IPV6)
600 strip_ipv6_scope_id(target.host);
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000601
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100602 /* If there was no -O FILE, guess output filename */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100603 fname_out_alloc = NULL;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +0100604 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100605 G.fname_out = bb_get_last_path_component_nostrip(target.path);
Denis Vlasenko818322b2007-09-24 18:27:04 +0000606 /* handle "wget http://kernel.org//" */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100607 if (G.fname_out[0] == '/' || !G.fname_out[0])
608 G.fname_out = (char*)"index.html";
Denis Vlasenko818322b2007-09-24 18:27:04 +0000609 /* -P DIR is considered only if there was no -O FILE */
Denys Vlasenko625f2182011-03-21 00:29:37 +0100610 else {
611 if (G.dir_prefix)
612 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
613 else {
614 /* redirects may free target.path later, need to make a copy */
615 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
616 }
617 }
Eric Andersen29edd002000-12-09 16:55:35 +0000618 }
Denis Vlasenko818322b2007-09-24 18:27:04 +0000619#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100620 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
Denis Vlasenko818322b2007-09-24 18:27:04 +0000621#endif
622
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000623 /* Determine where to start transfer */
Denys Vlasenko2384a352011-02-15 00:58:36 +0100624 G.beg_range = 0;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100625 if (option_mask32 & WGET_OPT_CONTINUE) {
Denys Vlasenko2384a352011-02-15 00:58:36 +0100626 G.output_fd = open(G.fname_out, O_WRONLY);
627 if (G.output_fd >= 0) {
628 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000629 }
630 /* File doesn't exist. We do not create file here yet.
Denys Vlasenkoa84eadf2011-02-12 23:40:31 +0100631 * We are not sure it exists on remote side */
Eric Andersen96700832000-09-04 15:15:55 +0000632 }
633
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200634 redir_limit = 5;
635 resolve_lsa:
Denis Vlasenko42823d52007-02-04 02:39:08 +0000636 lsa = xhost2sockaddr(server.host, server.port);
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100637 if (!(option_mask32 & WGET_OPT_QUIET)) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200638 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
639 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
640 free(s);
Eric Andersene6dc4392003-10-31 09:31:46 +0000641 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200642 establish_session:
Denys Vlasenko2384a352011-02-15 00:58:36 +0100643 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
644 G.got_clen = 0;
645 G.chunked = 0;
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000646 if (use_proxy || !target.is_ftp) {
Eric Andersen79757c92001-04-05 21:45:54 +0000647 /*
648 * HTTP session
649 */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200650 char *str;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200651 int status;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200652
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100653
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200654 /* Open socket to http server */
655 sfp = open_socket(lsa);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200656
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200657 /* Send HTTP request */
658 if (use_proxy) {
659 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
660 target.is_ftp ? "f" : "ht", target.host,
661 target.path);
662 } else {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100663 if (option_mask32 & WGET_OPT_POST_DATA)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200664 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
665 else
666 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
667 }
Glenn L McGrathe7bdfcc2003-08-28 22:03:19 +0000668
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200669 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100670 target.host, G.user_agent);
Eric Andersen79757c92001-04-05 21:45:54 +0000671
Denys Vlasenko9213a552011-02-10 13:23:45 +0100672 /* Ask server to close the connection as soon as we are done
673 * (IOW: we do not intend to send more requests)
674 */
675 fprintf(sfp, "Connection: close\r\n");
676
Denis Vlasenko9cade082006-11-21 10:43:02 +0000677#if ENABLE_FEATURE_WGET_AUTHENTICATION
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200678 if (target.user) {
679 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100680 base64enc(target.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200681 }
682 if (use_proxy && server.user) {
683 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100684 base64enc(server.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200685 }
Eric Andersen79757c92001-04-05 21:45:54 +0000686#endif
687
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100688 if (G.beg_range)
689 fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
Denys Vlasenko9213a552011-02-10 13:23:45 +0100690
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000691#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100692 if (G.extra_headers)
693 fputs(G.extra_headers, sfp);
Denis Vlasenko5a2ad692009-03-04 14:13:37 +0000694
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100695 if (option_mask32 & WGET_OPT_POST_DATA) {
Denys Vlasenko9213a552011-02-10 13:23:45 +0100696 fprintf(sfp,
697 "Content-Type: application/x-www-form-urlencoded\r\n"
698 "Content-Length: %u\r\n"
699 "\r\n"
700 "%s",
Vitaly Magerya700fbc32011-03-27 22:33:13 +0200701 (int) strlen(G.post_data), G.post_data
Denys Vlasenko9213a552011-02-10 13:23:45 +0100702 );
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200703 } else
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000704#endif
Denys Vlasenko9213a552011-02-10 13:23:45 +0100705 {
706 fprintf(sfp, "\r\n");
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200707 }
Eric Andersen79757c92001-04-05 21:45:54 +0000708
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +0200709 fflush(sfp);
710
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200711 /*
712 * Retrieve HTTP response line and check for "200" status code.
713 */
Denis Vlasenko023b57d2006-10-15 17:05:55 +0000714 read_response:
Denys Vlasenkof836f012011-02-10 23:02:28 +0100715 fgets_and_trim(sfp);
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000716
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100717 str = G.wget_buf;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200718 str = skip_non_whitespace(str);
719 str = skip_whitespace(str);
720 // FIXME: no error check
721 // xatou wouldn't work: "200 OK"
722 status = atoi(str);
723 switch (status) {
724 case 0:
725 case 100:
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100726 while (gethdr(sfp) != NULL)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200727 /* eat all remaining headers */;
728 goto read_response;
729 case 200:
Denis Vlasenko50b5cac2008-06-22 16:28:02 +0000730/*
731Response 204 doesn't say "null file", it says "metadata
732has changed but data didn't":
733
734"10.2.5 204 No Content
735The server has fulfilled the request but does not need to return
736an entity-body, and might want to return updated metainformation.
737The response MAY include new or updated metainformation in the form
738of entity-headers, which if present SHOULD be associated with
739the requested variant.
740
741If the client is a user agent, it SHOULD NOT change its document
742view from that which caused the request to be sent. This response
743is primarily intended to allow input for actions to take place
744without causing a change to the user agent's active document view,
745although any new or updated metainformation SHOULD be applied
746to the document currently in the user agent's active view.
747
748The 204 response MUST NOT include a message-body, and thus
749is always terminated by the first empty line after the header fields."
750
751However, in real world it was observed that some web servers
752(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
753*/
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200754 case 204:
Denys Vlasenko6030af92012-06-13 17:31:07 +0200755 if (G.beg_range != 0) {
756 /* "Range:..." was not honored by the server.
757 * Restart download from the beginning.
758 */
759 reset_beg_range_to_zero();
760 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200761 break;
Denys Vlasenkofb132e42010-10-29 11:46:52 +0200762 case 300: /* redirection */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200763 case 301:
764 case 302:
765 case 303:
766 break;
767 case 206:
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100768 if (G.beg_range)
Denis Vlasenko023b57d2006-10-15 17:05:55 +0000769 break;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200770 /* fall through */
771 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100772 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200773 }
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000774
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200775 /*
776 * Retrieve HTTP headers.
777 */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100778 while ((str = gethdr(sfp)) != NULL) {
779 static const char keywords[] ALIGN1 =
780 "content-length\0""transfer-encoding\0""location\0";
781 enum {
782 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
783 };
Matthijs van de Water0d586662009-08-22 20:19:48 +0200784 smalluint key;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100785
786 /* gethdr converted "FOO:" string to lowercase */
787
Matthijs van de Water0d586662009-08-22 20:19:48 +0200788 /* strip trailing whitespace */
789 char *s = strchrnul(str, '\0') - 1;
790 while (s >= str && (*s == ' ' || *s == '\t')) {
791 *s = '\0';
792 s--;
793 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100794 key = index_in_strings(keywords, G.wget_buf) + 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200795 if (key == KEY_content_length) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100796 G.content_len = BB_STRTOOFF(str, NULL, 10);
797 if (G.content_len < 0 || errno) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200798 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
Eric Andersen79757c92001-04-05 21:45:54 +0000799 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200800 G.got_clen = 1;
801 continue;
802 }
803 if (key == KEY_transfer_encoding) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100804 if (strcmp(str_tolower(str), "chunked") != 0)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200805 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100806 G.chunked = 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200807 }
808 if (key == KEY_location && status >= 300) {
809 if (--redir_limit == 0)
810 bb_error_msg_and_die("too many redirections");
811 fclose(sfp);
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100812 if (str[0] == '/') {
Denys Vlasenko8a90e612012-02-04 19:55:27 +0100813 free(redirected_path);
814 target.path = redirected_path = xstrdup(str+1);
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200815 /* lsa stays the same: it's on the same server */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100816 } else {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200817 parse_url(str, &target);
818 if (!use_proxy) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100819 free(server.allocated);
Pere Orga57b49092011-02-14 23:56:07 +0100820 server.allocated = NULL;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200821 server.host = target.host;
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200822 /* strip_ipv6_scope_id(target.host); - no! */
823 /* we assume remote never gives us IPv6 addr with scope id */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200824 server.port = target.port;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +0000825 free(lsa);
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200826 goto resolve_lsa;
827 } /* else: lsa stays the same: we use proxy */
Eric Andersen79757c92001-04-05 21:45:54 +0000828 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200829 goto establish_session;
Eric Andersen79757c92001-04-05 21:45:54 +0000830 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200831 }
832// if (status >= 300)
833// bb_error_msg_and_die("bad redirection (no Location: header from server)");
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000834
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200835 /* For HTTP, data is pumped over the same connection */
Eric Andersen79757c92001-04-05 21:45:54 +0000836 dfp = sfp;
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000837
838 } else {
Eric Andersen79757c92001-04-05 21:45:54 +0000839 /*
840 * FTP session
841 */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200842 sfp = prepare_ftp_session(&dfp, &target, lsa);
Eric Andersen96700832000-09-04 15:15:55 +0000843 }
Denis Vlasenko77105632007-09-24 15:04:00 +0000844
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100845 free(lsa);
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100846
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +0100847 if (!(option_mask32 & WGET_OPT_SPIDER)) {
Denys Vlasenko2384a352011-02-15 00:58:36 +0100848 if (G.output_fd < 0)
849 G.output_fd = xopen(G.fname_out, G.o_flags);
850 retrieve_file_data(dfp);
851 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
852 xclose(G.output_fd);
853 G.output_fd = -1;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +0100854 }
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +0000855 }
Eric Andersen79757c92001-04-05 21:45:54 +0000856
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200857 if (dfp != sfp) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100858 /* It's ftp. Close data connection properly */
Eric Andersen79757c92001-04-05 21:45:54 +0000859 fclose(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100860 if (ftpcmd(NULL, NULL, sfp) != 226)
861 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
862 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
Eric Andersen79757c92001-04-05 21:45:54 +0000863 }
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100864 fclose(sfp);
Denis Vlasenko77105632007-09-24 15:04:00 +0000865
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +0100866 free(server.allocated);
867 free(target.allocated);
868 free(fname_out_alloc);
Denys Vlasenko8a90e612012-02-04 19:55:27 +0100869 free(redirected_path);
Eric Andersen96700832000-09-04 15:15:55 +0000870}
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100871
872int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
873int wget_main(int argc UNUSED_PARAM, char **argv)
874{
875#if ENABLE_FEATURE_WGET_LONG_OPTIONS
876 static const char wget_longopts[] ALIGN1 =
877 /* name, has_arg, val */
878 "continue\0" No_argument "c"
879//FIXME: -s isn't --spider, it's --save-headers!
880 "spider\0" No_argument "s"
881 "quiet\0" No_argument "q"
882 "output-document\0" Required_argument "O"
883 "directory-prefix\0" Required_argument "P"
884 "proxy\0" Required_argument "Y"
885 "user-agent\0" Required_argument "U"
886#if ENABLE_FEATURE_WGET_TIMEOUT
887 "timeout\0" Required_argument "T"
888#endif
889 /* Ignored: */
890 // "tries\0" Required_argument "t"
891 /* Ignored (we always use PASV): */
892 "passive-ftp\0" No_argument "\xff"
893 "header\0" Required_argument "\xfe"
894 "post-data\0" Required_argument "\xfd"
895 /* Ignored (we don't do ssl) */
896 "no-check-certificate\0" No_argument "\xfc"
897 ;
898#endif
899
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100900#if ENABLE_FEATURE_WGET_LONG_OPTIONS
901 llist_t *headers_llist = NULL;
902#endif
903
904 INIT_G();
905
906 IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;)
907 G.proxy_flag = "on"; /* use proxies if env vars are set */
908 G.user_agent = "Wget"; /* "User-Agent" header field */
909
910#if ENABLE_FEATURE_WGET_LONG_OPTIONS
911 applet_long_options = wget_longopts;
912#endif
913 opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
914 getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
915 &G.fname_out, &G.dir_prefix,
916 &G.proxy_flag, &G.user_agent,
917 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
918 NULL /* -t RETRIES */
919 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
920 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
921 );
922 argv += optind;
923
924#if ENABLE_FEATURE_WGET_LONG_OPTIONS
925 if (headers_llist) {
926 int size = 1;
927 char *cp;
928 llist_t *ll = headers_llist;
929 while (ll) {
930 size += strlen(ll->data) + 2;
931 ll = ll->link;
932 }
933 G.extra_headers = cp = xmalloc(size);
934 while (headers_llist) {
935 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
936 }
937 }
938#endif
939
Denys Vlasenko2384a352011-02-15 00:58:36 +0100940 G.output_fd = -1;
941 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
942 if (G.fname_out) { /* -O FILE ? */
943 if (LONE_DASH(G.fname_out)) { /* -O - ? */
944 G.output_fd = 1;
945 option_mask32 &= ~WGET_OPT_CONTINUE;
946 }
947 /* compat with wget: -O FILE can overwrite */
948 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
949 }
950
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100951 while (*argv)
Pere Orga53695632011-02-16 20:09:36 +0100952 download_one_url(*argv++);
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100953
Denys Vlasenko28556b92011-02-15 11:03:53 +0100954 if (G.output_fd >= 0)
955 xclose(G.output_fd);
956
Pere Orga53695632011-02-16 20:09:36 +0100957 return EXIT_SUCCESS;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100958}