/* vi: set sw=4 ts=4: */
/*
 * wget - retrieve a file using HTTP or FTP
 *
 * Chip Rosenthal Covad Communications <chip@laserlink.net>
 * Licensed under GPLv2, see file LICENSE in this source tree.
 *
 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
 * Kuhn's copyrights are licensed GPLv2-or-later.  File as a whole remains GPLv2.
 */
#include "libbb.h"

/* Protocol trace hook.  log_io() is called with each line read by
 * fgets_and_trim() and each command sent by ftpcmd().  By default it
 * expands to nothing; enable the commented-out definition to print the
 * traffic via bb_error_msg().
 */
//#define log_io(...) bb_error_msg(__VA_ARGS__)
#define log_io(...) ((void)0)


Eric Andersen79757c92001-04-05 21:45:54 +000017struct host_info {
Denys Vlasenkoa3661092011-02-13 02:33:11 +010018 char *allocated;
Denis Vlasenko818322b2007-09-24 18:27:04 +000019 const char *path;
20 const char *user;
21 char *host;
22 int port;
23 smallint is_ftp;
Eric Andersen79757c92001-04-05 21:45:54 +000024};
25
Denis Vlasenko77105632007-09-24 15:04:00 +000026
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020027/* Globals */
Denis Vlasenko77105632007-09-24 15:04:00 +000028struct globals {
29 off_t content_len; /* Content-length of the file */
30 off_t beg_range; /* Range at which continue begins */
31#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko77105632007-09-24 15:04:00 +000032 off_t transferred; /* Number of bytes transferred so far */
33 const char *curfile; /* Name of current file being transferred */
Magnus Dammf5914992009-11-08 16:34:43 +010034 bb_progress_t pmt;
Denis Vlasenko77105632007-09-24 15:04:00 +000035#endif
Denys Vlasenkoa3661092011-02-13 02:33:11 +010036 char *dir_prefix;
37#if ENABLE_FEATURE_WGET_LONG_OPTIONS
38 char *post_data;
39 char *extra_headers;
40#endif
41 char *fname_out; /* where to direct output (-O) */
42 const char *proxy_flag; /* Use proxies if env vars are set */
43 const char *user_agent; /* "User-Agent" header field */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020044#if ENABLE_FEATURE_WGET_TIMEOUT
45 unsigned timeout_seconds;
46#endif
Denys Vlasenko2384a352011-02-15 00:58:36 +010047 int output_fd;
48 int o_flags;
Denys Vlasenko7f432802009-06-28 01:02:24 +020049 smallint chunked; /* chunked transfer encoding */
50 smallint got_clen; /* got content-length: from server */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010051 /* Local downloads do benefit from big buffer.
52 * With 512 byte buffer, it was measured to be
53 * an order of magnitude slower than with big one.
54 */
55 uint64_t just_to_align_next_member;
56 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
Denys Vlasenko98a4c7c2010-02-04 15:00:15 +010057} FIX_ALIASING;
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010058#define G (*ptr_to_globals)
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020059#define INIT_G() do { \
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010060 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020061 IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \
62} while (0)
Denis Vlasenko77105632007-09-24 15:04:00 +000063
64
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020065/* Must match option string! */
66enum {
67 WGET_OPT_CONTINUE = (1 << 0),
Denys Vlasenkofb132e42010-10-29 11:46:52 +020068 WGET_OPT_SPIDER = (1 << 1),
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020069 WGET_OPT_QUIET = (1 << 2),
70 WGET_OPT_OUTNAME = (1 << 3),
71 WGET_OPT_PREFIX = (1 << 4),
72 WGET_OPT_PROXY = (1 << 5),
73 WGET_OPT_USER_AGENT = (1 << 6),
74 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
75 WGET_OPT_RETRIES = (1 << 8),
76 WGET_OPT_PASSIVE = (1 << 9),
77 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
78 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
79};
80
/* Values for the "flag" argument of progress_meter() */
enum {
	PROGRESS_START = -1,
	PROGRESS_END   = 0,
	PROGRESS_BUMP  = 1,
};

Denis Vlasenko9cade082006-11-21 10:43:02 +000086#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko00d84172008-11-24 07:34:42 +000087static void progress_meter(int flag)
Denis Vlasenko47ddd012007-09-24 18:24:17 +000088{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020089 if (option_mask32 & WGET_OPT_QUIET)
90 return;
Denis Vlasenko47ddd012007-09-24 18:24:17 +000091
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020092 if (flag == PROGRESS_START)
Denys Vlasenkod55e1392011-02-11 18:56:13 +010093 bb_progress_init(&G.pmt, G.curfile);
Denis Vlasenko47ddd012007-09-24 18:24:17 +000094
Denys Vlasenko2384a352011-02-15 00:58:36 +010095 bb_progress_update(&G.pmt,
96 G.beg_range,
97 G.transferred,
98 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
99 );
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000100
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200101 if (flag == PROGRESS_END) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100102 bb_progress_free(&G.pmt);
Denys Vlasenko19ced5c2010-06-06 21:53:09 +0200103 bb_putchar_stderr('\n');
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100104 G.transferred = 0;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000105 }
106}
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200107#else
Denis Vlasenko00d84172008-11-24 07:34:42 +0000108static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
Eric Andersenb520e082000-10-03 00:21:45 +0000109#endif
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000110
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000111
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Edits "host" in place: "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 * Anything that is not a well-formed bracketed address is left untouched.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return;

	/* Remove the IPv6 zone identifier from the host address */
	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}

	/* A '%' located after the closing bracket (e.g. "[::1]:8%0") is not
	 * a zone identifier.  Copying "]..." forward over it would never
	 * reach the terminating NUL and would run off the buffer.
	 */
	if (scope > cp)
		return;

	/* cp points to "]...", scope points to "%eth0]..." */
	memmove(scope, cp, strlen(cp) + 1);
}

#if ENABLE_FEATURE_WGET_AUTHENTICATION
/* Base64-encode character string.
 *
 * The result is written to (and returned as) G.wget_buf, so it is only
 * valid until the buffer is reused.  Over-long input is silently cut
 * to the capped length below.
 */
static char *base64enc(const char *str)
{
	/* paranoia: largest input length we are willing to encode */
	const unsigned max_len = sizeof(G.wget_buf)/4*3 - 10;
	unsigned len = strlen(str);

	if (len > max_len)
		len = max_len;
	bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
	return G.wget_buf;
}
#endif

/* Make a string safe to print: cut it at the first byte below ' '
 * (any control character).  Bytes are compared as unsigned, so values
 * of 0x80 and above are kept.  Modifies s in place and returns it.
 */
static char* sanitize_string(char *s)
{
	unsigned char *p;

	for (p = (unsigned char *) s; *p >= ' '; p++)
		continue;
	*p = '\0';
	return s;
}

Denis Vlasenko47ddd012007-09-24 18:24:17 +0000169static FILE *open_socket(len_and_sockaddr *lsa)
170{
171 FILE *fp;
172
173 /* glibc 2.4 seems to try seeking on it - ??! */
174 /* hopefully it understands what ESPIPE means... */
175 fp = fdopen(xconnect_stream(lsa), "r+");
176 if (fp == NULL)
Denys Vlasenkodee0fc92011-02-10 10:01:49 +0100177 bb_perror_msg_and_die(bb_msg_memory_exhausted);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000178
179 return fp;
180}
181
Denys Vlasenkof836f012011-02-10 23:02:28 +0100182/* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
183static char fgets_and_trim(FILE *fp)
184{
185 char c;
186 char *buf_ptr;
187
188 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
189 bb_perror_msg_and_die("error getting response");
190
191 buf_ptr = strchrnul(G.wget_buf, '\n');
192 c = *buf_ptr;
193 *buf_ptr = '\0';
194 buf_ptr = strchrnul(G.wget_buf, '\r');
195 *buf_ptr = '\0';
196
197 log_io("< %s", G.wget_buf);
198
199 return c;
200}
201
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100202static int ftpcmd(const char *s1, const char *s2, FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000203{
204 int result;
205 if (s1) {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100206 if (!s2)
207 s2 = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000208 fprintf(fp, "%s%s\r\n", s1, s2);
209 fflush(fp);
Denys Vlasenkof836f012011-02-10 23:02:28 +0100210 log_io("> %s%s", s1, s2);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000211 }
212
213 do {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100214 fgets_and_trim(fp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100215 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000216
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100217 G.wget_buf[3] = '\0';
218 result = xatoi_positive(G.wget_buf);
219 G.wget_buf[3] = ' ';
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000220 return result;
221}
222
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100223static void parse_url(const char *src_url, struct host_info *h)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000224{
225 char *url, *p, *sp;
226
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100227 free(h->allocated);
228 h->allocated = url = xstrdup(src_url);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000229
230 if (strncmp(url, "http://", 7) == 0) {
231 h->port = bb_lookup_port("http", "tcp", 80);
232 h->host = url + 7;
233 h->is_ftp = 0;
234 } else if (strncmp(url, "ftp://", 6) == 0) {
235 h->port = bb_lookup_port("ftp", "tcp", 21);
236 h->host = url + 6;
237 h->is_ftp = 1;
238 } else
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200239 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000240
241 // FYI:
242 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
243 // 'GET /?var=a/b HTTP 1.0'
244 // and saves 'index.html?var=a%2Fb' (we save 'b')
245 // wget 'http://busybox.net?login=john@doe':
246 // request: 'GET /?login=john@doe HTTP/1.0'
247 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
248 // wget 'http://busybox.net#test/test':
249 // request: 'GET / HTTP/1.0'
250 // saves: 'index.html' (we save 'test')
251 //
252 // We also don't add unique .N suffix if file exists...
253 sp = strchr(h->host, '/');
254 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
255 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
256 if (!sp) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000257 h->path = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000258 } else if (*sp == '/') {
259 *sp = '\0';
260 h->path = sp + 1;
261 } else { // '#' or '?'
262 // http://busybox.net?login=john@doe is a valid URL
263 // memmove converts to:
264 // http:/busybox.nett?login=john@doe...
Denis Vlasenko818322b2007-09-24 18:27:04 +0000265 memmove(h->host - 1, h->host, sp - h->host);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000266 h->host--;
267 sp[-1] = '\0';
268 h->path = sp;
269 }
270
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200271 // We used to set h->user to NULL here, but this interferes
272 // with handling of code 302 ("object was moved")
273
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000274 sp = strrchr(h->host, '@');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000275 if (sp != NULL) {
276 h->user = h->host;
277 *sp = '\0';
278 h->host = sp + 1;
279 }
280
281 sp = h->host;
282}
283
Denys Vlasenkof836f012011-02-10 23:02:28 +0100284static char *gethdr(FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000285{
286 char *s, *hdrval;
287 int c;
288
289 /* *istrunc = 0; */
290
291 /* retrieve header line */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100292 c = fgets_and_trim(fp);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000293
Denys Vlasenkof836f012011-02-10 23:02:28 +0100294 /* end of the headers? */
295 if (G.wget_buf[0] == '\0')
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000296 return NULL;
297
298 /* convert the header name to lower case */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100299 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.'; ++s) {
Denys Vlasenko48363312010-04-04 15:29:32 +0200300 /* tolower for "A-Z", no-op for "0-9a-z-." */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100301 *s |= 0x20;
Denys Vlasenko48363312010-04-04 15:29:32 +0200302 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000303
304 /* verify we are at the end of the header name */
305 if (*s != ':')
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100306 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000307
308 /* locate the start of the header value */
309 *s++ = '\0';
310 hdrval = skip_whitespace(s);
311
Denys Vlasenkof836f012011-02-10 23:02:28 +0100312 if (c != '\n') {
313 /* Rats! The buffer isn't big enough to hold the entire header value */
314 while (c = getc(fp), c != EOF && c != '\n')
315 continue;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000316 }
317
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000318 return hdrval;
319}
320
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
/* URL encode, see RFC 2396.
 *
 * Letters, digits and the characters !&'()*-.=_~ are copied as is,
 * every other byte becomes "%XX" (upper-case hex).
 * Returns a newly allocated string; the caller frees it.
 */
static char *URL_escape(const char *str)
{
	char *dst;
	/* worst case: every input byte expands to three */
	char *res = dst = xmalloc(strlen(str) * 3 + 1);
	unsigned char c;

	while (1) {
		c = *str++;
		if (c == '\0'
		/* || strchr("!&'()*-.=_~", c) - more code */
		 || c == '!'
		 || c == '&'
		 || c == '\''
		 || c == '('
		 || c == ')'
		 || c == '*'
		 || c == '-'
		 || c == '.'
		 || c == '='
		 || c == '_'
		 || c == '~'
		 || (c >= '0' && c <= '9')
		 || ((c|0x20) >= 'a' && (c|0x20) <= 'z')
		) {
			/* the terminating NUL is copied here too */
			*dst++ = c;
			if (c == '\0')
				return res;
		} else {
			*dst++ = '%';
			*dst++ = bb_hexdigits_upcase[c >> 4];
			*dst++ = bb_hexdigits_upcase[c & 0xf];
		}
	}
}
#endif

Denys Vlasenko7f432802009-06-28 01:02:24 +0200359static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
360{
Denys Vlasenko7f432802009-06-28 01:02:24 +0200361 FILE *sfp;
362 char *str;
363 int port;
364
365 if (!target->user)
366 target->user = xstrdup("anonymous:busybox@");
367
368 sfp = open_socket(lsa);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100369 if (ftpcmd(NULL, NULL, sfp) != 220)
370 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200371
372 /*
373 * Splitting username:password pair,
374 * trying to log in
375 */
376 str = strchr(target->user, ':');
377 if (str)
378 *str++ = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100379 switch (ftpcmd("USER ", target->user, sfp)) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200380 case 230:
381 break;
382 case 331:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100383 if (ftpcmd("PASS ", str, sfp) == 230)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200384 break;
385 /* fall through (failed login) */
386 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100387 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200388 }
389
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100390 ftpcmd("TYPE I", NULL, sfp);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200391
392 /*
393 * Querying file size
394 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100395 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
396 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100397 if (G.content_len < 0 || errno) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200398 bb_error_msg_and_die("SIZE value is garbage");
399 }
400 G.got_clen = 1;
401 }
402
403 /*
404 * Entering passive mode
405 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100406 if (ftpcmd("PASV", NULL, sfp) != 227) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200407 pasv_error:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100408 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200409 }
410 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
411 // Server's IP is N1.N2.N3.N4 (we ignore it)
412 // Server's port for data connection is P1*256+P2
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100413 str = strrchr(G.wget_buf, ')');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200414 if (str) str[0] = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100415 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200416 if (!str) goto pasv_error;
417 port = xatou_range(str+1, 0, 255);
418 *str = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100419 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200420 if (!str) goto pasv_error;
421 port += xatou_range(str+1, 0, 255) * 256;
422 set_nport(lsa, htons(port));
423
424 *dfpp = open_socket(lsa);
425
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100426 if (G.beg_range) {
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100427 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
428 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100429 G.content_len -= G.beg_range;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200430 }
431
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100432 if (ftpcmd("RETR ", target->path, sfp) > 150)
433 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200434
435 return sfp;
436}
437
Denys Vlasenko2384a352011-02-15 00:58:36 +0100438static void NOINLINE retrieve_file_data(FILE *dfp)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200439{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200440#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
441# if ENABLE_FEATURE_WGET_TIMEOUT
442 unsigned second_cnt;
443# endif
444 struct pollfd polldata;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200445
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200446 polldata.fd = fileno(dfp);
447 polldata.events = POLLIN | POLLPRI;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200448#endif
449 progress_meter(PROGRESS_START);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200450
451 if (G.chunked)
452 goto get_clen;
453
454 /* Loops only if chunked */
455 while (1) {
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100456
457#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
458 /* Must use nonblocking I/O, otherwise fread will loop
459 * and *block* until it reads full buffer,
460 * which messes up progress bar and/or timeout logic.
461 * Because of nonblocking I/O, we need to dance
462 * very carefully around EAGAIN. See explanation at
463 * clearerr() call.
464 */
465 ndelay_on(polldata.fd);
466#endif
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100467 while (1) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200468 int n;
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100469 unsigned rdsz;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200470
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100471 rdsz = sizeof(G.wget_buf);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100472 if (G.got_clen) {
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100473 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100474 if ((int)G.content_len <= 0)
475 break;
476 rdsz = (unsigned)G.content_len;
477 }
478 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100479
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200480#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
481# if ENABLE_FEATURE_WGET_TIMEOUT
482 second_cnt = G.timeout_seconds;
483# endif
484 while (1) {
485 if (safe_poll(&polldata, 1, 1000) != 0)
486 break; /* error, EOF, or data is available */
487# if ENABLE_FEATURE_WGET_TIMEOUT
488 if (second_cnt != 0 && --second_cnt == 0) {
489 progress_meter(PROGRESS_END);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100490 bb_error_msg_and_die("download timed out");
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200491 }
492# endif
493 /* Needed for "stalled" indicator */
494 progress_meter(PROGRESS_BUMP);
495 }
Denys Vlasenkof9af3752011-02-11 22:01:33 +0100496
Denys Vlasenko8766a792011-02-11 21:42:00 +0100497 /* fread internally uses read loop, which in our case
498 * is usually exited when we get EAGAIN.
499 * In this case, libc sets error marker on the stream.
500 * Need to clear it before next fread to avoid possible
501 * rare false positive ferror below. Rare because usually
502 * fread gets more than zero bytes, and we don't fall
503 * into if (n <= 0) ...
504 */
505 clearerr(dfp);
506 errno = 0;
Denys Vlasenkof9af3752011-02-11 22:01:33 +0100507#endif
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100508 n = fread(G.wget_buf, 1, rdsz, dfp);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100509 /* man fread:
510 * If error occurs, or EOF is reached, the return value
511 * is a short item count (or zero).
512 * fread does not distinguish between EOF and error.
513 */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200514 if (n <= 0) {
Denys Vlasenko8766a792011-02-11 21:42:00 +0100515#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
516 if (errno == EAGAIN) /* poll lied, there is no data? */
517 continue; /* yes */
518#endif
519 if (ferror(dfp))
520 bb_perror_msg_and_die(bb_msg_read_error);
521 break; /* EOF, not error */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200522 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100523
Denys Vlasenko2384a352011-02-15 00:58:36 +0100524 xwrite(G.output_fd, G.wget_buf, n);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100525
Denys Vlasenko7f432802009-06-28 01:02:24 +0200526#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100527 G.transferred += n;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200528 progress_meter(PROGRESS_BUMP);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200529#endif
Denys Vlasenko9213a552011-02-10 13:23:45 +0100530 if (G.got_clen) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100531 G.content_len -= n;
Denys Vlasenko9213a552011-02-10 13:23:45 +0100532 if (G.content_len == 0)
533 break;
534 }
Denys Vlasenko7f432802009-06-28 01:02:24 +0200535 }
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100536#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
537 clearerr(dfp);
Denys Vlasenko88ad9da2011-02-11 23:06:21 +0100538 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100539#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +0200540 if (!G.chunked)
541 break;
542
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100543 fgets_and_trim(dfp); /* Eat empty line */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200544 get_clen:
Denys Vlasenkof836f012011-02-10 23:02:28 +0100545 fgets_and_trim(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100546 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200547 /* FIXME: error check? */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100548 if (G.content_len == 0)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200549 break; /* all done! */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100550 G.got_clen = 1;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200551 }
552
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100553 /* Draw full bar and free its resources */
Denys Vlasenko2384a352011-02-15 00:58:36 +0100554 G.chunked = 0; /* makes it show 100% even for chunked download */
555 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200556 progress_meter(PROGRESS_END);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200557}
558
Pere Orga53695632011-02-16 20:09:36 +0100559static void download_one_url(const char *url)
Eric Andersen96700832000-09-04 15:15:55 +0000560{
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100561 bool use_proxy; /* Use proxies if env vars are set */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200562 int redir_limit;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100563 len_and_sockaddr *lsa;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200564 FILE *sfp; /* socket to web/ftp server */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000565 FILE *dfp; /* socket to ftp server (data) */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100566 char *proxy = NULL;
567 char *fname_out_alloc;
568 struct host_info server;
569 struct host_info target;
Denis Vlasenko77105632007-09-24 15:04:00 +0000570
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100571 server.allocated = NULL;
572 target.allocated = NULL;
573 server.user = NULL;
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200574 target.user = NULL;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100575
576 parse_url(url, &target);
Eric Andersen79757c92001-04-05 21:45:54 +0000577
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000578 /* Use the proxy if necessary */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100579 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000580 if (use_proxy) {
Robert Griebld7760112002-05-14 23:36:45 +0000581 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
Denys Vlasenko2384a352011-02-15 00:58:36 +0100582 use_proxy = (proxy && proxy[0]);
583 if (use_proxy)
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000584 parse_url(proxy, &server);
Robert Griebld7760112002-05-14 23:36:45 +0000585 }
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200586 if (!use_proxy) {
587 server.port = target.port;
588 if (ENABLE_FEATURE_IPV6) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100589 //free(server.allocated); - can't be non-NULL
590 server.host = server.allocated = xstrdup(target.host);
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200591 } else {
592 server.host = target.host;
593 }
594 }
595
596 if (ENABLE_FEATURE_IPV6)
597 strip_ipv6_scope_id(target.host);
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000598
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100599 /* If there was no -O FILE, guess output filename */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100600 fname_out_alloc = NULL;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +0100601 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100602 G.fname_out = bb_get_last_path_component_nostrip(target.path);
Denis Vlasenko818322b2007-09-24 18:27:04 +0000603 /* handle "wget http://kernel.org//" */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100604 if (G.fname_out[0] == '/' || !G.fname_out[0])
605 G.fname_out = (char*)"index.html";
Denis Vlasenko818322b2007-09-24 18:27:04 +0000606 /* -P DIR is considered only if there was no -O FILE */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100607 if (G.dir_prefix)
608 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
Eric Andersen29edd002000-12-09 16:55:35 +0000609 }
Denis Vlasenko818322b2007-09-24 18:27:04 +0000610#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100611 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
Denis Vlasenko818322b2007-09-24 18:27:04 +0000612#endif
613
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000614 /* Determine where to start transfer */
Denys Vlasenko2384a352011-02-15 00:58:36 +0100615 G.beg_range = 0;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100616 if (option_mask32 & WGET_OPT_CONTINUE) {
Denys Vlasenko2384a352011-02-15 00:58:36 +0100617 G.output_fd = open(G.fname_out, O_WRONLY);
618 if (G.output_fd >= 0) {
619 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000620 }
621 /* File doesn't exist. We do not create file here yet.
Denys Vlasenkoa84eadf2011-02-12 23:40:31 +0100622 * We are not sure it exists on remote side */
Eric Andersen96700832000-09-04 15:15:55 +0000623 }
624
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200625 redir_limit = 5;
626 resolve_lsa:
Denis Vlasenko42823d52007-02-04 02:39:08 +0000627 lsa = xhost2sockaddr(server.host, server.port);
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100628 if (!(option_mask32 & WGET_OPT_QUIET)) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200629 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
630 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
631 free(s);
Eric Andersene6dc4392003-10-31 09:31:46 +0000632 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200633 establish_session:
Denys Vlasenko2384a352011-02-15 00:58:36 +0100634 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
635 G.got_clen = 0;
636 G.chunked = 0;
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000637 if (use_proxy || !target.is_ftp) {
Eric Andersen79757c92001-04-05 21:45:54 +0000638 /*
639 * HTTP session
640 */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200641 char *str;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200642 int status;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200643
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100644
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200645 /* Open socket to http server */
646 sfp = open_socket(lsa);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200647
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200648 /* Send HTTP request */
649 if (use_proxy) {
650 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
651 target.is_ftp ? "f" : "ht", target.host,
652 target.path);
653 } else {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100654 if (option_mask32 & WGET_OPT_POST_DATA)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200655 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
656 else
657 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
658 }
Glenn L McGrathe7bdfcc2003-08-28 22:03:19 +0000659
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200660 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100661 target.host, G.user_agent);
Eric Andersen79757c92001-04-05 21:45:54 +0000662
Denys Vlasenko9213a552011-02-10 13:23:45 +0100663 /* Ask server to close the connection as soon as we are done
664 * (IOW: we do not intend to send more requests)
665 */
666 fprintf(sfp, "Connection: close\r\n");
667
Denis Vlasenko9cade082006-11-21 10:43:02 +0000668#if ENABLE_FEATURE_WGET_AUTHENTICATION
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200669 if (target.user) {
670 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100671 base64enc(target.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200672 }
673 if (use_proxy && server.user) {
674 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100675 base64enc(server.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200676 }
Eric Andersen79757c92001-04-05 21:45:54 +0000677#endif
678
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100679 if (G.beg_range)
680 fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
Denys Vlasenko9213a552011-02-10 13:23:45 +0100681
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000682#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100683 if (G.extra_headers)
684 fputs(G.extra_headers, sfp);
Denis Vlasenko5a2ad692009-03-04 14:13:37 +0000685
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100686 if (option_mask32 & WGET_OPT_POST_DATA) {
687 char *estr = URL_escape(G.post_data);
Denys Vlasenko9213a552011-02-10 13:23:45 +0100688 fprintf(sfp,
689 "Content-Type: application/x-www-form-urlencoded\r\n"
690 "Content-Length: %u\r\n"
691 "\r\n"
692 "%s",
693 (int) strlen(estr), estr
694 );
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200695 free(estr);
696 } else
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000697#endif
Denys Vlasenko9213a552011-02-10 13:23:45 +0100698 {
699 fprintf(sfp, "\r\n");
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200700 }
Eric Andersen79757c92001-04-05 21:45:54 +0000701
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +0200702 fflush(sfp);
703
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200704 /*
705 * Retrieve HTTP response line and check for "200" status code.
706 */
Denis Vlasenko023b57d2006-10-15 17:05:55 +0000707 read_response:
Denys Vlasenkof836f012011-02-10 23:02:28 +0100708 fgets_and_trim(sfp);
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000709
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100710 str = G.wget_buf;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200711 str = skip_non_whitespace(str);
712 str = skip_whitespace(str);
713 // FIXME: no error check
714 // xatou wouldn't work: "200 OK"
715 status = atoi(str);
716 switch (status) {
717 case 0:
718 case 100:
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100719 while (gethdr(sfp) != NULL)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200720 /* eat all remaining headers */;
721 goto read_response;
722 case 200:
Denis Vlasenko50b5cac2008-06-22 16:28:02 +0000723/*
724Response 204 doesn't say "null file", it says "metadata
725has changed but data didn't":
726
727"10.2.5 204 No Content
728The server has fulfilled the request but does not need to return
729an entity-body, and might want to return updated metainformation.
730The response MAY include new or updated metainformation in the form
731of entity-headers, which if present SHOULD be associated with
732the requested variant.
733
734If the client is a user agent, it SHOULD NOT change its document
735view from that which caused the request to be sent. This response
736is primarily intended to allow input for actions to take place
737without causing a change to the user agent's active document view,
738although any new or updated metainformation SHOULD be applied
739to the document currently in the user agent's active view.
740
741The 204 response MUST NOT include a message-body, and thus
742is always terminated by the first empty line after the header fields."
743
744However, in real world it was observed that some web servers
745(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
746*/
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200747 case 204:
748 break;
Denys Vlasenkofb132e42010-10-29 11:46:52 +0200749 case 300: /* redirection */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200750 case 301:
751 case 302:
752 case 303:
753 break;
754 case 206:
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100755 if (G.beg_range)
Denis Vlasenko023b57d2006-10-15 17:05:55 +0000756 break;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200757 /* fall through */
758 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100759 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200760 }
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000761
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200762 /*
763 * Retrieve HTTP headers.
764 */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100765 while ((str = gethdr(sfp)) != NULL) {
766 static const char keywords[] ALIGN1 =
767 "content-length\0""transfer-encoding\0""location\0";
768 enum {
769 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
770 };
Matthijs van de Water0d586662009-08-22 20:19:48 +0200771 smalluint key;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100772
773 /* gethdr converted "FOO:" string to lowercase */
774
Matthijs van de Water0d586662009-08-22 20:19:48 +0200775 /* strip trailing whitespace */
776 char *s = strchrnul(str, '\0') - 1;
777 while (s >= str && (*s == ' ' || *s == '\t')) {
778 *s = '\0';
779 s--;
780 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100781 key = index_in_strings(keywords, G.wget_buf) + 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200782 if (key == KEY_content_length) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100783 G.content_len = BB_STRTOOFF(str, NULL, 10);
784 if (G.content_len < 0 || errno) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200785 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
Eric Andersen79757c92001-04-05 21:45:54 +0000786 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200787 G.got_clen = 1;
788 continue;
789 }
790 if (key == KEY_transfer_encoding) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100791 if (strcmp(str_tolower(str), "chunked") != 0)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200792 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100793 G.chunked = 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200794 }
795 if (key == KEY_location && status >= 300) {
796 if (--redir_limit == 0)
797 bb_error_msg_and_die("too many redirections");
798 fclose(sfp);
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100799 if (str[0] == '/') {
800 free(target.allocated);
801 target.path = target.allocated = xstrdup(str+1);
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200802 /* lsa stays the same: it's on the same server */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100803 } else {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200804 parse_url(str, &target);
805 if (!use_proxy) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100806 free(server.allocated);
Pere Orga57b49092011-02-14 23:56:07 +0100807 server.allocated = NULL;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200808 server.host = target.host;
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200809 /* strip_ipv6_scope_id(target.host); - no! */
810 /* we assume remote never gives us IPv6 addr with scope id */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200811 server.port = target.port;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +0000812 free(lsa);
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200813 goto resolve_lsa;
814 } /* else: lsa stays the same: we use proxy */
Eric Andersen79757c92001-04-05 21:45:54 +0000815 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200816 goto establish_session;
Eric Andersen79757c92001-04-05 21:45:54 +0000817 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200818 }
819// if (status >= 300)
820// bb_error_msg_and_die("bad redirection (no Location: header from server)");
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000821
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200822 /* For HTTP, data is pumped over the same connection */
Eric Andersen79757c92001-04-05 21:45:54 +0000823 dfp = sfp;
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000824
825 } else {
Eric Andersen79757c92001-04-05 21:45:54 +0000826 /*
827 * FTP session
828 */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200829 sfp = prepare_ftp_session(&dfp, &target, lsa);
Eric Andersen96700832000-09-04 15:15:55 +0000830 }
Denis Vlasenko77105632007-09-24 15:04:00 +0000831
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100832 free(lsa);
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100833
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +0100834 if (!(option_mask32 & WGET_OPT_SPIDER)) {
Denys Vlasenko2384a352011-02-15 00:58:36 +0100835 if (G.output_fd < 0)
836 G.output_fd = xopen(G.fname_out, G.o_flags);
837 retrieve_file_data(dfp);
838 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
839 xclose(G.output_fd);
840 G.output_fd = -1;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +0100841 }
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +0000842 }
Eric Andersen79757c92001-04-05 21:45:54 +0000843
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200844 if (dfp != sfp) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100845 /* It's ftp. Close data connection properly */
Eric Andersen79757c92001-04-05 21:45:54 +0000846 fclose(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100847 if (ftpcmd(NULL, NULL, sfp) != 226)
848 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
849 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
Eric Andersen79757c92001-04-05 21:45:54 +0000850 }
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100851 fclose(sfp);
Denis Vlasenko77105632007-09-24 15:04:00 +0000852
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +0100853 free(server.allocated);
854 free(target.allocated);
855 free(fname_out_alloc);
Eric Andersen96700832000-09-04 15:15:55 +0000856}
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100857
858int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
859int wget_main(int argc UNUSED_PARAM, char **argv)
860{
861#if ENABLE_FEATURE_WGET_LONG_OPTIONS
862 static const char wget_longopts[] ALIGN1 =
863 /* name, has_arg, val */
864 "continue\0" No_argument "c"
865//FIXME: -s isn't --spider, it's --save-headers!
866 "spider\0" No_argument "s"
867 "quiet\0" No_argument "q"
868 "output-document\0" Required_argument "O"
869 "directory-prefix\0" Required_argument "P"
870 "proxy\0" Required_argument "Y"
871 "user-agent\0" Required_argument "U"
872#if ENABLE_FEATURE_WGET_TIMEOUT
873 "timeout\0" Required_argument "T"
874#endif
875 /* Ignored: */
876 // "tries\0" Required_argument "t"
877 /* Ignored (we always use PASV): */
878 "passive-ftp\0" No_argument "\xff"
879 "header\0" Required_argument "\xfe"
880 "post-data\0" Required_argument "\xfd"
881 /* Ignored (we don't do ssl) */
882 "no-check-certificate\0" No_argument "\xfc"
883 ;
884#endif
885
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100886#if ENABLE_FEATURE_WGET_LONG_OPTIONS
887 llist_t *headers_llist = NULL;
888#endif
889
890 INIT_G();
891
892 IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;)
893 G.proxy_flag = "on"; /* use proxies if env vars are set */
894 G.user_agent = "Wget"; /* "User-Agent" header field */
895
896#if ENABLE_FEATURE_WGET_LONG_OPTIONS
897 applet_long_options = wget_longopts;
898#endif
899 opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
900 getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
901 &G.fname_out, &G.dir_prefix,
902 &G.proxy_flag, &G.user_agent,
903 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
904 NULL /* -t RETRIES */
905 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
906 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
907 );
908 argv += optind;
909
910#if ENABLE_FEATURE_WGET_LONG_OPTIONS
911 if (headers_llist) {
912 int size = 1;
913 char *cp;
914 llist_t *ll = headers_llist;
915 while (ll) {
916 size += strlen(ll->data) + 2;
917 ll = ll->link;
918 }
919 G.extra_headers = cp = xmalloc(size);
920 while (headers_llist) {
921 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
922 }
923 }
924#endif
925
Denys Vlasenko2384a352011-02-15 00:58:36 +0100926 G.output_fd = -1;
927 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
928 if (G.fname_out) { /* -O FILE ? */
929 if (LONE_DASH(G.fname_out)) { /* -O - ? */
930 G.output_fd = 1;
931 option_mask32 &= ~WGET_OPT_CONTINUE;
932 }
933 /* compat with wget: -O FILE can overwrite */
934 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
935 }
936
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100937 while (*argv)
Pere Orga53695632011-02-16 20:09:36 +0100938 download_one_url(*argv++);
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100939
Denys Vlasenko28556b92011-02-15 11:03:53 +0100940 if (G.output_fd >= 0)
941 xclose(G.output_fd);
942
Pere Orga53695632011-02-16 20:09:36 +0100943 return EXIT_SUCCESS;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100944}