blob: e15f68ddd01bb2554dd0dc147417d5d232cf0730 [file] [log] [blame]
Eric Andersen96700832000-09-04 15:15:55 +00001/* vi: set sw=4 ts=4: */
2/*
Eric Andersen79757c92001-04-05 21:45:54 +00003 * wget - retrieve a file using HTTP or FTP
Eric Andersen96700832000-09-04 15:15:55 +00004 *
Eric Andersen4e573f42000-11-14 23:29:24 +00005 * Chip Rosenthal Covad Communications <chip@laserlink.net>
Denys Vlasenko0ef64bd2010-08-16 20:14:46 +02006 * Licensed under GPLv2, see file LICENSE in this source tree.
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +02007 *
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
Denys Vlasenkofb132e42010-10-29 11:46:52 +02009 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
Eric Andersen96700832000-09-04 15:15:55 +000010 */
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +010011
12//usage:#define wget_trivial_usage
13//usage: IF_FEATURE_WGET_LONG_OPTIONS(
14//usage: "[-c|--continue] [-s|--spider] [-q|--quiet] [-O|--output-document FILE]\n"
15//usage: " [--header 'header: value'] [-Y|--proxy on/off] [-P DIR]\n"
Tanguy Pruvot823694d2012-11-18 13:20:29 +010016/* Since we ignore these opts, we don't show them in --help */
17/* //usage: " [--no-check-certificate] [--no-cache]" */
18//usage: " [-U|--user-agent AGENT]" IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +010019//usage: )
20//usage: IF_NOT_FEATURE_WGET_LONG_OPTIONS(
21//usage: "[-csq] [-O FILE] [-Y on/off] [-P DIR] [-U AGENT]"
22//usage: IF_FEATURE_WGET_TIMEOUT(" [-T SEC]") " URL..."
23//usage: )
24//usage:#define wget_full_usage "\n\n"
25//usage: "Retrieve files via HTTP or FTP\n"
Denys Vlasenkoe2e55b02011-03-21 00:37:05 +010026//usage: "\n -s Spider mode - only check file existence"
27//usage: "\n -c Continue retrieval of aborted transfer"
28//usage: "\n -q Quiet"
29//usage: "\n -P DIR Save to DIR (default .)"
30//usage: IF_FEATURE_WGET_TIMEOUT(
31//usage: "\n -T SEC Network read timeout is SEC seconds"
32//usage: )
33//usage: "\n -O FILE Save to FILE ('-' for stdout)"
34//usage: "\n -U STR Use STR for User-Agent header"
35//usage: "\n -Y Use proxy ('on' or 'off')"
36
Denis Vlasenkob6adbf12007-05-26 19:00:18 +000037#include "libbb.h"
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +000038
/* Debug tracing hook: as shipped, log_io() expands to nothing.
 * Change "#if 0" to "#if 1" to route every log_io() call to bb_error_msg().
 */
#if 0
# define log_io(...) bb_error_msg(__VA_ARGS__)
#else
# define log_io(...) ((void)0)
#endif
Denys Vlasenkof836f012011-02-10 23:02:28 +010044
45
Eric Andersen79757c92001-04-05 21:45:54 +000046struct host_info {
Denys Vlasenkoa3661092011-02-13 02:33:11 +010047 char *allocated;
Denis Vlasenko818322b2007-09-24 18:27:04 +000048 const char *path;
49 const char *user;
50 char *host;
51 int port;
52 smallint is_ftp;
Eric Andersen79757c92001-04-05 21:45:54 +000053};
54
Denis Vlasenko77105632007-09-24 15:04:00 +000055
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020056/* Globals */
Denis Vlasenko77105632007-09-24 15:04:00 +000057struct globals {
58 off_t content_len; /* Content-length of the file */
59 off_t beg_range; /* Range at which continue begins */
60#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko77105632007-09-24 15:04:00 +000061 off_t transferred; /* Number of bytes transferred so far */
62 const char *curfile; /* Name of current file being transferred */
Magnus Dammf5914992009-11-08 16:34:43 +010063 bb_progress_t pmt;
Denis Vlasenko77105632007-09-24 15:04:00 +000064#endif
maxwen27116ba2015-08-14 21:41:28 +020065 char *dir_prefix;
Denys Vlasenkoa3661092011-02-13 02:33:11 +010066#if ENABLE_FEATURE_WGET_LONG_OPTIONS
maxwen27116ba2015-08-14 21:41:28 +020067 char *post_data;
68 char *extra_headers;
Denys Vlasenkoa3661092011-02-13 02:33:11 +010069#endif
maxwen27116ba2015-08-14 21:41:28 +020070 char *fname_out; /* where to direct output (-O) */
71 const char *proxy_flag; /* Use proxies if env vars are set */
72 const char *user_agent; /* "User-Agent" header field */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020073#if ENABLE_FEATURE_WGET_TIMEOUT
74 unsigned timeout_seconds;
maxwen27116ba2015-08-14 21:41:28 +020075 bool connecting;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020076#endif
Denys Vlasenko2384a352011-02-15 00:58:36 +010077 int output_fd;
78 int o_flags;
Denys Vlasenko7f432802009-06-28 01:02:24 +020079 smallint chunked; /* chunked transfer encoding */
80 smallint got_clen; /* got content-length: from server */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010081 /* Local downloads do benefit from big buffer.
82 * With 512 byte buffer, it was measured to be
83 * an order of magnitude slower than with big one.
84 */
85 uint64_t just_to_align_next_member;
86 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
Denys Vlasenko98a4c7c2010-02-04 15:00:15 +010087} FIX_ALIASING;
/* G is the applet state; it lives in a heap block reached via ptr_to_globals */
#define G (*ptr_to_globals)
/* Allocate the zero-filled state block and publish it */
#define INIT_G() do { \
	SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
} while (0)
/* Release the state block */
#define FINI_G() do { \
	FREE_PTR_TO_GLOBALS(); \
} while (0)
Denis Vlasenko77105632007-09-24 15:04:00 +000095
96
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020097/* Must match option string! */
98enum {
99 WGET_OPT_CONTINUE = (1 << 0),
Denys Vlasenkofb132e42010-10-29 11:46:52 +0200100 WGET_OPT_SPIDER = (1 << 1),
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200101 WGET_OPT_QUIET = (1 << 2),
102 WGET_OPT_OUTNAME = (1 << 3),
103 WGET_OPT_PREFIX = (1 << 4),
104 WGET_OPT_PROXY = (1 << 5),
105 WGET_OPT_USER_AGENT = (1 << 6),
106 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
107 WGET_OPT_RETRIES = (1 << 8),
108 WGET_OPT_PASSIVE = (1 << 9),
109 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
110 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
111};
112
/* Values accepted by progress_meter() */
enum {
	PROGRESS_START = -1, /* set up the bar, then draw it */
	PROGRESS_END   = 0,  /* draw it one last time and release it */
	PROGRESS_BUMP  = 1,  /* plain redraw */
};
Denis Vlasenko9cade082006-11-21 10:43:02 +0000118#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko00d84172008-11-24 07:34:42 +0000119static void progress_meter(int flag)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000120{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200121 if (option_mask32 & WGET_OPT_QUIET)
122 return;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000123
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200124 if (flag == PROGRESS_START)
Denys Vlasenkod55e1392011-02-11 18:56:13 +0100125 bb_progress_init(&G.pmt, G.curfile);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000126
Denys Vlasenko2384a352011-02-15 00:58:36 +0100127 bb_progress_update(&G.pmt,
128 G.beg_range,
129 G.transferred,
130 (G.chunked || !G.got_clen) ? 0 : G.beg_range + G.transferred + G.content_len
131 );
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000132
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200133 if (flag == PROGRESS_END) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100134 bb_progress_free(&G.pmt);
Denys Vlasenko19ced5c2010-06-06 21:53:09 +0200135 bb_putchar_stderr('\n');
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100136 G.transferred = 0;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000137 }
138}
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200139#else
Denis Vlasenko00d84172008-11-24 07:34:42 +0000140static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
Eric Andersenb520e082000-10-03 00:21:45 +0000141#endif
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000142
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000143
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Edits host in place: "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 * Anything that is not "[addr%zone]" or "[addr%zone]:port" is left untouched.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}

	/* Only a '%' inside the brackets is a zone identifier.
	 * A '%' after ']' (as in "[::1]:80%x") must be ignored: moving
	 * the tail to a higher address would write past the end of the string.
	 */
	scope = strchr(host, '%');
	if (!scope || scope > cp)
		return;

	/* cp points to "]...", scope points to "%eth0]...":
	 * pull the tail (including its NUL) down over the zone identifier */
	memmove(scope, cp, strlen(cp) + 1);
}
179
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/* Base64-encodes str into G.wget_buf and returns G.wget_buf.
 * The result is overwritten by the next user of that buffer.
 * Input too long for the buffer is silently cut short.
 */
static char *base64enc(const char *str)
{
	/* 3 input bytes become 4 output bytes; keep some slack */
	const size_t max_len = sizeof(G.wget_buf)/4*3 - 10;
	unsigned len = strlen(str);

	if (len > max_len) /* paranoia */
		len = max_len;
	bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
	return G.wget_buf;
}
#endif
191
/* Cuts s at its first byte below ' ' (any control character) and returns s.
 * Bytes are compared as unsigned, so values 0x80..0xff are kept.
 */
static char* sanitize_string(char *s)
{
	unsigned char *cut;

	for (cut = (unsigned char *) s; *cut >= ' '; cut++)
		continue;
	*cut = '\0';
	return s;
}
200
#if ENABLE_FEATURE_WGET_TIMEOUT
/* Signal handler: aborts the program, but only while G.connecting is set;
 * at any other time the signal is ignored.
 */
static void alarm_handler(int sig UNUSED_PARAM)
{
	if (!G.connecting)
		return;
	/* This is theoretically unsafe (uses stdio and malloc in signal handler) */
	bb_error_msg_and_die("download timed out");
}
#endif
209
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000210static FILE *open_socket(len_and_sockaddr *lsa)
211{
maxwen27116ba2015-08-14 21:41:28 +0200212 int fd;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000213 FILE *fp;
214
maxwen27116ba2015-08-14 21:41:28 +0200215 IF_FEATURE_WGET_TIMEOUT(alarm(G.timeout_seconds); G.connecting = 1;)
216 fd = xconnect_stream(lsa);
217 IF_FEATURE_WGET_TIMEOUT(G.connecting = 0;)
218
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000219 /* glibc 2.4 seems to try seeking on it - ??! */
220 /* hopefully it understands what ESPIPE means... */
maxwen27116ba2015-08-14 21:41:28 +0200221 fp = fdopen(fd, "r+");
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000222 if (fp == NULL)
Tanguy Pruvot8aeb3712011-06-30 08:59:26 +0200223 bb_perror_msg_and_die("%s", bb_msg_memory_exhausted);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000224
225 return fp;
226}
227
Denys Vlasenkof836f012011-02-10 23:02:28 +0100228/* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
maxwen27116ba2015-08-14 21:41:28 +0200229/* FIXME: does not respect FEATURE_WGET_TIMEOUT and -T N: */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100230static char fgets_and_trim(FILE *fp)
231{
232 char c;
233 char *buf_ptr;
234
235 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
236 bb_perror_msg_and_die("error getting response");
237
238 buf_ptr = strchrnul(G.wget_buf, '\n');
239 c = *buf_ptr;
240 *buf_ptr = '\0';
241 buf_ptr = strchrnul(G.wget_buf, '\r');
242 *buf_ptr = '\0';
243
244 log_io("< %s", G.wget_buf);
245
246 return c;
247}
248
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100249static int ftpcmd(const char *s1, const char *s2, FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000250{
251 int result;
252 if (s1) {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100253 if (!s2)
254 s2 = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000255 fprintf(fp, "%s%s\r\n", s1, s2);
256 fflush(fp);
Denys Vlasenkof836f012011-02-10 23:02:28 +0100257 log_io("> %s%s", s1, s2);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000258 }
259
260 do {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100261 fgets_and_trim(fp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100262 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000263
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100264 G.wget_buf[3] = '\0';
265 result = xatoi_positive(G.wget_buf);
266 G.wget_buf[3] = ' ';
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000267 return result;
268}
269
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100270static void parse_url(const char *src_url, struct host_info *h)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000271{
272 char *url, *p, *sp;
273
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100274 free(h->allocated);
275 h->allocated = url = xstrdup(src_url);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000276
maxwen27116ba2015-08-14 21:41:28 +0200277 if (strncmp(url, "ftp://", 6) == 0) {
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000278 h->port = bb_lookup_port("ftp", "tcp", 21);
279 h->host = url + 6;
280 h->is_ftp = 1;
281 } else
maxwen27116ba2015-08-14 21:41:28 +0200282 if (strncmp(url, "http://", 7) == 0) {
283 h->host = url + 7;
284 http:
285 h->port = bb_lookup_port("http", "tcp", 80);
286 h->is_ftp = 0;
287 } else
288 if (!strstr(url, "//")) {
289 // GNU wget is user-friendly and falls back to http://
290 h->host = url;
291 goto http;
292 } else
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200293 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000294
295 // FYI:
296 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
297 // 'GET /?var=a/b HTTP 1.0'
298 // and saves 'index.html?var=a%2Fb' (we save 'b')
299 // wget 'http://busybox.net?login=john@doe':
300 // request: 'GET /?login=john@doe HTTP/1.0'
301 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
302 // wget 'http://busybox.net#test/test':
303 // request: 'GET / HTTP/1.0'
304 // saves: 'index.html' (we save 'test')
305 //
306 // We also don't add unique .N suffix if file exists...
307 sp = strchr(h->host, '/');
308 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
309 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
310 if (!sp) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000311 h->path = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000312 } else if (*sp == '/') {
313 *sp = '\0';
314 h->path = sp + 1;
315 } else { // '#' or '?'
316 // http://busybox.net?login=john@doe is a valid URL
317 // memmove converts to:
318 // http:/busybox.nett?login=john@doe...
Denis Vlasenko818322b2007-09-24 18:27:04 +0000319 memmove(h->host - 1, h->host, sp - h->host);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000320 h->host--;
321 sp[-1] = '\0';
322 h->path = sp;
323 }
324
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200325 // We used to set h->user to NULL here, but this interferes
326 // with handling of code 302 ("object was moved")
327
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000328 sp = strrchr(h->host, '@');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000329 if (sp != NULL) {
Tanguy Pruvot8a6c2c22012-04-28 00:24:09 +0200330 // URL-decode "user:password" string before base64-encoding:
331 // wget http://test:my%20pass@example.com should send
332 // Authorization: Basic dGVzdDpteSBwYXNz
333 // which decodes to "test:my pass".
334 // Standard wget and curl do this too.
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000335 *sp = '\0';
Tanguy Pruvot8a6c2c22012-04-28 00:24:09 +0200336 h->user = percent_decode_in_place(h->host, /*strict:*/ 0);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000337 h->host = sp + 1;
338 }
339
340 sp = h->host;
341}
342
Denys Vlasenkof836f012011-02-10 23:02:28 +0100343static char *gethdr(FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000344{
345 char *s, *hdrval;
346 int c;
347
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000348 /* retrieve header line */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100349 c = fgets_and_trim(fp);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000350
Denys Vlasenkof836f012011-02-10 23:02:28 +0100351 /* end of the headers? */
352 if (G.wget_buf[0] == '\0')
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000353 return NULL;
354
355 /* convert the header name to lower case */
maxwen27116ba2015-08-14 21:41:28 +0200356 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.' || *s == '_'; ++s) {
357 /*
358 * No-op for 20-3f and 60-7f. "0-9a-z-." are in these ranges.
359 * 40-5f range ("@A-Z[\]^_") maps to 60-7f.
360 * "A-Z" maps to "a-z".
361 * "@[\]" can't occur in header names.
362 * "^_" maps to "~,DEL" (which is wrong).
363 * "^" was never seen yet, "_" was seen from web.archive.org
364 * (x-archive-orig-x_commoncrawl_Signature: HEXSTRING).
365 */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100366 *s |= 0x20;
Denys Vlasenko48363312010-04-04 15:29:32 +0200367 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000368
369 /* verify we are at the end of the header name */
370 if (*s != ':')
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100371 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000372
373 /* locate the start of the header value */
374 *s++ = '\0';
375 hdrval = skip_whitespace(s);
376
Denys Vlasenkof836f012011-02-10 23:02:28 +0100377 if (c != '\n') {
378 /* Rats! The buffer isn't big enough to hold the entire header value */
379 while (c = getc(fp), c != EOF && c != '\n')
380 continue;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000381 }
382
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000383 return hdrval;
384}
385
Denys Vlasenko6030af92012-06-13 17:31:07 +0200386static void reset_beg_range_to_zero(void)
387{
Tanguy Pruvot823694d2012-11-18 13:20:29 +0100388 bb_error_msg("restart failed");
Denys Vlasenko6030af92012-06-13 17:31:07 +0200389 G.beg_range = 0;
390 xlseek(G.output_fd, 0, SEEK_SET);
Tanguy Pruvot823694d2012-11-18 13:20:29 +0100391 /* Done at the end instead: */
392 /* ftruncate(G.output_fd, 0); */
Denys Vlasenko6030af92012-06-13 17:31:07 +0200393}
394
Denys Vlasenko7f432802009-06-28 01:02:24 +0200395static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
396{
Denys Vlasenko7f432802009-06-28 01:02:24 +0200397 FILE *sfp;
398 char *str;
399 int port;
400
401 if (!target->user)
402 target->user = xstrdup("anonymous:busybox@");
403
404 sfp = open_socket(lsa);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100405 if (ftpcmd(NULL, NULL, sfp) != 220)
406 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200407
408 /*
409 * Splitting username:password pair,
410 * trying to log in
411 */
412 str = strchr(target->user, ':');
413 if (str)
414 *str++ = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100415 switch (ftpcmd("USER ", target->user, sfp)) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200416 case 230:
417 break;
418 case 331:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100419 if (ftpcmd("PASS ", str, sfp) == 230)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200420 break;
421 /* fall through (failed login) */
422 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100423 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200424 }
425
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100426 ftpcmd("TYPE I", NULL, sfp);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200427
428 /*
429 * Querying file size
430 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100431 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
432 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100433 if (G.content_len < 0 || errno) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200434 bb_error_msg_and_die("SIZE value is garbage");
435 }
436 G.got_clen = 1;
437 }
438
439 /*
440 * Entering passive mode
441 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100442 if (ftpcmd("PASV", NULL, sfp) != 227) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200443 pasv_error:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100444 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200445 }
446 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
447 // Server's IP is N1.N2.N3.N4 (we ignore it)
448 // Server's port for data connection is P1*256+P2
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100449 str = strrchr(G.wget_buf, ')');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200450 if (str) str[0] = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100451 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200452 if (!str) goto pasv_error;
453 port = xatou_range(str+1, 0, 255);
454 *str = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100455 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200456 if (!str) goto pasv_error;
457 port += xatou_range(str+1, 0, 255) * 256;
Denys Vlasenkoca183112011-04-07 17:52:20 +0200458 set_nport(&lsa->u.sa, htons(port));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200459
460 *dfpp = open_socket(lsa);
461
Tanguy Pruvot823694d2012-11-18 13:20:29 +0100462 if (G.beg_range != 0) {
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100463 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
464 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100465 G.content_len -= G.beg_range;
Denys Vlasenko6030af92012-06-13 17:31:07 +0200466 else
467 reset_beg_range_to_zero();
Denys Vlasenko7f432802009-06-28 01:02:24 +0200468 }
469
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100470 if (ftpcmd("RETR ", target->path, sfp) > 150)
471 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200472
473 return sfp;
474}
475
Denys Vlasenko2384a352011-02-15 00:58:36 +0100476static void NOINLINE retrieve_file_data(FILE *dfp)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200477{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200478#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
479# if ENABLE_FEATURE_WGET_TIMEOUT
Tanguy Pruvot823694d2012-11-18 13:20:29 +0100480 unsigned second_cnt = G.timeout_seconds;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200481# endif
482 struct pollfd polldata;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200483
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200484 polldata.fd = fileno(dfp);
485 polldata.events = POLLIN | POLLPRI;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200486#endif
487 progress_meter(PROGRESS_START);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200488
489 if (G.chunked)
490 goto get_clen;
491
492 /* Loops only if chunked */
493 while (1) {
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100494
495#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
496 /* Must use nonblocking I/O, otherwise fread will loop
497 * and *block* until it reads full buffer,
498 * which messes up progress bar and/or timeout logic.
499 * Because of nonblocking I/O, we need to dance
500 * very carefully around EAGAIN. See explanation at
Tanguy Pruvot823694d2012-11-18 13:20:29 +0100501 * clearerr() calls.
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100502 */
503 ndelay_on(polldata.fd);
504#endif
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100505 while (1) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200506 int n;
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100507 unsigned rdsz;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200508
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200509#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
Denys Vlasenko8766a792011-02-11 21:42:00 +0100510 /* fread internally uses read loop, which in our case
511 * is usually exited when we get EAGAIN.
512 * In this case, libc sets error marker on the stream.
513 * Need to clear it before next fread to avoid possible
514 * rare false positive ferror below. Rare because usually
515 * fread gets more than zero bytes, and we don't fall
516 * into if (n <= 0) ...
517 */
518 clearerr(dfp);
Denys Vlasenkof9af3752011-02-11 22:01:33 +0100519#endif
Tanguy Pruvot823694d2012-11-18 13:20:29 +0100520 errno = 0;
521 rdsz = sizeof(G.wget_buf);
522 if (G.got_clen) {
523 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
524 if ((int)G.content_len <= 0)
525 break;
526 rdsz = (unsigned)G.content_len;
527 }
528 }
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100529 n = fread(G.wget_buf, 1, rdsz, dfp);
Tanguy Pruvot823694d2012-11-18 13:20:29 +0100530
531 if (n > 0) {
532 xwrite(G.output_fd, G.wget_buf, n);
533#if ENABLE_FEATURE_WGET_STATUSBAR
534 G.transferred += n;
535#endif
536 if (G.got_clen) {
537 G.content_len -= n;
538 if (G.content_len == 0)
539 break;
540 }
541#if ENABLE_FEATURE_WGET_TIMEOUT
542 second_cnt = G.timeout_seconds;
543#endif
544 continue;
545 }
546
547 /* n <= 0.
548 * man fread:
Denys Vlasenko8766a792011-02-11 21:42:00 +0100549 * If error occurs, or EOF is reached, the return value
550 * is a short item count (or zero).
551 * fread does not distinguish between EOF and error.
552 */
Tanguy Pruvot823694d2012-11-18 13:20:29 +0100553 if (errno != EAGAIN) {
554 if (ferror(dfp)) {
555 progress_meter(PROGRESS_END);
Tanguy Pruvotf7ae0a22011-07-04 05:30:48 +0200556 bb_perror_msg_and_die(bb_msg_read_error);
Tanguy Pruvot823694d2012-11-18 13:20:29 +0100557 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100558 break; /* EOF, not error */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200559 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100560
Tanguy Pruvot823694d2012-11-18 13:20:29 +0100561#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
562 /* It was EAGAIN. There is no data. Wait up to one second
563 * then abort if timed out, or update the bar and try reading again.
564 */
565 if (safe_poll(&polldata, 1, 1000) == 0) {
566# if ENABLE_FEATURE_WGET_TIMEOUT
567 if (second_cnt != 0 && --second_cnt == 0) {
568 progress_meter(PROGRESS_END);
569 bb_error_msg_and_die("download timed out");
570 }
571# endif
572 /* We used to loop back to poll here,
573 * but there is no great harm in letting fread
574 * to try reading anyway.
575 */
576 }
577 /* Need to do it _every_ second for "stalled" indicator
578 * to be shown properly.
579 */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200580 progress_meter(PROGRESS_BUMP);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200581#endif
Tanguy Pruvot823694d2012-11-18 13:20:29 +0100582 } /* while (reading data) */
583
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100584#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
585 clearerr(dfp);
Denys Vlasenko88ad9da2011-02-11 23:06:21 +0100586 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100587#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +0200588 if (!G.chunked)
589 break;
590
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100591 fgets_and_trim(dfp); /* Eat empty line */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200592 get_clen:
Denys Vlasenkof836f012011-02-10 23:02:28 +0100593 fgets_and_trim(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100594 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200595 /* FIXME: error check? */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100596 if (G.content_len == 0)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200597 break; /* all done! */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100598 G.got_clen = 1;
Tanguy Pruvot823694d2012-11-18 13:20:29 +0100599 /*
600 * Note that fgets may result in some data being buffered in dfp.
601 * We loop back to fread, which will retrieve this data.
602 * Also note that code has to be arranged so that fread
603 * is done _before_ one-second poll wait - poll doesn't know
604 * about stdio buffering and can result in spurious one second waits!
605 */
606 }
607
608 /* If -c failed, we restart from the beginning,
609 * but we do not truncate file then, we do it only now, at the end.
610 * This lets user to ^C if his 99% complete 10 GB file download
611 * failed to restart *without* losing the almost complete file.
612 */
613 {
614 off_t pos = lseek(G.output_fd, 0, SEEK_CUR);
615 if (pos != (off_t)-1)
616 ftruncate(G.output_fd, pos);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200617 }
618
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100619 /* Draw full bar and free its resources */
Denys Vlasenko2384a352011-02-15 00:58:36 +0100620 G.chunked = 0; /* makes it show 100% even for chunked download */
621 G.got_clen = 1; /* makes it show 100% even for download of (formerly) unknown size */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200622 progress_meter(PROGRESS_END);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200623}
624
Pere Orga53695632011-02-16 20:09:36 +0100625static void download_one_url(const char *url)
Eric Andersen96700832000-09-04 15:15:55 +0000626{
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100627 bool use_proxy; /* Use proxies if env vars are set */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200628 int redir_limit;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100629 len_and_sockaddr *lsa;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200630 FILE *sfp; /* socket to web/ftp server */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000631 FILE *dfp; /* socket to ftp server (data) */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100632 char *proxy = NULL;
633 char *fname_out_alloc;
Denys Vlasenko8a90e612012-02-04 19:55:27 +0100634 char *redirected_path = NULL;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100635 struct host_info server;
636 struct host_info target;
Denis Vlasenko77105632007-09-24 15:04:00 +0000637
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100638 server.allocated = NULL;
639 target.allocated = NULL;
640 server.user = NULL;
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200641 target.user = NULL;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100642
643 parse_url(url, &target);
Eric Andersen79757c92001-04-05 21:45:54 +0000644
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000645 /* Use the proxy if necessary */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100646 use_proxy = (strcmp(G.proxy_flag, "off") != 0);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000647 if (use_proxy) {
Robert Griebld7760112002-05-14 23:36:45 +0000648 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
Denys Vlasenko2384a352011-02-15 00:58:36 +0100649 use_proxy = (proxy && proxy[0]);
650 if (use_proxy)
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000651 parse_url(proxy, &server);
Robert Griebld7760112002-05-14 23:36:45 +0000652 }
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200653 if (!use_proxy) {
654 server.port = target.port;
655 if (ENABLE_FEATURE_IPV6) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100656 //free(server.allocated); - can't be non-NULL
657 server.host = server.allocated = xstrdup(target.host);
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200658 } else {
659 server.host = target.host;
660 }
661 }
662
663 if (ENABLE_FEATURE_IPV6)
664 strip_ipv6_scope_id(target.host);
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000665
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100666 /* If there was no -O FILE, guess output filename */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100667 fname_out_alloc = NULL;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +0100668 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100669 G.fname_out = bb_get_last_path_component_nostrip(target.path);
Denis Vlasenko818322b2007-09-24 18:27:04 +0000670 /* handle "wget http://kernel.org//" */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100671 if (G.fname_out[0] == '/' || !G.fname_out[0])
672 G.fname_out = (char*)"index.html";
Denis Vlasenko818322b2007-09-24 18:27:04 +0000673 /* -P DIR is considered only if there was no -O FILE */
Tanguy Pruvot823694d2012-11-18 13:20:29 +0100674 if (G.dir_prefix)
675 G.fname_out = fname_out_alloc = concat_path_file(G.dir_prefix, G.fname_out);
Denys Vlasenko625f2182011-03-21 00:29:37 +0100676 else {
Tanguy Pruvot823694d2012-11-18 13:20:29 +0100677 /* redirects may free target.path later, need to make a copy */
678 G.fname_out = fname_out_alloc = xstrdup(G.fname_out);
Denys Vlasenko625f2182011-03-21 00:29:37 +0100679 }
Eric Andersen29edd002000-12-09 16:55:35 +0000680 }
Denis Vlasenko818322b2007-09-24 18:27:04 +0000681#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100682 G.curfile = bb_get_last_path_component_nostrip(G.fname_out);
Denis Vlasenko818322b2007-09-24 18:27:04 +0000683#endif
684
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000685 /* Determine where to start transfer */
Denys Vlasenko2384a352011-02-15 00:58:36 +0100686 G.beg_range = 0;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100687 if (option_mask32 & WGET_OPT_CONTINUE) {
Denys Vlasenko2384a352011-02-15 00:58:36 +0100688 G.output_fd = open(G.fname_out, O_WRONLY);
689 if (G.output_fd >= 0) {
690 G.beg_range = xlseek(G.output_fd, 0, SEEK_END);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000691 }
692 /* File doesn't exist. We do not create file here yet.
Denys Vlasenkoa84eadf2011-02-12 23:40:31 +0100693 * We are not sure it exists on remote side */
Eric Andersen96700832000-09-04 15:15:55 +0000694 }
695
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200696 redir_limit = 5;
697 resolve_lsa:
Denis Vlasenko42823d52007-02-04 02:39:08 +0000698 lsa = xhost2sockaddr(server.host, server.port);
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100699 if (!(option_mask32 & WGET_OPT_QUIET)) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200700 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
701 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
702 free(s);
Eric Andersene6dc4392003-10-31 09:31:46 +0000703 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200704 establish_session:
Denys Vlasenko2384a352011-02-15 00:58:36 +0100705 /*G.content_len = 0; - redundant, got_clen = 0 is enough */
706 G.got_clen = 0;
707 G.chunked = 0;
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000708 if (use_proxy || !target.is_ftp) {
Eric Andersen79757c92001-04-05 21:45:54 +0000709 /*
710 * HTTP session
711 */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200712 char *str;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200713 int status;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200714
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100715
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200716 /* Open socket to http server */
717 sfp = open_socket(lsa);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200718
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200719 /* Send HTTP request */
720 if (use_proxy) {
721 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
722 target.is_ftp ? "f" : "ht", target.host,
723 target.path);
724 } else {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100725 if (option_mask32 & WGET_OPT_POST_DATA)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200726 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
727 else
728 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
729 }
Glenn L McGrathe7bdfcc2003-08-28 22:03:19 +0000730
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200731 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100732 target.host, G.user_agent);
Eric Andersen79757c92001-04-05 21:45:54 +0000733
Denys Vlasenko9213a552011-02-10 13:23:45 +0100734 /* Ask server to close the connection as soon as we are done
735 * (IOW: we do not intend to send more requests)
736 */
737 fprintf(sfp, "Connection: close\r\n");
738
Denis Vlasenko9cade082006-11-21 10:43:02 +0000739#if ENABLE_FEATURE_WGET_AUTHENTICATION
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200740 if (target.user) {
741 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100742 base64enc(target.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200743 }
744 if (use_proxy && server.user) {
745 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100746 base64enc(server.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200747 }
Eric Andersen79757c92001-04-05 21:45:54 +0000748#endif
749
Tanguy Pruvot823694d2012-11-18 13:20:29 +0100750 if (G.beg_range != 0)
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100751 fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
Denys Vlasenko9213a552011-02-10 13:23:45 +0100752
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000753#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100754 if (G.extra_headers)
755 fputs(G.extra_headers, sfp);
Denis Vlasenko5a2ad692009-03-04 14:13:37 +0000756
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100757 if (option_mask32 & WGET_OPT_POST_DATA) {
Denys Vlasenko9213a552011-02-10 13:23:45 +0100758 fprintf(sfp,
759 "Content-Type: application/x-www-form-urlencoded\r\n"
760 "Content-Length: %u\r\n"
761 "\r\n"
762 "%s",
Vitaly Magerya700fbc32011-03-27 22:33:13 +0200763 (int) strlen(G.post_data), G.post_data
Denys Vlasenko9213a552011-02-10 13:23:45 +0100764 );
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200765 } else
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000766#endif
Denys Vlasenko9213a552011-02-10 13:23:45 +0100767 {
768 fprintf(sfp, "\r\n");
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200769 }
Eric Andersen79757c92001-04-05 21:45:54 +0000770
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +0200771 fflush(sfp);
772
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200773 /*
774 * Retrieve HTTP response line and check for "200" status code.
775 */
Denis Vlasenko023b57d2006-10-15 17:05:55 +0000776 read_response:
Denys Vlasenkof836f012011-02-10 23:02:28 +0100777 fgets_and_trim(sfp);
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000778
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100779 str = G.wget_buf;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200780 str = skip_non_whitespace(str);
781 str = skip_whitespace(str);
782 // FIXME: no error check
783 // xatou wouldn't work: "200 OK"
784 status = atoi(str);
785 switch (status) {
786 case 0:
787 case 100:
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100788 while (gethdr(sfp) != NULL)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200789 /* eat all remaining headers */;
790 goto read_response;
791 case 200:
Denis Vlasenko50b5cac2008-06-22 16:28:02 +0000792/*
793Response 204 doesn't say "null file", it says "metadata
794has changed but data didn't":
795
796"10.2.5 204 No Content
797The server has fulfilled the request but does not need to return
798an entity-body, and might want to return updated metainformation.
799The response MAY include new or updated metainformation in the form
800of entity-headers, which if present SHOULD be associated with
801the requested variant.
802
803If the client is a user agent, it SHOULD NOT change its document
804view from that which caused the request to be sent. This response
805is primarily intended to allow input for actions to take place
806without causing a change to the user agent's active document view,
807although any new or updated metainformation SHOULD be applied
808to the document currently in the user agent's active view.
809
810The 204 response MUST NOT include a message-body, and thus
811is always terminated by the first empty line after the header fields."
812
813However, in real world it was observed that some web servers
814(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
815*/
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200816 case 204:
Denys Vlasenko6030af92012-06-13 17:31:07 +0200817 if (G.beg_range != 0) {
818 /* "Range:..." was not honored by the server.
819 * Restart download from the beginning.
820 */
821 reset_beg_range_to_zero();
822 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200823 break;
Denys Vlasenkofb132e42010-10-29 11:46:52 +0200824 case 300: /* redirection */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200825 case 301:
826 case 302:
827 case 303:
828 break;
Tanguy Pruvot823694d2012-11-18 13:20:29 +0100829 case 206: /* Partial Content */
830 if (G.beg_range != 0)
831 /* "Range:..." worked. Good. */
Denis Vlasenko023b57d2006-10-15 17:05:55 +0000832 break;
Tanguy Pruvot823694d2012-11-18 13:20:29 +0100833 /* Partial Content even though we did not ask for it??? */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200834 /* fall through */
835 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100836 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200837 }
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000838
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200839 /*
840 * Retrieve HTTP headers.
841 */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100842 while ((str = gethdr(sfp)) != NULL) {
843 static const char keywords[] ALIGN1 =
844 "content-length\0""transfer-encoding\0""location\0";
845 enum {
846 KEY_content_length = 1, KEY_transfer_encoding, KEY_location
847 };
Matthijs van de Water0d586662009-08-22 20:19:48 +0200848 smalluint key;
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100849
850 /* gethdr converted "FOO:" string to lowercase */
851
Matthijs van de Water0d586662009-08-22 20:19:48 +0200852 /* strip trailing whitespace */
853 char *s = strchrnul(str, '\0') - 1;
854 while (s >= str && (*s == ' ' || *s == '\t')) {
855 *s = '\0';
856 s--;
857 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100858 key = index_in_strings(keywords, G.wget_buf) + 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200859 if (key == KEY_content_length) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100860 G.content_len = BB_STRTOOFF(str, NULL, 10);
861 if (G.content_len < 0 || errno) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200862 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
Eric Andersen79757c92001-04-05 21:45:54 +0000863 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200864 G.got_clen = 1;
865 continue;
866 }
867 if (key == KEY_transfer_encoding) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100868 if (strcmp(str_tolower(str), "chunked") != 0)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200869 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100870 G.chunked = 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200871 }
872 if (key == KEY_location && status >= 300) {
873 if (--redir_limit == 0)
874 bb_error_msg_and_die("too many redirections");
875 fclose(sfp);
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100876 if (str[0] == '/') {
Denys Vlasenko8a90e612012-02-04 19:55:27 +0100877 free(redirected_path);
878 target.path = redirected_path = xstrdup(str+1);
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200879 /* lsa stays the same: it's on the same server */
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100880 } else {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200881 parse_url(str, &target);
882 if (!use_proxy) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100883 free(server.allocated);
Pere Orga57b49092011-02-14 23:56:07 +0100884 server.allocated = NULL;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200885 server.host = target.host;
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200886 /* strip_ipv6_scope_id(target.host); - no! */
887 /* we assume remote never gives us IPv6 addr with scope id */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200888 server.port = target.port;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +0000889 free(lsa);
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200890 goto resolve_lsa;
891 } /* else: lsa stays the same: we use proxy */
Eric Andersen79757c92001-04-05 21:45:54 +0000892 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200893 goto establish_session;
Eric Andersen79757c92001-04-05 21:45:54 +0000894 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200895 }
896// if (status >= 300)
897// bb_error_msg_and_die("bad redirection (no Location: header from server)");
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000898
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200899 /* For HTTP, data is pumped over the same connection */
Eric Andersen79757c92001-04-05 21:45:54 +0000900 dfp = sfp;
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000901
902 } else {
Eric Andersen79757c92001-04-05 21:45:54 +0000903 /*
904 * FTP session
905 */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200906 sfp = prepare_ftp_session(&dfp, &target, lsa);
Eric Andersen96700832000-09-04 15:15:55 +0000907 }
Denis Vlasenko77105632007-09-24 15:04:00 +0000908
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100909 free(lsa);
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100910
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +0100911 if (!(option_mask32 & WGET_OPT_SPIDER)) {
Denys Vlasenko2384a352011-02-15 00:58:36 +0100912 if (G.output_fd < 0)
913 G.output_fd = xopen(G.fname_out, G.o_flags);
914 retrieve_file_data(dfp);
915 if (!(option_mask32 & WGET_OPT_OUTNAME)) {
916 xclose(G.output_fd);
917 G.output_fd = -1;
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +0100918 }
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +0000919 }
Eric Andersen79757c92001-04-05 21:45:54 +0000920
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200921 if (dfp != sfp) {
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100922 /* It's ftp. Close data connection properly */
Eric Andersen79757c92001-04-05 21:45:54 +0000923 fclose(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100924 if (ftpcmd(NULL, NULL, sfp) != 226)
925 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
926 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
Eric Andersen79757c92001-04-05 21:45:54 +0000927 }
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100928 fclose(sfp);
Denis Vlasenko77105632007-09-24 15:04:00 +0000929
Denys Vlasenko9a5b7f62011-02-13 02:49:43 +0100930 free(server.allocated);
931 free(target.allocated);
932 free(fname_out_alloc);
Denys Vlasenko8a90e612012-02-04 19:55:27 +0100933 free(redirected_path);
Eric Andersen96700832000-09-04 15:15:55 +0000934}
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100935
936int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
937int wget_main(int argc UNUSED_PARAM, char **argv)
938{
939#if ENABLE_FEATURE_WGET_LONG_OPTIONS
940 static const char wget_longopts[] ALIGN1 =
941 /* name, has_arg, val */
942 "continue\0" No_argument "c"
943//FIXME: -s isn't --spider, it's --save-headers!
944 "spider\0" No_argument "s"
945 "quiet\0" No_argument "q"
946 "output-document\0" Required_argument "O"
947 "directory-prefix\0" Required_argument "P"
948 "proxy\0" Required_argument "Y"
949 "user-agent\0" Required_argument "U"
950#if ENABLE_FEATURE_WGET_TIMEOUT
951 "timeout\0" Required_argument "T"
952#endif
953 /* Ignored: */
954 // "tries\0" Required_argument "t"
955 /* Ignored (we always use PASV): */
956 "passive-ftp\0" No_argument "\xff"
957 "header\0" Required_argument "\xfe"
958 "post-data\0" Required_argument "\xfd"
959 /* Ignored (we don't do ssl) */
960 "no-check-certificate\0" No_argument "\xfc"
Tanguy Pruvot823694d2012-11-18 13:20:29 +0100961 /* Ignored (we don't support caching) */
962 "no-cache\0" No_argument "\xfb"
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100963 ;
964#endif
965
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100966#if ENABLE_FEATURE_WGET_LONG_OPTIONS
967 llist_t *headers_llist = NULL;
968#endif
969
970 INIT_G();
971
maxwen27116ba2015-08-14 21:41:28 +0200972#if ENABLE_FEATURE_WGET_TIMEOUT
973 G.timeout_seconds = 900;
974 signal(SIGALRM, alarm_handler);
975#endif
Denys Vlasenkoa3661092011-02-13 02:33:11 +0100976 G.proxy_flag = "on"; /* use proxies if env vars are set */
977 G.user_agent = "Wget"; /* "User-Agent" header field */
978
979#if ENABLE_FEATURE_WGET_LONG_OPTIONS
980 applet_long_options = wget_longopts;
981#endif
982 opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
983 getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
984 &G.fname_out, &G.dir_prefix,
985 &G.proxy_flag, &G.user_agent,
986 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
987 NULL /* -t RETRIES */
988 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
989 IF_FEATURE_WGET_LONG_OPTIONS(, &G.post_data)
990 );
991 argv += optind;
992
993#if ENABLE_FEATURE_WGET_LONG_OPTIONS
994 if (headers_llist) {
995 int size = 1;
996 char *cp;
997 llist_t *ll = headers_llist;
998 while (ll) {
999 size += strlen(ll->data) + 2;
1000 ll = ll->link;
1001 }
1002 G.extra_headers = cp = xmalloc(size);
1003 while (headers_llist) {
1004 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
1005 }
1006 }
1007#endif
1008
Denys Vlasenko2384a352011-02-15 00:58:36 +01001009 G.output_fd = -1;
1010 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
1011 if (G.fname_out) { /* -O FILE ? */
1012 if (LONE_DASH(G.fname_out)) { /* -O - ? */
1013 G.output_fd = 1;
1014 option_mask32 &= ~WGET_OPT_CONTINUE;
1015 }
1016 /* compat with wget: -O FILE can overwrite */
1017 G.o_flags = O_WRONLY | O_CREAT | O_TRUNC;
1018 }
1019
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001020 while (*argv)
Pere Orga53695632011-02-16 20:09:36 +01001021 download_one_url(*argv++);
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001022
Denys Vlasenko28556b92011-02-15 11:03:53 +01001023 if (G.output_fd >= 0)
1024 xclose(G.output_fd);
1025
maxwen27116ba2015-08-14 21:41:28 +02001026#if ENABLE_FEATURE_CLEAN_UP && ENABLE_FEATURE_WGET_LONG_OPTIONS
1027 free(G.extra_headers);
1028#endif
1029 FINI_G();
1030
Pere Orga53695632011-02-16 20:09:36 +01001031 return EXIT_SUCCESS;
Denys Vlasenkoa3661092011-02-13 02:33:11 +01001032}