blob: 931882fdef0f92f8520826ab7b7ef56a449d1b87 [file] [log] [blame]
/* vi: set sw=4 ts=4: */
/*
 * wget - retrieve a file using HTTP or FTP
 *
 * Chip Rosenthal Covad Communications <chip@laserlink.net>
 * Licensed under GPLv2, see file LICENSE in this source tree.
 *
 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
 */
Denis Vlasenkob6adbf12007-05-26 19:00:18 +000011#include "libbb.h"
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +000012
Eric Andersen79757c92001-04-05 21:45:54 +000013struct host_info {
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +000014 // May be used if we ever will want to free() all xstrdup()s...
15 /* char *allocated; */
Denis Vlasenko818322b2007-09-24 18:27:04 +000016 const char *path;
17 const char *user;
18 char *host;
19 int port;
20 smallint is_ftp;
Eric Andersen79757c92001-04-05 21:45:54 +000021};
22
Denis Vlasenko77105632007-09-24 15:04:00 +000023
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020024/* Globals */
Denis Vlasenko77105632007-09-24 15:04:00 +000025struct globals {
26 off_t content_len; /* Content-length of the file */
27 off_t beg_range; /* Range at which continue begins */
28#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko77105632007-09-24 15:04:00 +000029 off_t transferred; /* Number of bytes transferred so far */
30 const char *curfile; /* Name of current file being transferred */
Magnus Dammf5914992009-11-08 16:34:43 +010031 bb_progress_t pmt;
Denis Vlasenko77105632007-09-24 15:04:00 +000032#endif
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020033#if ENABLE_FEATURE_WGET_TIMEOUT
34 unsigned timeout_seconds;
35#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +020036 smallint chunked; /* chunked transfer encoding */
37 smallint got_clen; /* got content-length: from server */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010038 /* Local downloads do benefit from big buffer.
39 * With 512 byte buffer, it was measured to be
40 * an order of magnitude slower than with big one.
41 */
42 uint64_t just_to_align_next_member;
43 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
Denys Vlasenko98a4c7c2010-02-04 15:00:15 +010044} FIX_ALIASING;
/* Shorthand for the globals structure */
#define G (*ptr_to_globals)
/* Allocate the globals with xzalloc() and, when the timeout feature
 * is enabled, set the default timeout to 900. */
#define INIT_G() do { \
	SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
	IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \
} while (0)
Denis Vlasenko77105632007-09-24 15:04:00 +000050
51
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020052/* Must match option string! */
53enum {
54 WGET_OPT_CONTINUE = (1 << 0),
Denys Vlasenkofb132e42010-10-29 11:46:52 +020055 WGET_OPT_SPIDER = (1 << 1),
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020056 WGET_OPT_QUIET = (1 << 2),
57 WGET_OPT_OUTNAME = (1 << 3),
58 WGET_OPT_PREFIX = (1 << 4),
59 WGET_OPT_PROXY = (1 << 5),
60 WGET_OPT_USER_AGENT = (1 << 6),
61 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
62 WGET_OPT_RETRIES = (1 << 8),
63 WGET_OPT_PASSIVE = (1 << 9),
64 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
65 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
66};
67
/* Values accepted by progress_meter() */
enum {
	PROGRESS_START = -1, /* (re)initialize the meter, then draw it */
	PROGRESS_END   = 0,  /* draw, emit a newline, reset the byte counter */
	PROGRESS_BUMP  = 1,  /* just redraw */
};
Denis Vlasenko9cade082006-11-21 10:43:02 +000073#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko00d84172008-11-24 07:34:42 +000074static void progress_meter(int flag)
Denis Vlasenko47ddd012007-09-24 18:24:17 +000075{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020076 if (option_mask32 & WGET_OPT_QUIET)
77 return;
Denis Vlasenko47ddd012007-09-24 18:24:17 +000078
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020079 if (flag == PROGRESS_START)
Magnus Dammf5914992009-11-08 16:34:43 +010080 bb_progress_init(&G.pmt);
Denis Vlasenko47ddd012007-09-24 18:24:17 +000081
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +010082 bb_progress_update(&G.pmt, G.curfile, G.beg_range, G.transferred,
Denys Vlasenkoc5bbd5d2010-07-12 03:27:09 +020083 G.chunked ? 0 : G.beg_range + G.transferred + G.content_len);
Denis Vlasenko47ddd012007-09-24 18:24:17 +000084
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020085 if (flag == PROGRESS_END) {
Denys Vlasenko19ced5c2010-06-06 21:53:09 +020086 bb_putchar_stderr('\n');
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +010087 G.transferred = 0;
Denis Vlasenko47ddd012007-09-24 18:24:17 +000088 }
89}
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020090#else
Denis Vlasenko00d84172008-11-24 07:34:42 +000091static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
Eric Andersenb520e082000-10-03 00:21:45 +000092#endif
Eric Andersenc7bda1c2004-03-15 08:29:22 +000093
Denis Vlasenko47ddd012007-09-24 18:24:17 +000094
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Edits HOST in place: "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 * HOST is left untouched if it does not start with '[', has no '%',
 * is not of the form "[xx]" or "[xx]:nn", or has its first '%' after
 * the closing ']'.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return;

	/* Remove the IPv6 zone identifier from the host address */
	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}

	if (scope > cp) {
		/* '%' is outside the brackets (e.g. "[::1]:80%x"): this is not
		 * a zone id. Copying "]..." forward onto itself would never
		 * reach the terminating NUL, so bail out. */
		return;
	}

	/* cp points to "]...", scope points to "%eth0]...":
	 * slide the tail (including its NUL) down over the zone id */
	memmove(scope, cp, strlen(cp) + 1);
}
130
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100131#if 0 /* were needed when we used signal-driven progress bar */
/* Read up to NMEMB bytes from STREAM into PTR, restarting the read when
 * it was cut short by EINTR. Returns the number of bytes stored; this is
 * less than NMEMB on EOF or on any error other than EINTR. */
static size_t safe_fread(void *ptr, size_t nmemb, FILE *stream)
{
	char *const start = (char*)ptr;
	char *cur = start;

	for (;;) {
		size_t got;

		clearerr(stream);
		errno = 0;
		got = fread(cur, 1, nmemb, stream);
		cur += got;
		nmemb -= got;

		/* Done, or stopped for a reason other than an interrupt */
		if (nmemb == 0 || !ferror(stream) || errno != EINTR)
			break;
	}

	return cur - start;
}
149
/* fgets() wrapper which retries when the call fails with EINTR.
 * Stores a line, or at most SIZE-1 bytes, into S.
 * Returns S, or NULL on EOF or on any error other than EINTR. */
static char *safe_fgets(char *s, int size, FILE *stream)
{
	for (;;) {
		char *line;

		clearerr(stream);
		errno = 0;
		line = fgets(s, size, stream);
		if (line != NULL)
			return line;
		/* Only an interrupted read is worth retrying */
		if (!ferror(stream) || errno != EINTR)
			return NULL;
	}
}
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100164#endif
Matt Kraai854125f2001-05-09 19:15:46 +0000165
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/* Base64-encode character string.
 * The result is written to, and returned as, G.wget_buf, so it is
 * only valid until the buffer is reused. Over-long input is silently
 * truncated before encoding. */
static char *base64enc(const char *str)
{
	/* paranoia: longest input accepted for encoding into G.wget_buf */
	const size_t max_len = sizeof(G.wget_buf)/4*3 - 10;
	unsigned len = strlen(str);

	if (len > max_len)
		len = max_len;
	bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
	return G.wget_buf;
}
#endif
177
/* Cut S (in place) at its first byte below ' ' (control characters,
 * including CR, LF and TAB). Bytes >= 0x80 are kept. Returns S. */
static char* sanitize_string(char *s)
{
	unsigned char *end;

	/* unsigned, so that bytes with the high bit set compare >= ' ' */
	for (end = (void *) s; *end >= ' '; end++)
		continue;
	*end = '\0';
	return s;
}
186
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000187static FILE *open_socket(len_and_sockaddr *lsa)
188{
189 FILE *fp;
190
191 /* glibc 2.4 seems to try seeking on it - ??! */
192 /* hopefully it understands what ESPIPE means... */
193 fp = fdopen(xconnect_stream(lsa), "r+");
194 if (fp == NULL)
Denys Vlasenkodee0fc92011-02-10 10:01:49 +0100195 bb_perror_msg_and_die(bb_msg_memory_exhausted);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000196
197 return fp;
198}
199
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100200static int ftpcmd(const char *s1, const char *s2, FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000201{
202 int result;
203 if (s1) {
204 if (!s2) s2 = "";
205 fprintf(fp, "%s%s\r\n", s1, s2);
206 fflush(fp);
207 }
208
209 do {
210 char *buf_ptr;
211
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100212 if (fgets(G.wget_buf, sizeof(G.wget_buf)-2, fp) == NULL) {
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000213 bb_perror_msg_and_die("error getting response");
214 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100215 buf_ptr = strstr(G.wget_buf, "\r\n");
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000216 if (buf_ptr) {
217 *buf_ptr = '\0';
218 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100219 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000220
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100221 G.wget_buf[3] = '\0';
222 result = xatoi_positive(G.wget_buf);
223 G.wget_buf[3] = ' ';
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000224 return result;
225}
226
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000227static void parse_url(char *src_url, struct host_info *h)
228{
229 char *url, *p, *sp;
230
231 /* h->allocated = */ url = xstrdup(src_url);
232
233 if (strncmp(url, "http://", 7) == 0) {
234 h->port = bb_lookup_port("http", "tcp", 80);
235 h->host = url + 7;
236 h->is_ftp = 0;
237 } else if (strncmp(url, "ftp://", 6) == 0) {
238 h->port = bb_lookup_port("ftp", "tcp", 21);
239 h->host = url + 6;
240 h->is_ftp = 1;
241 } else
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200242 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000243
244 // FYI:
245 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
246 // 'GET /?var=a/b HTTP 1.0'
247 // and saves 'index.html?var=a%2Fb' (we save 'b')
248 // wget 'http://busybox.net?login=john@doe':
249 // request: 'GET /?login=john@doe HTTP/1.0'
250 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
251 // wget 'http://busybox.net#test/test':
252 // request: 'GET / HTTP/1.0'
253 // saves: 'index.html' (we save 'test')
254 //
255 // We also don't add unique .N suffix if file exists...
256 sp = strchr(h->host, '/');
257 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
258 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
259 if (!sp) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000260 h->path = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000261 } else if (*sp == '/') {
262 *sp = '\0';
263 h->path = sp + 1;
264 } else { // '#' or '?'
265 // http://busybox.net?login=john@doe is a valid URL
266 // memmove converts to:
267 // http:/busybox.nett?login=john@doe...
Denis Vlasenko818322b2007-09-24 18:27:04 +0000268 memmove(h->host - 1, h->host, sp - h->host);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000269 h->host--;
270 sp[-1] = '\0';
271 h->path = sp;
272 }
273
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200274 // We used to set h->user to NULL here, but this interferes
275 // with handling of code 302 ("object was moved")
276
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000277 sp = strrchr(h->host, '@');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000278 if (sp != NULL) {
279 h->user = h->host;
280 *sp = '\0';
281 h->host = sp + 1;
282 }
283
284 sp = h->host;
285}
286
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100287static char *gethdr(FILE *fp /*, int *istrunc*/)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000288{
289 char *s, *hdrval;
290 int c;
291
292 /* *istrunc = 0; */
293
294 /* retrieve header line */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100295 if (fgets(G.wget_buf, sizeof(G.wget_buf), fp) == NULL)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000296 return NULL;
297
298 /* see if we are at the end of the headers */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100299 for (s = G.wget_buf; *s == '\r'; ++s)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000300 continue;
301 if (*s == '\n')
302 return NULL;
303
304 /* convert the header name to lower case */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100305 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.'; ++s) {
Denys Vlasenko48363312010-04-04 15:29:32 +0200306 /* tolower for "A-Z", no-op for "0-9a-z-." */
307 *s = (*s | 0x20);
308 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000309
310 /* verify we are at the end of the header name */
311 if (*s != ':')
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100312 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000313
314 /* locate the start of the header value */
315 *s++ = '\0';
316 hdrval = skip_whitespace(s);
317
318 /* locate the end of header */
319 while (*s && *s != '\r' && *s != '\n')
320 ++s;
321
322 /* end of header found */
323 if (*s) {
324 *s = '\0';
325 return hdrval;
326 }
327
Denys Vlasenko7f432802009-06-28 01:02:24 +0200328 /* Rats! The buffer isn't big enough to hold the entire header value */
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000329 while (c = getc(fp), c != EOF && c != '\n')
330 continue;
331 /* *istrunc = 1; */
332 return hdrval;
333}
334
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
/* URL encode, see RFC 2396.
 * Returns a newly xmalloc()ed copy of STR in which every byte except
 * ASCII letters, digits and the characters !&'()*-.=_~ is replaced
 * by "%XX" (uppercase hex). */
static char *URL_escape(const char *str)
{
	/* worst case: every byte becomes three, plus the NUL */
	char *res = xmalloc(strlen(str) * 3 + 1);
	char *dst = res;
	unsigned char c;

	while ((c = *str++) != '\0') {
		switch (c) {
		case '!': case '&': case '\'': case '(': case ')': case '*':
		case '-': case '.': case '=':  case '_': case '~':
			*dst++ = c;
			continue;
		}
		if ((c >= '0' && c <= '9')
		 || ((c|0x20) >= 'a' && (c|0x20) <= 'z')
		) {
			*dst++ = c;
			continue;
		}
		*dst++ = '%';
		*dst++ = bb_hexdigits_upcase[c >> 4];
		*dst++ = bb_hexdigits_upcase[c & 0xf];
	}
	*dst = '\0';
	return res;
}
#endif
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000372
Denys Vlasenko7f432802009-06-28 01:02:24 +0200373static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
374{
Denys Vlasenko7f432802009-06-28 01:02:24 +0200375 FILE *sfp;
376 char *str;
377 int port;
378
379 if (!target->user)
380 target->user = xstrdup("anonymous:busybox@");
381
382 sfp = open_socket(lsa);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100383 if (ftpcmd(NULL, NULL, sfp) != 220)
384 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200385
386 /*
387 * Splitting username:password pair,
388 * trying to log in
389 */
390 str = strchr(target->user, ':');
391 if (str)
392 *str++ = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100393 switch (ftpcmd("USER ", target->user, sfp)) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200394 case 230:
395 break;
396 case 331:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100397 if (ftpcmd("PASS ", str, sfp) == 230)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200398 break;
399 /* fall through (failed login) */
400 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100401 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200402 }
403
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100404 ftpcmd("TYPE I", NULL, sfp);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200405
406 /*
407 * Querying file size
408 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100409 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
410 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100411 if (G.content_len < 0 || errno) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200412 bb_error_msg_and_die("SIZE value is garbage");
413 }
414 G.got_clen = 1;
415 }
416
417 /*
418 * Entering passive mode
419 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100420 if (ftpcmd("PASV", NULL, sfp) != 227) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200421 pasv_error:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100422 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200423 }
424 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
425 // Server's IP is N1.N2.N3.N4 (we ignore it)
426 // Server's port for data connection is P1*256+P2
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100427 str = strrchr(G.wget_buf, ')');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200428 if (str) str[0] = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100429 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200430 if (!str) goto pasv_error;
431 port = xatou_range(str+1, 0, 255);
432 *str = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100433 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200434 if (!str) goto pasv_error;
435 port += xatou_range(str+1, 0, 255) * 256;
436 set_nport(lsa, htons(port));
437
438 *dfpp = open_socket(lsa);
439
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100440 if (G.beg_range) {
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100441 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
442 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100443 G.content_len -= G.beg_range;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200444 }
445
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100446 if (ftpcmd("RETR ", target->path, sfp) > 150)
447 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200448
449 return sfp;
450}
451
Denys Vlasenko7f432802009-06-28 01:02:24 +0200452static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd)
453{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200454#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
455# if ENABLE_FEATURE_WGET_TIMEOUT
456 unsigned second_cnt;
457# endif
458 struct pollfd polldata;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200459
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200460 polldata.fd = fileno(dfp);
461 polldata.events = POLLIN | POLLPRI;
Denys Vlasenkoda0df472010-08-08 04:21:50 +0200462 ndelay_on(polldata.fd);
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200463#endif
464 progress_meter(PROGRESS_START);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200465
466 if (G.chunked)
467 goto get_clen;
468
469 /* Loops only if chunked */
470 while (1) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100471 while (1) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200472 int n;
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100473 unsigned rdsz;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200474
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100475 rdsz = sizeof(G.wget_buf);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100476 if (G.got_clen) {
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100477 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100478 if ((int)G.content_len <= 0)
479 break;
480 rdsz = (unsigned)G.content_len;
481 }
482 }
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200483#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
484# if ENABLE_FEATURE_WGET_TIMEOUT
485 second_cnt = G.timeout_seconds;
486# endif
487 while (1) {
488 if (safe_poll(&polldata, 1, 1000) != 0)
489 break; /* error, EOF, or data is available */
490# if ENABLE_FEATURE_WGET_TIMEOUT
491 if (second_cnt != 0 && --second_cnt == 0) {
492 progress_meter(PROGRESS_END);
493 bb_perror_msg_and_die("download timed out");
494 }
495# endif
496 /* Needed for "stalled" indicator */
497 progress_meter(PROGRESS_BUMP);
498 }
499#endif
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100500 n = fread(G.wget_buf, 1, rdsz, dfp);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200501 if (n <= 0) {
502 if (ferror(dfp)) {
503 /* perror will not work: ferror doesn't set errno */
504 bb_error_msg_and_die(bb_msg_read_error);
505 }
506 break;
507 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100508 xwrite(output_fd, G.wget_buf, n);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200509#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100510 G.transferred += n;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200511 progress_meter(PROGRESS_BUMP);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200512#endif
Denys Vlasenko9213a552011-02-10 13:23:45 +0100513 if (G.got_clen) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100514 G.content_len -= n;
Denys Vlasenko9213a552011-02-10 13:23:45 +0100515 if (G.content_len == 0)
516 break;
517 }
Denys Vlasenko7f432802009-06-28 01:02:24 +0200518 }
519
520 if (!G.chunked)
521 break;
522
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100523 fgets(G.wget_buf, sizeof(G.wget_buf), dfp); /* This is a newline */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200524 get_clen:
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100525 fgets(G.wget_buf, sizeof(G.wget_buf), dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100526 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200527 /* FIXME: error check? */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100528 if (G.content_len == 0)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200529 break; /* all done! */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100530 G.got_clen = 1;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200531 }
532
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200533 progress_meter(PROGRESS_END);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200534}
535
Denis Vlasenko9b49a5e2007-10-11 10:05:36 +0000536int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
Denis Vlasenkoa60f84e2008-07-05 09:18:54 +0000537int wget_main(int argc UNUSED_PARAM, char **argv)
Eric Andersen96700832000-09-04 15:15:55 +0000538{
Eric Andersen79757c92001-04-05 21:45:54 +0000539 struct host_info server, target;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +0000540 len_and_sockaddr *lsa;
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000541 unsigned opt;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200542 int redir_limit;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200543 char *proxy = NULL;
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000544 char *dir_prefix = NULL;
545#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denis Vlasenko5a2ad692009-03-04 14:13:37 +0000546 char *post_data;
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000547 char *extra_headers = NULL;
Glenn L McGrath514aeab2003-12-19 12:08:56 +0000548 llist_t *headers_llist = NULL;
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000549#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +0200550 FILE *sfp; /* socket to web/ftp server */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000551 FILE *dfp; /* socket to ftp server (data) */
552 char *fname_out; /* where to direct output (-O) */
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000553 int output_fd = -1;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200554 bool use_proxy; /* Use proxies if env vars are set */
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000555 const char *proxy_flag = "on"; /* Use proxies if env vars are set */
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000556 const char *user_agent = "Wget";/* "User-Agent" header field */
Denis Vlasenko77105632007-09-24 15:04:00 +0000557
Denis Vlasenko6ca409e2007-08-12 20:58:27 +0000558 static const char keywords[] ALIGN1 =
Denis Vlasenko990d0f62007-07-24 15:54:42 +0000559 "content-length\0""transfer-encoding\0""chunked\0""location\0";
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000560 enum {
561 KEY_content_length = 1, KEY_transfer_encoding, KEY_chunked, KEY_location
562 };
Bernhard Reutner-Fischer289e86a2006-08-20 20:01:24 +0000563#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denis Vlasenko6ca409e2007-08-12 20:58:27 +0000564 static const char wget_longopts[] ALIGN1 =
Denis Vlasenkobdc88fd2007-07-23 17:14:14 +0000565 /* name, has_arg, val */
566 "continue\0" No_argument "c"
567 "spider\0" No_argument "s"
568 "quiet\0" No_argument "q"
569 "output-document\0" Required_argument "O"
570 "directory-prefix\0" Required_argument "P"
571 "proxy\0" Required_argument "Y"
572 "user-agent\0" Required_argument "U"
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200573#if ENABLE_FEATURE_WGET_TIMEOUT
574 "timeout\0" Required_argument "T"
575#endif
Denis Vlasenko50af9262009-03-02 15:08:06 +0000576 /* Ignored: */
577 // "tries\0" Required_argument "t"
Denis Vlasenko50af9262009-03-02 15:08:06 +0000578 /* Ignored (we always use PASV): */
Denis Vlasenkobdc88fd2007-07-23 17:14:14 +0000579 "passive-ftp\0" No_argument "\xff"
580 "header\0" Required_argument "\xfe"
Denis Vlasenko5a2ad692009-03-04 14:13:37 +0000581 "post-data\0" Required_argument "\xfd"
Bernhard Reutner-Fischer3fdba182010-02-10 19:37:29 +0100582 /* Ignored (we don't do ssl) */
583 "no-check-certificate\0" No_argument "\xfc"
Denis Vlasenko990d0f62007-07-24 15:54:42 +0000584 ;
Denis Vlasenko77105632007-09-24 15:04:00 +0000585#endif
586
587 INIT_G();
588
589#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denis Vlasenkobdc88fd2007-07-23 17:14:14 +0000590 applet_long_options = wget_longopts;
Bernhard Reutner-Fischer8d3a6f72006-05-31 14:11:38 +0000591#endif
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000592 /* server.allocated = target.allocated = NULL; */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200593 opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
594 opt = getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000595 &fname_out, &dir_prefix,
Denis Vlasenko540ab702008-06-29 00:32:35 +0000596 &proxy_flag, &user_agent,
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200597 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
598 NULL /* -t RETRIES */
Denis Vlasenko5e34ff22009-04-21 11:09:40 +0000599 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
600 IF_FEATURE_WGET_LONG_OPTIONS(, &post_data)
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000601 );
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000602#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denis Vlasenko7534e082006-10-23 23:21:58 +0000603 if (headers_llist) {
604 int size = 1;
605 char *cp;
Denis Vlasenko8d9f4952007-04-08 15:08:42 +0000606 llist_t *ll = headers_llist;
Denis Vlasenko7534e082006-10-23 23:21:58 +0000607 while (ll) {
608 size += strlen(ll->data) + 2;
609 ll = ll->link;
610 }
611 extra_headers = cp = xmalloc(size);
Glenn L McGrath514aeab2003-12-19 12:08:56 +0000612 while (headers_llist) {
Denis Vlasenkod50dda82008-06-15 05:40:56 +0000613 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
Eric Andersen96700832000-09-04 15:15:55 +0000614 }
615 }
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000616#endif
Tim Rikerc1ef7bd2006-01-25 00:08:53 +0000617
Denys Vlasenko7f432802009-06-28 01:02:24 +0200618 /* TODO: compat issue: should handle "wget URL1 URL2..." */
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200619
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200620 target.user = NULL;
Eric Andersen79757c92001-04-05 21:45:54 +0000621 parse_url(argv[optind], &target);
Eric Andersen79757c92001-04-05 21:45:54 +0000622
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000623 /* Use the proxy if necessary */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200624 use_proxy = (strcmp(proxy_flag, "off") != 0);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000625 if (use_proxy) {
Robert Griebld7760112002-05-14 23:36:45 +0000626 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200627 if (proxy && proxy[0]) {
Denys Vlasenko81fe2b12010-02-11 04:23:43 +0100628 server.user = NULL;
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000629 parse_url(proxy, &server);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000630 } else {
631 use_proxy = 0;
632 }
Robert Griebld7760112002-05-14 23:36:45 +0000633 }
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200634 if (!use_proxy) {
635 server.port = target.port;
636 if (ENABLE_FEATURE_IPV6) {
637 server.host = xstrdup(target.host);
638 } else {
639 server.host = target.host;
640 }
641 }
642
643 if (ENABLE_FEATURE_IPV6)
644 strip_ipv6_scope_id(target.host);
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000645
Denis Vlasenko818322b2007-09-24 18:27:04 +0000646 /* Guess an output filename, if there was no -O FILE */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000647 if (!(opt & WGET_OPT_OUTNAME)) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000648 fname_out = bb_get_last_path_component_nostrip(target.path);
649 /* handle "wget http://kernel.org//" */
650 if (fname_out[0] == '/' || !fname_out[0])
Denis Vlasenkob6aae0f2007-01-29 22:51:25 +0000651 fname_out = (char*)"index.html";
Denis Vlasenko818322b2007-09-24 18:27:04 +0000652 /* -P DIR is considered only if there was no -O FILE */
653 if (dir_prefix)
Matt Kraai0382eb82001-07-19 19:13:55 +0000654 fname_out = concat_path_file(dir_prefix, fname_out);
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000655 } else {
656 if (LONE_DASH(fname_out)) {
657 /* -O - */
658 output_fd = 1;
659 opt &= ~WGET_OPT_CONTINUE;
660 }
Eric Andersen29edd002000-12-09 16:55:35 +0000661 }
Denis Vlasenko818322b2007-09-24 18:27:04 +0000662#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100663 G.curfile = bb_get_last_path_component_nostrip(fname_out);
Denis Vlasenko818322b2007-09-24 18:27:04 +0000664#endif
665
Denis Vlasenko4e4662c2006-11-23 13:10:23 +0000666 /* Impossible?
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +0000667 if ((opt & WGET_OPT_CONTINUE) && !fname_out)
Denys Vlasenko6331cf02009-11-13 09:08:27 +0100668 bb_error_msg_and_die("can't specify continue (-c) without a filename (-O)");
Denys Vlasenko7f432802009-06-28 01:02:24 +0200669 */
Eric Andersen29edd002000-12-09 16:55:35 +0000670
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000671 /* Determine where to start transfer */
Denis Vlasenko4e4662c2006-11-23 13:10:23 +0000672 if (opt & WGET_OPT_CONTINUE) {
Denis Vlasenko7039a662006-10-08 17:54:47 +0000673 output_fd = open(fname_out, O_WRONLY);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000674 if (output_fd >= 0) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100675 G.beg_range = xlseek(output_fd, 0, SEEK_END);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000676 }
677 /* File doesn't exist. We do not create file here yet.
Denys Vlasenko7f432802009-06-28 01:02:24 +0200678 * We are not sure it exists on remove side */
Eric Andersen96700832000-09-04 15:15:55 +0000679 }
680
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200681 redir_limit = 5;
682 resolve_lsa:
Denis Vlasenko42823d52007-02-04 02:39:08 +0000683 lsa = xhost2sockaddr(server.host, server.port);
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +0000684 if (!(opt & WGET_OPT_QUIET)) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200685 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
686 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
687 free(s);
Eric Andersene6dc4392003-10-31 09:31:46 +0000688 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200689 establish_session:
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000690 if (use_proxy || !target.is_ftp) {
Eric Andersen79757c92001-04-05 21:45:54 +0000691 /*
692 * HTTP session
693 */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200694 char *str;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200695 int status;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200696
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200697 /* Open socket to http server */
698 sfp = open_socket(lsa);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200699
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200700 /* Send HTTP request */
701 if (use_proxy) {
702 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
703 target.is_ftp ? "f" : "ht", target.host,
704 target.path);
705 } else {
706 if (opt & WGET_OPT_POST_DATA)
707 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
708 else
709 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
710 }
Glenn L McGrathe7bdfcc2003-08-28 22:03:19 +0000711
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200712 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
713 target.host, user_agent);
Eric Andersen79757c92001-04-05 21:45:54 +0000714
Denys Vlasenko9213a552011-02-10 13:23:45 +0100715 /* Ask server to close the connection as soon as we are done
716 * (IOW: we do not intend to send more requests)
717 */
718 fprintf(sfp, "Connection: close\r\n");
719
Denis Vlasenko9cade082006-11-21 10:43:02 +0000720#if ENABLE_FEATURE_WGET_AUTHENTICATION
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200721 if (target.user) {
722 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100723 base64enc(target.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200724 }
725 if (use_proxy && server.user) {
726 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100727 base64enc(server.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200728 }
Eric Andersen79757c92001-04-05 21:45:54 +0000729#endif
730
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100731 if (G.beg_range)
732 fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
Denys Vlasenko9213a552011-02-10 13:23:45 +0100733
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000734#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200735 if (extra_headers)
736 fputs(extra_headers, sfp);
Denis Vlasenko5a2ad692009-03-04 14:13:37 +0000737
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200738 if (opt & WGET_OPT_POST_DATA) {
739 char *estr = URL_escape(post_data);
Denys Vlasenko9213a552011-02-10 13:23:45 +0100740 fprintf(sfp,
741 "Content-Type: application/x-www-form-urlencoded\r\n"
742 "Content-Length: %u\r\n"
743 "\r\n"
744 "%s",
745 (int) strlen(estr), estr
746 );
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200747 free(estr);
748 } else
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000749#endif
Denys Vlasenko9213a552011-02-10 13:23:45 +0100750 {
751 fprintf(sfp, "\r\n");
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200752 }
Eric Andersen79757c92001-04-05 21:45:54 +0000753
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +0200754 fflush(sfp);
755
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200756 /*
757 * Retrieve HTTP response line and check for "200" status code.
758 */
Denis Vlasenko023b57d2006-10-15 17:05:55 +0000759 read_response:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100760 if (fgets(G.wget_buf, sizeof(G.wget_buf), sfp) == NULL)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200761 bb_error_msg_and_die("no response from server");
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000762
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100763 str = G.wget_buf;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200764 str = skip_non_whitespace(str);
765 str = skip_whitespace(str);
766 // FIXME: no error check
767 // xatou wouldn't work: "200 OK"
768 status = atoi(str);
769 switch (status) {
770 case 0:
771 case 100:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100772 while (gethdr(sfp /*, &n*/) != NULL)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200773 /* eat all remaining headers */;
774 goto read_response;
775 case 200:
Denis Vlasenko50b5cac2008-06-22 16:28:02 +0000776/*
777Response 204 doesn't say "null file", it says "metadata
778has changed but data didn't":
779
780"10.2.5 204 No Content
781The server has fulfilled the request but does not need to return
782an entity-body, and might want to return updated metainformation.
783The response MAY include new or updated metainformation in the form
784of entity-headers, which if present SHOULD be associated with
785the requested variant.
786
787If the client is a user agent, it SHOULD NOT change its document
788view from that which caused the request to be sent. This response
789is primarily intended to allow input for actions to take place
790without causing a change to the user agent's active document view,
791although any new or updated metainformation SHOULD be applied
792to the document currently in the user agent's active view.
793
794The 204 response MUST NOT include a message-body, and thus
795is always terminated by the first empty line after the header fields."
796
797However, in real world it was observed that some web servers
798(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
799*/
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200800 case 204:
801 break;
Denys Vlasenkofb132e42010-10-29 11:46:52 +0200802 case 300: /* redirection */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200803 case 301:
804 case 302:
805 case 303:
806 break;
807 case 206:
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100808 if (G.beg_range)
Denis Vlasenko023b57d2006-10-15 17:05:55 +0000809 break;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200810 /* fall through */
811 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100812 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200813 }
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000814
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200815 /*
816 * Retrieve HTTP headers.
817 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100818 while ((str = gethdr(sfp /*, &n*/)) != NULL) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200819 /* gethdr converted "FOO:" string to lowercase */
Matthijs van de Water0d586662009-08-22 20:19:48 +0200820 smalluint key;
821 /* strip trailing whitespace */
822 char *s = strchrnul(str, '\0') - 1;
823 while (s >= str && (*s == ' ' || *s == '\t')) {
824 *s = '\0';
825 s--;
826 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100827 key = index_in_strings(keywords, G.wget_buf) + 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200828 if (key == KEY_content_length) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100829 G.content_len = BB_STRTOOFF(str, NULL, 10);
830 if (G.content_len < 0 || errno) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200831 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
Eric Andersen79757c92001-04-05 21:45:54 +0000832 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200833 G.got_clen = 1;
834 continue;
835 }
836 if (key == KEY_transfer_encoding) {
837 if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
838 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
839 G.chunked = G.got_clen = 1;
840 }
841 if (key == KEY_location && status >= 300) {
842 if (--redir_limit == 0)
843 bb_error_msg_and_die("too many redirections");
844 fclose(sfp);
845 G.got_clen = 0;
846 G.chunked = 0;
847 if (str[0] == '/')
848 /* free(target.allocated); */
849 target.path = /* target.allocated = */ xstrdup(str+1);
850 /* lsa stays the same: it's on the same server */
851 else {
852 parse_url(str, &target);
853 if (!use_proxy) {
854 server.host = target.host;
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200855 /* strip_ipv6_scope_id(target.host); - no! */
856 /* we assume remote never gives us IPv6 addr with scope id */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200857 server.port = target.port;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +0000858 free(lsa);
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200859 goto resolve_lsa;
860 } /* else: lsa stays the same: we use proxy */
Eric Andersen79757c92001-04-05 21:45:54 +0000861 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200862 goto establish_session;
Eric Andersen79757c92001-04-05 21:45:54 +0000863 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200864 }
865// if (status >= 300)
866// bb_error_msg_and_die("bad redirection (no Location: header from server)");
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000867
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200868 /* For HTTP, data is pumped over the same connection */
Eric Andersen79757c92001-04-05 21:45:54 +0000869 dfp = sfp;
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000870
871 } else {
Eric Andersen79757c92001-04-05 21:45:54 +0000872 /*
873 * FTP session
874 */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200875 sfp = prepare_ftp_session(&dfp, &target, lsa);
Eric Andersen96700832000-09-04 15:15:55 +0000876 }
Denis Vlasenko77105632007-09-24 15:04:00 +0000877
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +0000878 if (opt & WGET_OPT_SPIDER) {
879 if (ENABLE_FEATURE_CLEAN_UP)
880 fclose(sfp);
Denis Vlasenko77105632007-09-24 15:04:00 +0000881 return EXIT_SUCCESS;
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +0000882 }
Eric Andersen79757c92001-04-05 21:45:54 +0000883
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000884 if (output_fd < 0) {
885 int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
886 /* compat with wget: -O FILE can overwrite */
887 if (opt & WGET_OPT_OUTNAME)
888 o_flags = O_WRONLY | O_CREAT | O_TRUNC;
889 output_fd = xopen(fname_out, o_flags);
890 }
Denis Vlasenkof8aa1092006-10-01 10:58:54 +0000891
Denys Vlasenko7f432802009-06-28 01:02:24 +0200892 retrieve_file_data(dfp, output_fd);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100893 xclose(output_fd);
Rob Landley19a39402006-06-13 17:10:26 +0000894
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200895 if (dfp != sfp) {
896 /* It's ftp. Close it properly */
Eric Andersen79757c92001-04-05 21:45:54 +0000897 fclose(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100898 if (ftpcmd(NULL, NULL, sfp) != 226)
899 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
900 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
Eric Andersen79757c92001-04-05 21:45:54 +0000901 }
Denis Vlasenko77105632007-09-24 15:04:00 +0000902
903 return EXIT_SUCCESS;
Eric Andersen96700832000-09-04 15:15:55 +0000904}