blob: f2d7daf2f886ce99bb472710c3ac5b5029f97d85 [file] [log] [blame]
/* vi: set sw=4 ts=4: */
/*
 * wget - retrieve a file using HTTP or FTP
 *
 * Chip Rosenthal Covad Communications <chip@laserlink.net>
 * Licensed under GPLv2, see file LICENSE in this source tree.
 *
 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
 * Kuhn's copyrights are licensed GPLv2-or-later.  File as a whole remains GPLv2.
 */
Denis Vlasenkob6adbf12007-05-26 19:00:18 +000011#include "libbb.h"
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +000012
/* Protocol tracing hook: expands to nothing, so its arguments are never
 * evaluated. Swap in the commented-out definition to trace the dialogue. */
//#define log_io(...) bb_error_msg(__VA_ARGS__)
#define log_io(...) ((void)0)
15
16
Eric Andersen79757c92001-04-05 21:45:54 +000017struct host_info {
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +000018 // May be used if we ever will want to free() all xstrdup()s...
19 /* char *allocated; */
Denis Vlasenko818322b2007-09-24 18:27:04 +000020 const char *path;
21 const char *user;
22 char *host;
23 int port;
24 smallint is_ftp;
Eric Andersen79757c92001-04-05 21:45:54 +000025};
26
Denis Vlasenko77105632007-09-24 15:04:00 +000027
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020028/* Globals */
Denis Vlasenko77105632007-09-24 15:04:00 +000029struct globals {
30 off_t content_len; /* Content-length of the file */
31 off_t beg_range; /* Range at which continue begins */
32#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko77105632007-09-24 15:04:00 +000033 off_t transferred; /* Number of bytes transferred so far */
34 const char *curfile; /* Name of current file being transferred */
Magnus Dammf5914992009-11-08 16:34:43 +010035 bb_progress_t pmt;
Denis Vlasenko77105632007-09-24 15:04:00 +000036#endif
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020037#if ENABLE_FEATURE_WGET_TIMEOUT
38 unsigned timeout_seconds;
39#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +020040 smallint chunked; /* chunked transfer encoding */
41 smallint got_clen; /* got content-length: from server */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010042 /* Local downloads do benefit from big buffer.
43 * With 512 byte buffer, it was measured to be
44 * an order of magnitude slower than with big one.
45 */
46 uint64_t just_to_align_next_member;
47 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
Denys Vlasenko98a4c7c2010-02-04 15:00:15 +010048} FIX_ALIASING;
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010049#define G (*ptr_to_globals)
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020050#define INIT_G() do { \
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010051 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020052 IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \
53} while (0)
Denis Vlasenko77105632007-09-24 15:04:00 +000054
55
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020056/* Must match option string! */
57enum {
58 WGET_OPT_CONTINUE = (1 << 0),
Denys Vlasenkofb132e42010-10-29 11:46:52 +020059 WGET_OPT_SPIDER = (1 << 1),
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020060 WGET_OPT_QUIET = (1 << 2),
61 WGET_OPT_OUTNAME = (1 << 3),
62 WGET_OPT_PREFIX = (1 << 4),
63 WGET_OPT_PROXY = (1 << 5),
64 WGET_OPT_USER_AGENT = (1 << 6),
65 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
66 WGET_OPT_RETRIES = (1 << 8),
67 WGET_OPT_PASSIVE = (1 << 9),
68 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
69 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
70};
71
/* Values accepted by progress_meter() */
enum {
	PROGRESS_START = -1, /* initialize the meter, then draw it */
	PROGRESS_END   = 0,  /* draw once more, then finish the line */
	PROGRESS_BUMP  = 1,  /* plain redraw */
};
Denis Vlasenko9cade082006-11-21 10:43:02 +000077#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko00d84172008-11-24 07:34:42 +000078static void progress_meter(int flag)
Denis Vlasenko47ddd012007-09-24 18:24:17 +000079{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020080 if (option_mask32 & WGET_OPT_QUIET)
81 return;
Denis Vlasenko47ddd012007-09-24 18:24:17 +000082
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020083 if (flag == PROGRESS_START)
Denys Vlasenkod55e1392011-02-11 18:56:13 +010084 bb_progress_init(&G.pmt, G.curfile);
Denis Vlasenko47ddd012007-09-24 18:24:17 +000085
Denys Vlasenkod55e1392011-02-11 18:56:13 +010086 bb_progress_update(&G.pmt, G.beg_range, G.transferred,
Denys Vlasenkoc5bbd5d2010-07-12 03:27:09 +020087 G.chunked ? 0 : G.beg_range + G.transferred + G.content_len);
Denis Vlasenko47ddd012007-09-24 18:24:17 +000088
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020089 if (flag == PROGRESS_END) {
Denys Vlasenko19ced5c2010-06-06 21:53:09 +020090 bb_putchar_stderr('\n');
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +010091 G.transferred = 0;
Denis Vlasenko47ddd012007-09-24 18:24:17 +000092 }
93}
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020094#else
Denis Vlasenko00d84172008-11-24 07:34:42 +000095static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
Eric Andersenb520e082000-10-03 00:21:45 +000096#endif
Eric Andersenc7bda1c2004-03-15 08:29:22 +000097
Denis Vlasenko47ddd012007-09-24 18:24:17 +000098
/* IPv6 has scoped address types, i.e. link- and site-local addresses.
 * A link-local address may carry a scope identifier naming the
 * interface/link it is valid on (e.g. fe80::1%eth0). Such an identifier
 * is meaningful on a single node only.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache, for one, treats zone
 * identifiers in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Edits host in place: "[addr%zone]" / "[addr%zone]:port" lose the
 * "%zone" part. Anything else is left untouched.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *zone, *bracket;

	/* bbox wget does cope with bracket-less IPv6 addresses such as
	 * wget "http://::1/xxx", but that form is not standard.
	 * To save code, _here_ we do not support it. */
	if (host[0] != '[')
		return; /* not IPv6 */

	zone = strchr(host, '%');
	if (zone == NULL)
		return; /* no zone identifier present */

	bracket = strchr(host, ']');
	if (bracket == NULL)
		return; /* malformed: unterminated "[" */
	if (bracket[1] != ':' && bracket[1] != '\0')
		return; /* malformed: neither "[xx]:nn" nor "[xx]" */

	/* zone points to "%eth0]...", bracket points to "]...":
	 * slide the tail down over the zone identifier */
	overlapping_strcpy(zone, bracket);
}
134
#if 0 /* were needed when we used signal-driven progress bar */
/* Read NMEMB bytes from STREAM into PTR, restarting after reads that
 * were cut short by a signal (EINTR). Returns the number of bytes read;
 * a short count means eof or a non-interrupt error was hit. */
static size_t safe_fread(void *ptr, size_t nmemb, FILE *stream)
{
	size_t done = 0;

	for (;;) {
		size_t got;

		clearerr(stream);
		errno = 0;
		got = fread((char*)ptr + done, 1, nmemb - done, stream);
		done += got;
		if (done == nmemb)
			break; /* everything requested has arrived */
		if (!ferror(stream) || errno != EINTR)
			break; /* eof or a real error */
	}

	return done;
}

/* Read a line, or SIZE-1 bytes if that is less, from STREAM into S,
 * restarting after EINTR.
 * Returns S, or NULL if an eof or non-interrupt error is encountered. */
static char *safe_fgets(char *s, int size, FILE *stream)
{
	for (;;) {
		char *line;

		clearerr(stream);
		errno = 0;
		line = fgets(s, size, stream);
		if (line != NULL)
			return line;
		if (!ferror(stream) || errno != EINTR)
			return NULL;
	}
}
#endif
Matt Kraai854125f2001-05-09 19:15:46 +0000169
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/* Base64-encode str into the shared G.wget_buf and return that buffer.
 * The result is overwritten by the next user of G.wget_buf.
 * Over-long input is silently truncated so the output fits. */
static char *base64enc(const char *str)
{
	/* paranoia: most input bytes we are willing to encode */
	const size_t limit = sizeof(G.wget_buf)/4*3 - 10;
	unsigned len = strlen(str);

	if (len > limit)
		len = limit;
	bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
	return G.wget_buf;
}
#endif
181
/* Cut s (in place) at its first byte below ' ', i.e. at the first
 * control character. Bytes >= 0x80 are kept. Returns s. */
static char* sanitize_string(char *s)
{
	unsigned char *cut;

	for (cut = (unsigned char *) s; *cut >= ' '; cut++)
		continue;
	*cut = '\0';
	return s;
}
190
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000191static FILE *open_socket(len_and_sockaddr *lsa)
192{
193 FILE *fp;
194
195 /* glibc 2.4 seems to try seeking on it - ??! */
196 /* hopefully it understands what ESPIPE means... */
197 fp = fdopen(xconnect_stream(lsa), "r+");
198 if (fp == NULL)
Denys Vlasenkodee0fc92011-02-10 10:01:49 +0100199 bb_perror_msg_and_die(bb_msg_memory_exhausted);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000200
201 return fp;
202}
203
Denys Vlasenkof836f012011-02-10 23:02:28 +0100204/* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
205static char fgets_and_trim(FILE *fp)
206{
207 char c;
208 char *buf_ptr;
209
210 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
211 bb_perror_msg_and_die("error getting response");
212
213 buf_ptr = strchrnul(G.wget_buf, '\n');
214 c = *buf_ptr;
215 *buf_ptr = '\0';
216 buf_ptr = strchrnul(G.wget_buf, '\r');
217 *buf_ptr = '\0';
218
219 log_io("< %s", G.wget_buf);
220
221 return c;
222}
223
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100224static int ftpcmd(const char *s1, const char *s2, FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000225{
226 int result;
227 if (s1) {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100228 if (!s2)
229 s2 = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000230 fprintf(fp, "%s%s\r\n", s1, s2);
231 fflush(fp);
Denys Vlasenkof836f012011-02-10 23:02:28 +0100232 log_io("> %s%s", s1, s2);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000233 }
234
235 do {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100236 fgets_and_trim(fp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100237 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000238
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100239 G.wget_buf[3] = '\0';
240 result = xatoi_positive(G.wget_buf);
241 G.wget_buf[3] = ' ';
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000242 return result;
243}
244
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000245static void parse_url(char *src_url, struct host_info *h)
246{
247 char *url, *p, *sp;
248
249 /* h->allocated = */ url = xstrdup(src_url);
250
251 if (strncmp(url, "http://", 7) == 0) {
252 h->port = bb_lookup_port("http", "tcp", 80);
253 h->host = url + 7;
254 h->is_ftp = 0;
255 } else if (strncmp(url, "ftp://", 6) == 0) {
256 h->port = bb_lookup_port("ftp", "tcp", 21);
257 h->host = url + 6;
258 h->is_ftp = 1;
259 } else
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200260 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000261
262 // FYI:
263 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
264 // 'GET /?var=a/b HTTP 1.0'
265 // and saves 'index.html?var=a%2Fb' (we save 'b')
266 // wget 'http://busybox.net?login=john@doe':
267 // request: 'GET /?login=john@doe HTTP/1.0'
268 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
269 // wget 'http://busybox.net#test/test':
270 // request: 'GET / HTTP/1.0'
271 // saves: 'index.html' (we save 'test')
272 //
273 // We also don't add unique .N suffix if file exists...
274 sp = strchr(h->host, '/');
275 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
276 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
277 if (!sp) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000278 h->path = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000279 } else if (*sp == '/') {
280 *sp = '\0';
281 h->path = sp + 1;
282 } else { // '#' or '?'
283 // http://busybox.net?login=john@doe is a valid URL
284 // memmove converts to:
285 // http:/busybox.nett?login=john@doe...
Denis Vlasenko818322b2007-09-24 18:27:04 +0000286 memmove(h->host - 1, h->host, sp - h->host);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000287 h->host--;
288 sp[-1] = '\0';
289 h->path = sp;
290 }
291
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200292 // We used to set h->user to NULL here, but this interferes
293 // with handling of code 302 ("object was moved")
294
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000295 sp = strrchr(h->host, '@');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000296 if (sp != NULL) {
297 h->user = h->host;
298 *sp = '\0';
299 h->host = sp + 1;
300 }
301
302 sp = h->host;
303}
304
Denys Vlasenkof836f012011-02-10 23:02:28 +0100305static char *gethdr(FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000306{
307 char *s, *hdrval;
308 int c;
309
310 /* *istrunc = 0; */
311
312 /* retrieve header line */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100313 c = fgets_and_trim(fp);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000314
Denys Vlasenkof836f012011-02-10 23:02:28 +0100315 /* end of the headers? */
316 if (G.wget_buf[0] == '\0')
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000317 return NULL;
318
319 /* convert the header name to lower case */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100320 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.'; ++s) {
Denys Vlasenko48363312010-04-04 15:29:32 +0200321 /* tolower for "A-Z", no-op for "0-9a-z-." */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100322 *s |= 0x20;
Denys Vlasenko48363312010-04-04 15:29:32 +0200323 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000324
325 /* verify we are at the end of the header name */
326 if (*s != ':')
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100327 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000328
329 /* locate the start of the header value */
330 *s++ = '\0';
331 hdrval = skip_whitespace(s);
332
Denys Vlasenkof836f012011-02-10 23:02:28 +0100333 if (c != '\n') {
334 /* Rats! The buffer isn't big enough to hold the entire header value */
335 while (c = getc(fp), c != EOF && c != '\n')
336 continue;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000337 }
338
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000339 return hdrval;
340}
341
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
/* Return a newly allocated copy of str in which every byte except
 * ASCII letters, digits and !&'()*-.=_~ is replaced by "%XX"
 * (uppercase hex). */
static char *URL_escape(const char *str)
{
	/* URL encode, see RFC 2396 */
	/* Worst case: every input byte grows to three output bytes */
	char *res = xmalloc(strlen(str) * 3 + 1);
	char *dst = res;

	for (;;) {
		unsigned char c = *str++;
		int verbatim;

		if (c == '\0')
			break;

		switch (c) {
		case '!': case '&': case '\'': case '(': case ')': case '*':
		case '-': case '.': case '=': case '_': case '~':
			verbatim = 1;
			break;
		default:
			verbatim = (c >= '0' && c <= '9')
				|| ((c|0x20) >= 'a' && (c|0x20) <= 'z');
			break;
		}

		if (verbatim) {
			*dst++ = c;
		} else {
			*dst++ = '%';
			*dst++ = bb_hexdigits_upcase[c >> 4];
			*dst++ = bb_hexdigits_upcase[c & 0xf];
		}
	}
	*dst = '\0';

	return res;
}
#endif
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000379
Denys Vlasenko7f432802009-06-28 01:02:24 +0200380static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
381{
Denys Vlasenko7f432802009-06-28 01:02:24 +0200382 FILE *sfp;
383 char *str;
384 int port;
385
386 if (!target->user)
387 target->user = xstrdup("anonymous:busybox@");
388
389 sfp = open_socket(lsa);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100390 if (ftpcmd(NULL, NULL, sfp) != 220)
391 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200392
393 /*
394 * Splitting username:password pair,
395 * trying to log in
396 */
397 str = strchr(target->user, ':');
398 if (str)
399 *str++ = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100400 switch (ftpcmd("USER ", target->user, sfp)) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200401 case 230:
402 break;
403 case 331:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100404 if (ftpcmd("PASS ", str, sfp) == 230)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200405 break;
406 /* fall through (failed login) */
407 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100408 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200409 }
410
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100411 ftpcmd("TYPE I", NULL, sfp);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200412
413 /*
414 * Querying file size
415 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100416 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
417 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100418 if (G.content_len < 0 || errno) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200419 bb_error_msg_and_die("SIZE value is garbage");
420 }
421 G.got_clen = 1;
422 }
423
424 /*
425 * Entering passive mode
426 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100427 if (ftpcmd("PASV", NULL, sfp) != 227) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200428 pasv_error:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100429 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200430 }
431 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
432 // Server's IP is N1.N2.N3.N4 (we ignore it)
433 // Server's port for data connection is P1*256+P2
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100434 str = strrchr(G.wget_buf, ')');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200435 if (str) str[0] = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100436 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200437 if (!str) goto pasv_error;
438 port = xatou_range(str+1, 0, 255);
439 *str = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100440 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200441 if (!str) goto pasv_error;
442 port += xatou_range(str+1, 0, 255) * 256;
443 set_nport(lsa, htons(port));
444
445 *dfpp = open_socket(lsa);
446
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100447 if (G.beg_range) {
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100448 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
449 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100450 G.content_len -= G.beg_range;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200451 }
452
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100453 if (ftpcmd("RETR ", target->path, sfp) > 150)
454 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200455
456 return sfp;
457}
458
Denys Vlasenko7f432802009-06-28 01:02:24 +0200459static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd)
460{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200461#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
462# if ENABLE_FEATURE_WGET_TIMEOUT
463 unsigned second_cnt;
464# endif
465 struct pollfd polldata;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200466
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200467 polldata.fd = fileno(dfp);
468 polldata.events = POLLIN | POLLPRI;
Denys Vlasenkoda0df472010-08-08 04:21:50 +0200469 ndelay_on(polldata.fd);
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200470#endif
471 progress_meter(PROGRESS_START);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200472
473 if (G.chunked)
474 goto get_clen;
475
476 /* Loops only if chunked */
477 while (1) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100478 while (1) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200479 int n;
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100480 unsigned rdsz;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200481
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100482 rdsz = sizeof(G.wget_buf);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100483 if (G.got_clen) {
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100484 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100485 if ((int)G.content_len <= 0)
486 break;
487 rdsz = (unsigned)G.content_len;
488 }
489 }
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200490#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
491# if ENABLE_FEATURE_WGET_TIMEOUT
492 second_cnt = G.timeout_seconds;
493# endif
494 while (1) {
495 if (safe_poll(&polldata, 1, 1000) != 0)
496 break; /* error, EOF, or data is available */
497# if ENABLE_FEATURE_WGET_TIMEOUT
498 if (second_cnt != 0 && --second_cnt == 0) {
499 progress_meter(PROGRESS_END);
500 bb_perror_msg_and_die("download timed out");
501 }
502# endif
503 /* Needed for "stalled" indicator */
504 progress_meter(PROGRESS_BUMP);
505 }
506#endif
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100507 n = fread(G.wget_buf, 1, rdsz, dfp);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200508 if (n <= 0) {
509 if (ferror(dfp)) {
510 /* perror will not work: ferror doesn't set errno */
511 bb_error_msg_and_die(bb_msg_read_error);
512 }
513 break;
514 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100515 xwrite(output_fd, G.wget_buf, n);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200516#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100517 G.transferred += n;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200518 progress_meter(PROGRESS_BUMP);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200519#endif
Denys Vlasenko9213a552011-02-10 13:23:45 +0100520 if (G.got_clen) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100521 G.content_len -= n;
Denys Vlasenko9213a552011-02-10 13:23:45 +0100522 if (G.content_len == 0)
523 break;
524 }
Denys Vlasenko7f432802009-06-28 01:02:24 +0200525 }
526
527 if (!G.chunked)
528 break;
529
Denys Vlasenkof836f012011-02-10 23:02:28 +0100530 fgets_and_trim(dfp); /* This is a newline */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200531 get_clen:
Denys Vlasenkof836f012011-02-10 23:02:28 +0100532 fgets_and_trim(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100533 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200534 /* FIXME: error check? */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100535 if (G.content_len == 0)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200536 break; /* all done! */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100537 G.got_clen = 1;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200538 }
539
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200540 progress_meter(PROGRESS_END);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200541}
542
Denis Vlasenko9b49a5e2007-10-11 10:05:36 +0000543int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
Denis Vlasenkoa60f84e2008-07-05 09:18:54 +0000544int wget_main(int argc UNUSED_PARAM, char **argv)
Eric Andersen96700832000-09-04 15:15:55 +0000545{
Eric Andersen79757c92001-04-05 21:45:54 +0000546 struct host_info server, target;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +0000547 len_and_sockaddr *lsa;
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000548 unsigned opt;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200549 int redir_limit;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200550 char *proxy = NULL;
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000551 char *dir_prefix = NULL;
552#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denis Vlasenko5a2ad692009-03-04 14:13:37 +0000553 char *post_data;
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000554 char *extra_headers = NULL;
Glenn L McGrath514aeab2003-12-19 12:08:56 +0000555 llist_t *headers_llist = NULL;
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000556#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +0200557 FILE *sfp; /* socket to web/ftp server */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000558 FILE *dfp; /* socket to ftp server (data) */
559 char *fname_out; /* where to direct output (-O) */
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000560 int output_fd = -1;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200561 bool use_proxy; /* Use proxies if env vars are set */
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000562 const char *proxy_flag = "on"; /* Use proxies if env vars are set */
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000563 const char *user_agent = "Wget";/* "User-Agent" header field */
Denis Vlasenko77105632007-09-24 15:04:00 +0000564
Denis Vlasenko6ca409e2007-08-12 20:58:27 +0000565 static const char keywords[] ALIGN1 =
Denis Vlasenko990d0f62007-07-24 15:54:42 +0000566 "content-length\0""transfer-encoding\0""chunked\0""location\0";
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000567 enum {
568 KEY_content_length = 1, KEY_transfer_encoding, KEY_chunked, KEY_location
569 };
Bernhard Reutner-Fischer289e86a2006-08-20 20:01:24 +0000570#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denis Vlasenko6ca409e2007-08-12 20:58:27 +0000571 static const char wget_longopts[] ALIGN1 =
Denis Vlasenkobdc88fd2007-07-23 17:14:14 +0000572 /* name, has_arg, val */
573 "continue\0" No_argument "c"
574 "spider\0" No_argument "s"
575 "quiet\0" No_argument "q"
576 "output-document\0" Required_argument "O"
577 "directory-prefix\0" Required_argument "P"
578 "proxy\0" Required_argument "Y"
579 "user-agent\0" Required_argument "U"
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200580#if ENABLE_FEATURE_WGET_TIMEOUT
581 "timeout\0" Required_argument "T"
582#endif
Denis Vlasenko50af9262009-03-02 15:08:06 +0000583 /* Ignored: */
584 // "tries\0" Required_argument "t"
Denis Vlasenko50af9262009-03-02 15:08:06 +0000585 /* Ignored (we always use PASV): */
Denis Vlasenkobdc88fd2007-07-23 17:14:14 +0000586 "passive-ftp\0" No_argument "\xff"
587 "header\0" Required_argument "\xfe"
Denis Vlasenko5a2ad692009-03-04 14:13:37 +0000588 "post-data\0" Required_argument "\xfd"
Bernhard Reutner-Fischer3fdba182010-02-10 19:37:29 +0100589 /* Ignored (we don't do ssl) */
590 "no-check-certificate\0" No_argument "\xfc"
Denis Vlasenko990d0f62007-07-24 15:54:42 +0000591 ;
Denis Vlasenko77105632007-09-24 15:04:00 +0000592#endif
593
594 INIT_G();
595
596#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denis Vlasenkobdc88fd2007-07-23 17:14:14 +0000597 applet_long_options = wget_longopts;
Bernhard Reutner-Fischer8d3a6f72006-05-31 14:11:38 +0000598#endif
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000599 /* server.allocated = target.allocated = NULL; */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200600 opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
601 opt = getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000602 &fname_out, &dir_prefix,
Denis Vlasenko540ab702008-06-29 00:32:35 +0000603 &proxy_flag, &user_agent,
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200604 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
605 NULL /* -t RETRIES */
Denis Vlasenko5e34ff22009-04-21 11:09:40 +0000606 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
607 IF_FEATURE_WGET_LONG_OPTIONS(, &post_data)
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000608 );
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000609#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denis Vlasenko7534e082006-10-23 23:21:58 +0000610 if (headers_llist) {
611 int size = 1;
612 char *cp;
Denis Vlasenko8d9f4952007-04-08 15:08:42 +0000613 llist_t *ll = headers_llist;
Denis Vlasenko7534e082006-10-23 23:21:58 +0000614 while (ll) {
615 size += strlen(ll->data) + 2;
616 ll = ll->link;
617 }
618 extra_headers = cp = xmalloc(size);
Glenn L McGrath514aeab2003-12-19 12:08:56 +0000619 while (headers_llist) {
Denis Vlasenkod50dda82008-06-15 05:40:56 +0000620 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
Eric Andersen96700832000-09-04 15:15:55 +0000621 }
622 }
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000623#endif
Tim Rikerc1ef7bd2006-01-25 00:08:53 +0000624
Denys Vlasenko7f432802009-06-28 01:02:24 +0200625 /* TODO: compat issue: should handle "wget URL1 URL2..." */
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200626
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200627 target.user = NULL;
Eric Andersen79757c92001-04-05 21:45:54 +0000628 parse_url(argv[optind], &target);
Eric Andersen79757c92001-04-05 21:45:54 +0000629
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000630 /* Use the proxy if necessary */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200631 use_proxy = (strcmp(proxy_flag, "off") != 0);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000632 if (use_proxy) {
Robert Griebld7760112002-05-14 23:36:45 +0000633 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200634 if (proxy && proxy[0]) {
Denys Vlasenko81fe2b12010-02-11 04:23:43 +0100635 server.user = NULL;
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000636 parse_url(proxy, &server);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000637 } else {
638 use_proxy = 0;
639 }
Robert Griebld7760112002-05-14 23:36:45 +0000640 }
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200641 if (!use_proxy) {
642 server.port = target.port;
643 if (ENABLE_FEATURE_IPV6) {
644 server.host = xstrdup(target.host);
645 } else {
646 server.host = target.host;
647 }
648 }
649
650 if (ENABLE_FEATURE_IPV6)
651 strip_ipv6_scope_id(target.host);
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000652
Denis Vlasenko818322b2007-09-24 18:27:04 +0000653 /* Guess an output filename, if there was no -O FILE */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000654 if (!(opt & WGET_OPT_OUTNAME)) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000655 fname_out = bb_get_last_path_component_nostrip(target.path);
656 /* handle "wget http://kernel.org//" */
657 if (fname_out[0] == '/' || !fname_out[0])
Denis Vlasenkob6aae0f2007-01-29 22:51:25 +0000658 fname_out = (char*)"index.html";
Denis Vlasenko818322b2007-09-24 18:27:04 +0000659 /* -P DIR is considered only if there was no -O FILE */
660 if (dir_prefix)
Matt Kraai0382eb82001-07-19 19:13:55 +0000661 fname_out = concat_path_file(dir_prefix, fname_out);
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000662 } else {
663 if (LONE_DASH(fname_out)) {
664 /* -O - */
665 output_fd = 1;
666 opt &= ~WGET_OPT_CONTINUE;
667 }
Eric Andersen29edd002000-12-09 16:55:35 +0000668 }
Denis Vlasenko818322b2007-09-24 18:27:04 +0000669#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100670 G.curfile = bb_get_last_path_component_nostrip(fname_out);
Denis Vlasenko818322b2007-09-24 18:27:04 +0000671#endif
672
Denis Vlasenko4e4662c2006-11-23 13:10:23 +0000673 /* Impossible?
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +0000674 if ((opt & WGET_OPT_CONTINUE) && !fname_out)
Denys Vlasenko6331cf02009-11-13 09:08:27 +0100675 bb_error_msg_and_die("can't specify continue (-c) without a filename (-O)");
Denys Vlasenko7f432802009-06-28 01:02:24 +0200676 */
Eric Andersen29edd002000-12-09 16:55:35 +0000677
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000678 /* Determine where to start transfer */
Denis Vlasenko4e4662c2006-11-23 13:10:23 +0000679 if (opt & WGET_OPT_CONTINUE) {
Denis Vlasenko7039a662006-10-08 17:54:47 +0000680 output_fd = open(fname_out, O_WRONLY);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000681 if (output_fd >= 0) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100682 G.beg_range = xlseek(output_fd, 0, SEEK_END);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000683 }
684 /* File doesn't exist. We do not create file here yet.
Denys Vlasenko7f432802009-06-28 01:02:24 +0200685 * We are not sure it exists on remove side */
Eric Andersen96700832000-09-04 15:15:55 +0000686 }
687
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200688 redir_limit = 5;
689 resolve_lsa:
Denis Vlasenko42823d52007-02-04 02:39:08 +0000690 lsa = xhost2sockaddr(server.host, server.port);
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +0000691 if (!(opt & WGET_OPT_QUIET)) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200692 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
693 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
694 free(s);
Eric Andersene6dc4392003-10-31 09:31:46 +0000695 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200696 establish_session:
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000697 if (use_proxy || !target.is_ftp) {
Eric Andersen79757c92001-04-05 21:45:54 +0000698 /*
699 * HTTP session
700 */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200701 char *str;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200702 int status;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200703
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200704 /* Open socket to http server */
705 sfp = open_socket(lsa);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200706
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200707 /* Send HTTP request */
708 if (use_proxy) {
709 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
710 target.is_ftp ? "f" : "ht", target.host,
711 target.path);
712 } else {
713 if (opt & WGET_OPT_POST_DATA)
714 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
715 else
716 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
717 }
Glenn L McGrathe7bdfcc2003-08-28 22:03:19 +0000718
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200719 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
720 target.host, user_agent);
Eric Andersen79757c92001-04-05 21:45:54 +0000721
Denys Vlasenko9213a552011-02-10 13:23:45 +0100722 /* Ask server to close the connection as soon as we are done
723 * (IOW: we do not intend to send more requests)
724 */
725 fprintf(sfp, "Connection: close\r\n");
726
Denis Vlasenko9cade082006-11-21 10:43:02 +0000727#if ENABLE_FEATURE_WGET_AUTHENTICATION
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200728 if (target.user) {
729 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100730 base64enc(target.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200731 }
732 if (use_proxy && server.user) {
733 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100734 base64enc(server.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200735 }
Eric Andersen79757c92001-04-05 21:45:54 +0000736#endif
737
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100738 if (G.beg_range)
739 fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
Denys Vlasenko9213a552011-02-10 13:23:45 +0100740
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000741#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200742 if (extra_headers)
743 fputs(extra_headers, sfp);
Denis Vlasenko5a2ad692009-03-04 14:13:37 +0000744
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200745 if (opt & WGET_OPT_POST_DATA) {
746 char *estr = URL_escape(post_data);
Denys Vlasenko9213a552011-02-10 13:23:45 +0100747 fprintf(sfp,
748 "Content-Type: application/x-www-form-urlencoded\r\n"
749 "Content-Length: %u\r\n"
750 "\r\n"
751 "%s",
752 (int) strlen(estr), estr
753 );
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200754 free(estr);
755 } else
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000756#endif
Denys Vlasenko9213a552011-02-10 13:23:45 +0100757 {
758 fprintf(sfp, "\r\n");
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200759 }
Eric Andersen79757c92001-04-05 21:45:54 +0000760
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +0200761 fflush(sfp);
762
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200763 /*
764 * Retrieve HTTP response line and check for "200" status code.
765 */
Denis Vlasenko023b57d2006-10-15 17:05:55 +0000766 read_response:
Denys Vlasenkof836f012011-02-10 23:02:28 +0100767 fgets_and_trim(sfp);
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000768
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100769 str = G.wget_buf;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200770 str = skip_non_whitespace(str);
771 str = skip_whitespace(str);
772 // FIXME: no error check
773 // xatou wouldn't work: "200 OK"
774 status = atoi(str);
775 switch (status) {
776 case 0:
777 case 100:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100778 while (gethdr(sfp /*, &n*/) != NULL)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200779 /* eat all remaining headers */;
780 goto read_response;
781 case 200:
Denis Vlasenko50b5cac2008-06-22 16:28:02 +0000782/*
783Response 204 doesn't say "null file", it says "metadata
784has changed but data didn't":
785
786"10.2.5 204 No Content
787The server has fulfilled the request but does not need to return
788an entity-body, and might want to return updated metainformation.
789The response MAY include new or updated metainformation in the form
790of entity-headers, which if present SHOULD be associated with
791the requested variant.
792
793If the client is a user agent, it SHOULD NOT change its document
794view from that which caused the request to be sent. This response
795is primarily intended to allow input for actions to take place
796without causing a change to the user agent's active document view,
797although any new or updated metainformation SHOULD be applied
798to the document currently in the user agent's active view.
799
800The 204 response MUST NOT include a message-body, and thus
801is always terminated by the first empty line after the header fields."
802
803However, in real world it was observed that some web servers
804(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
805*/
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200806 case 204:
807 break;
Denys Vlasenkofb132e42010-10-29 11:46:52 +0200808 case 300: /* redirection */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200809 case 301:
810 case 302:
811 case 303:
812 break;
813 case 206:
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100814 if (G.beg_range)
Denis Vlasenko023b57d2006-10-15 17:05:55 +0000815 break;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200816 /* fall through */
817 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100818 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200819 }
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000820
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200821 /*
822 * Retrieve HTTP headers.
823 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100824 while ((str = gethdr(sfp /*, &n*/)) != NULL) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200825 /* gethdr converted "FOO:" string to lowercase */
Matthijs van de Water0d586662009-08-22 20:19:48 +0200826 smalluint key;
827 /* strip trailing whitespace */
828 char *s = strchrnul(str, '\0') - 1;
829 while (s >= str && (*s == ' ' || *s == '\t')) {
830 *s = '\0';
831 s--;
832 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100833 key = index_in_strings(keywords, G.wget_buf) + 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200834 if (key == KEY_content_length) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100835 G.content_len = BB_STRTOOFF(str, NULL, 10);
836 if (G.content_len < 0 || errno) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200837 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
Eric Andersen79757c92001-04-05 21:45:54 +0000838 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200839 G.got_clen = 1;
840 continue;
841 }
842 if (key == KEY_transfer_encoding) {
843 if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
844 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
845 G.chunked = G.got_clen = 1;
846 }
847 if (key == KEY_location && status >= 300) {
848 if (--redir_limit == 0)
849 bb_error_msg_and_die("too many redirections");
850 fclose(sfp);
851 G.got_clen = 0;
852 G.chunked = 0;
853 if (str[0] == '/')
854 /* free(target.allocated); */
855 target.path = /* target.allocated = */ xstrdup(str+1);
856 /* lsa stays the same: it's on the same server */
857 else {
858 parse_url(str, &target);
859 if (!use_proxy) {
860 server.host = target.host;
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200861 /* strip_ipv6_scope_id(target.host); - no! */
862 /* we assume remote never gives us IPv6 addr with scope id */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200863 server.port = target.port;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +0000864 free(lsa);
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200865 goto resolve_lsa;
866 } /* else: lsa stays the same: we use proxy */
Eric Andersen79757c92001-04-05 21:45:54 +0000867 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200868 goto establish_session;
Eric Andersen79757c92001-04-05 21:45:54 +0000869 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200870 }
871// if (status >= 300)
872// bb_error_msg_and_die("bad redirection (no Location: header from server)");
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000873
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200874 /* For HTTP, data is pumped over the same connection */
Eric Andersen79757c92001-04-05 21:45:54 +0000875 dfp = sfp;
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000876
877 } else {
Eric Andersen79757c92001-04-05 21:45:54 +0000878 /*
879 * FTP session
880 */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200881 sfp = prepare_ftp_session(&dfp, &target, lsa);
Eric Andersen96700832000-09-04 15:15:55 +0000882 }
Denis Vlasenko77105632007-09-24 15:04:00 +0000883
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +0000884 if (opt & WGET_OPT_SPIDER) {
885 if (ENABLE_FEATURE_CLEAN_UP)
886 fclose(sfp);
Denis Vlasenko77105632007-09-24 15:04:00 +0000887 return EXIT_SUCCESS;
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +0000888 }
Eric Andersen79757c92001-04-05 21:45:54 +0000889
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000890 if (output_fd < 0) {
891 int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
892 /* compat with wget: -O FILE can overwrite */
893 if (opt & WGET_OPT_OUTNAME)
894 o_flags = O_WRONLY | O_CREAT | O_TRUNC;
895 output_fd = xopen(fname_out, o_flags);
896 }
Denis Vlasenkof8aa1092006-10-01 10:58:54 +0000897
Denys Vlasenko7f432802009-06-28 01:02:24 +0200898 retrieve_file_data(dfp, output_fd);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100899 xclose(output_fd);
Rob Landley19a39402006-06-13 17:10:26 +0000900
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200901 if (dfp != sfp) {
902 /* It's ftp. Close it properly */
Eric Andersen79757c92001-04-05 21:45:54 +0000903 fclose(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100904 if (ftpcmd(NULL, NULL, sfp) != 226)
905 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
906 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
Eric Andersen79757c92001-04-05 21:45:54 +0000907 }
Denis Vlasenko77105632007-09-24 15:04:00 +0000908
909 return EXIT_SUCCESS;
Eric Andersen96700832000-09-04 15:15:55 +0000910}