blob: 44eb4cf188a1bc9cffabe30dc8f208ee39ce3e37 [file] [log] [blame]
Eric Andersen96700832000-09-04 15:15:55 +00001/* vi: set sw=4 ts=4: */
2/*
Eric Andersen79757c92001-04-05 21:45:54 +00003 * wget - retrieve a file using HTTP or FTP
Eric Andersen96700832000-09-04 15:15:55 +00004 *
Eric Andersen4e573f42000-11-14 23:29:24 +00005 * Chip Rosenthal Covad Communications <chip@laserlink.net>
Denys Vlasenko0ef64bd2010-08-16 20:14:46 +02006 * Licensed under GPLv2, see file LICENSE in this source tree.
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +02007 *
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
Denys Vlasenkofb132e42010-10-29 11:46:52 +02009 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
Eric Andersen96700832000-09-04 15:15:55 +000010 */
Denis Vlasenkob6adbf12007-05-26 19:00:18 +000011#include "libbb.h"
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +000012
Denys Vlasenkof836f012011-02-10 23:02:28 +010013//#define log_io(...) bb_error_msg(__VA_ARGS__)
14#define log_io(...) ((void)0)
15
16
Eric Andersen79757c92001-04-05 21:45:54 +000017struct host_info {
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +000018 // May be used if we ever will want to free() all xstrdup()s...
19 /* char *allocated; */
Denis Vlasenko818322b2007-09-24 18:27:04 +000020 const char *path;
21 const char *user;
22 char *host;
23 int port;
24 smallint is_ftp;
Eric Andersen79757c92001-04-05 21:45:54 +000025};
26
Denis Vlasenko77105632007-09-24 15:04:00 +000027
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020028/* Globals */
Denis Vlasenko77105632007-09-24 15:04:00 +000029struct globals {
30 off_t content_len; /* Content-length of the file */
31 off_t beg_range; /* Range at which continue begins */
32#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko77105632007-09-24 15:04:00 +000033 off_t transferred; /* Number of bytes transferred so far */
34 const char *curfile; /* Name of current file being transferred */
Magnus Dammf5914992009-11-08 16:34:43 +010035 bb_progress_t pmt;
Denis Vlasenko77105632007-09-24 15:04:00 +000036#endif
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020037#if ENABLE_FEATURE_WGET_TIMEOUT
38 unsigned timeout_seconds;
39#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +020040 smallint chunked; /* chunked transfer encoding */
41 smallint got_clen; /* got content-length: from server */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010042 /* Local downloads do benefit from big buffer.
43 * With 512 byte buffer, it was measured to be
44 * an order of magnitude slower than with big one.
45 */
46 uint64_t just_to_align_next_member;
47 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
Denys Vlasenko98a4c7c2010-02-04 15:00:15 +010048} FIX_ALIASING;
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010049#define G (*ptr_to_globals)
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020050#define INIT_G() do { \
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010051 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020052 IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \
53} while (0)
Denis Vlasenko77105632007-09-24 15:04:00 +000054
55
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020056/* Must match option string! */
57enum {
58 WGET_OPT_CONTINUE = (1 << 0),
Denys Vlasenkofb132e42010-10-29 11:46:52 +020059 WGET_OPT_SPIDER = (1 << 1),
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020060 WGET_OPT_QUIET = (1 << 2),
61 WGET_OPT_OUTNAME = (1 << 3),
62 WGET_OPT_PREFIX = (1 << 4),
63 WGET_OPT_PROXY = (1 << 5),
64 WGET_OPT_USER_AGENT = (1 << 6),
65 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
66 WGET_OPT_RETRIES = (1 << 8),
67 WGET_OPT_PASSIVE = (1 << 9),
68 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
69 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
70};
71
/* Values accepted by progress_meter() */
enum {
	PROGRESS_END   = 0,     /* final update, then finish the line */
	PROGRESS_BUMP  = 1,     /* ordinary refresh */
	PROGRESS_START = -1,    /* initialize the bar before the first update */
};
Denis Vlasenko9cade082006-11-21 10:43:02 +000077#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko00d84172008-11-24 07:34:42 +000078static void progress_meter(int flag)
Denis Vlasenko47ddd012007-09-24 18:24:17 +000079{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020080 if (option_mask32 & WGET_OPT_QUIET)
81 return;
Denis Vlasenko47ddd012007-09-24 18:24:17 +000082
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020083 if (flag == PROGRESS_START)
Denys Vlasenkod55e1392011-02-11 18:56:13 +010084 bb_progress_init(&G.pmt, G.curfile);
Denis Vlasenko47ddd012007-09-24 18:24:17 +000085
Denys Vlasenkod55e1392011-02-11 18:56:13 +010086 bb_progress_update(&G.pmt, G.beg_range, G.transferred,
Denys Vlasenkoc5bbd5d2010-07-12 03:27:09 +020087 G.chunked ? 0 : G.beg_range + G.transferred + G.content_len);
Denis Vlasenko47ddd012007-09-24 18:24:17 +000088
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020089 if (flag == PROGRESS_END) {
Denys Vlasenko19ced5c2010-06-06 21:53:09 +020090 bb_putchar_stderr('\n');
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +010091 G.transferred = 0;
Denis Vlasenko47ddd012007-09-24 18:24:17 +000092 }
93}
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020094#else
Denis Vlasenko00d84172008-11-24 07:34:42 +000095static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
Eric Andersenb520e082000-10-03 00:21:45 +000096#endif
Eric Andersenc7bda1c2004-03-15 08:29:22 +000097
Denis Vlasenko47ddd012007-09-24 18:24:17 +000098
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Edits HOST in place: "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 * HOST is left untouched when it does not start with '[', has no '%',
 * or is not of the form "[xx]" / "[xx]:nn" with the '%' inside the brackets.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return;

	/* Remove the IPv6 zone identifier from the host address */
	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}
	if (cp < scope) {
		/* '%' is outside the brackets (e.g. "[::1]:80%"): nothing to
		 * strip. Copying from cp up to scope would chase its own
		 * output and never see the terminating NUL. */
		return;
	}

	/* cp points to "]...", scope points to "%eth0]..." */
	memmove(scope, cp, strlen(cp) + 1);
}
134
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100135#if 0 /* were needed when we used signal-driven progress bar */
/* Read NMEMB bytes into PTR from STREAM, restarting the read whenever
 * it was cut short by EINTR. Returns the number of bytes read, which is
 * a short count if an eof or non-interrupt error is encountered. */
static size_t safe_fread(void *ptr, size_t nmemb, FILE *stream)
{
	char *const start = (char*)ptr;
	char *cur = start;

	for (;;) {
		size_t got;

		/* fresh error state, so ferror/errno below describe this fread only */
		clearerr(stream);
		errno = 0;
		got = fread(cur, 1, nmemb, stream);
		cur += got;
		nmemb -= got;

		if (nmemb == 0)
			break;          /* got everything */
		if (!ferror(stream) || errno != EINTR)
			break;          /* eof or a real error */
	}

	return cur - start;
}
153
/* Read a line or SIZE-1 bytes into S, whichever is less, from STREAM,
 * retrying for as long as fgets fails with EINTR.
 * Returns S, or NULL if an eof or non-interrupt error is encountered. */
static char *safe_fgets(char *s, int size, FILE *stream)
{
	for (;;) {
		char *line;

		/* fresh error state, so ferror/errno below describe this fgets only */
		clearerr(stream);
		errno = 0;
		line = fgets(s, size, stream);

		if (line != NULL)
			return line;
		if (!ferror(stream) || errno != EINTR)
			return NULL;
	}
}
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100168#endif
Matt Kraai854125f2001-05-09 19:15:46 +0000169
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/* Base64-encode character string.
 * The result is written to G.wget_buf, and that buffer is returned,
 * so it stays valid only until wget_buf is used again.
 * Input longer than the limit below is silently cut short.
 */
static char *base64enc(const char *str)
{
	/* paranoia: cap the input length relative to the size of wget_buf */
	const size_t limit = sizeof(G.wget_buf)/4*3 - 10;
	unsigned len = strlen(str);

	if (len > limit)
		len = limit;

	bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
	return G.wget_buf;
}
#endif
181
/* Cut S short, in place, at its first byte below ' ' (control
 * characters; the terminating NUL counts). Bytes are compared as
 * unsigned, so values >= 0x80 are kept. Returns S.
 */
static char* sanitize_string(char *s)
{
	unsigned char *end;

	for (end = (unsigned char *) s; *end >= ' '; end++)
		continue;
	*end = '\0';

	return s;
}
190
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000191static FILE *open_socket(len_and_sockaddr *lsa)
192{
193 FILE *fp;
194
195 /* glibc 2.4 seems to try seeking on it - ??! */
196 /* hopefully it understands what ESPIPE means... */
197 fp = fdopen(xconnect_stream(lsa), "r+");
198 if (fp == NULL)
Denys Vlasenkodee0fc92011-02-10 10:01:49 +0100199 bb_perror_msg_and_die(bb_msg_memory_exhausted);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000200
201 return fp;
202}
203
Denys Vlasenkof836f012011-02-10 23:02:28 +0100204/* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
205static char fgets_and_trim(FILE *fp)
206{
207 char c;
208 char *buf_ptr;
209
210 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
211 bb_perror_msg_and_die("error getting response");
212
213 buf_ptr = strchrnul(G.wget_buf, '\n');
214 c = *buf_ptr;
215 *buf_ptr = '\0';
216 buf_ptr = strchrnul(G.wget_buf, '\r');
217 *buf_ptr = '\0';
218
219 log_io("< %s", G.wget_buf);
220
221 return c;
222}
223
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100224static int ftpcmd(const char *s1, const char *s2, FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000225{
226 int result;
227 if (s1) {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100228 if (!s2)
229 s2 = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000230 fprintf(fp, "%s%s\r\n", s1, s2);
231 fflush(fp);
Denys Vlasenkof836f012011-02-10 23:02:28 +0100232 log_io("> %s%s", s1, s2);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000233 }
234
235 do {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100236 fgets_and_trim(fp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100237 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000238
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100239 G.wget_buf[3] = '\0';
240 result = xatoi_positive(G.wget_buf);
241 G.wget_buf[3] = ' ';
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000242 return result;
243}
244
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000245static void parse_url(char *src_url, struct host_info *h)
246{
247 char *url, *p, *sp;
248
249 /* h->allocated = */ url = xstrdup(src_url);
250
251 if (strncmp(url, "http://", 7) == 0) {
252 h->port = bb_lookup_port("http", "tcp", 80);
253 h->host = url + 7;
254 h->is_ftp = 0;
255 } else if (strncmp(url, "ftp://", 6) == 0) {
256 h->port = bb_lookup_port("ftp", "tcp", 21);
257 h->host = url + 6;
258 h->is_ftp = 1;
259 } else
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200260 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000261
262 // FYI:
263 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
264 // 'GET /?var=a/b HTTP 1.0'
265 // and saves 'index.html?var=a%2Fb' (we save 'b')
266 // wget 'http://busybox.net?login=john@doe':
267 // request: 'GET /?login=john@doe HTTP/1.0'
268 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
269 // wget 'http://busybox.net#test/test':
270 // request: 'GET / HTTP/1.0'
271 // saves: 'index.html' (we save 'test')
272 //
273 // We also don't add unique .N suffix if file exists...
274 sp = strchr(h->host, '/');
275 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
276 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
277 if (!sp) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000278 h->path = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000279 } else if (*sp == '/') {
280 *sp = '\0';
281 h->path = sp + 1;
282 } else { // '#' or '?'
283 // http://busybox.net?login=john@doe is a valid URL
284 // memmove converts to:
285 // http:/busybox.nett?login=john@doe...
Denis Vlasenko818322b2007-09-24 18:27:04 +0000286 memmove(h->host - 1, h->host, sp - h->host);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000287 h->host--;
288 sp[-1] = '\0';
289 h->path = sp;
290 }
291
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200292 // We used to set h->user to NULL here, but this interferes
293 // with handling of code 302 ("object was moved")
294
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000295 sp = strrchr(h->host, '@');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000296 if (sp != NULL) {
297 h->user = h->host;
298 *sp = '\0';
299 h->host = sp + 1;
300 }
301
302 sp = h->host;
303}
304
Denys Vlasenkof836f012011-02-10 23:02:28 +0100305static char *gethdr(FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000306{
307 char *s, *hdrval;
308 int c;
309
310 /* *istrunc = 0; */
311
312 /* retrieve header line */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100313 c = fgets_and_trim(fp);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000314
Denys Vlasenkof836f012011-02-10 23:02:28 +0100315 /* end of the headers? */
316 if (G.wget_buf[0] == '\0')
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000317 return NULL;
318
319 /* convert the header name to lower case */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100320 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.'; ++s) {
Denys Vlasenko48363312010-04-04 15:29:32 +0200321 /* tolower for "A-Z", no-op for "0-9a-z-." */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100322 *s |= 0x20;
Denys Vlasenko48363312010-04-04 15:29:32 +0200323 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000324
325 /* verify we are at the end of the header name */
326 if (*s != ':')
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100327 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000328
329 /* locate the start of the header value */
330 *s++ = '\0';
331 hdrval = skip_whitespace(s);
332
Denys Vlasenkof836f012011-02-10 23:02:28 +0100333 if (c != '\n') {
334 /* Rats! The buffer isn't big enough to hold the entire header value */
335 while (c = getc(fp), c != EOF && c != '\n')
336 continue;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000337 }
338
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000339 return hdrval;
340}
341
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
/* URL encode, see RFC 2396.
 * Returns a newly xmalloc()ed copy of STR in which every byte other
 * than letters, digits and !&'()*-.=_~ is replaced by %XX
 * (uppercase hex).
 */
static char *URL_escape(const char *str)
{
	/* worst case: every input byte grows to three */
	char *res = xmalloc(strlen(str) * 3 + 1);
	char *dst = res;
	unsigned char c;

	do {
		int verbatim;

		c = *str++;
		switch (c) {
		case '\0': /* the terminator is copied as-is too */
		case '!': case '&': case '\'': case '(': case ')': case '*':
		case '-': case '.': case '=': case '_': case '~':
			verbatim = 1;
			break;
		default:
			verbatim = (c >= '0' && c <= '9')
				|| ((c|0x20) >= 'a' && (c|0x20) <= 'z');
			break;
		}

		if (verbatim) {
			*dst++ = c;
		} else {
			*dst++ = '%';
			*dst++ = bb_hexdigits_upcase[c >> 4];
			*dst++ = bb_hexdigits_upcase[c & 0xf];
		}
	} while (c != '\0');

	return res;
}
#endif
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000379
Denys Vlasenko7f432802009-06-28 01:02:24 +0200380static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
381{
Denys Vlasenko7f432802009-06-28 01:02:24 +0200382 FILE *sfp;
383 char *str;
384 int port;
385
386 if (!target->user)
387 target->user = xstrdup("anonymous:busybox@");
388
389 sfp = open_socket(lsa);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100390 if (ftpcmd(NULL, NULL, sfp) != 220)
391 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200392
393 /*
394 * Splitting username:password pair,
395 * trying to log in
396 */
397 str = strchr(target->user, ':');
398 if (str)
399 *str++ = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100400 switch (ftpcmd("USER ", target->user, sfp)) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200401 case 230:
402 break;
403 case 331:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100404 if (ftpcmd("PASS ", str, sfp) == 230)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200405 break;
406 /* fall through (failed login) */
407 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100408 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200409 }
410
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100411 ftpcmd("TYPE I", NULL, sfp);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200412
413 /*
414 * Querying file size
415 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100416 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
417 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100418 if (G.content_len < 0 || errno) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200419 bb_error_msg_and_die("SIZE value is garbage");
420 }
421 G.got_clen = 1;
422 }
423
424 /*
425 * Entering passive mode
426 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100427 if (ftpcmd("PASV", NULL, sfp) != 227) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200428 pasv_error:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100429 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200430 }
431 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
432 // Server's IP is N1.N2.N3.N4 (we ignore it)
433 // Server's port for data connection is P1*256+P2
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100434 str = strrchr(G.wget_buf, ')');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200435 if (str) str[0] = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100436 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200437 if (!str) goto pasv_error;
438 port = xatou_range(str+1, 0, 255);
439 *str = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100440 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200441 if (!str) goto pasv_error;
442 port += xatou_range(str+1, 0, 255) * 256;
443 set_nport(lsa, htons(port));
444
445 *dfpp = open_socket(lsa);
446
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100447 if (G.beg_range) {
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100448 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
449 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100450 G.content_len -= G.beg_range;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200451 }
452
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100453 if (ftpcmd("RETR ", target->path, sfp) > 150)
454 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200455
456 return sfp;
457}
458
Denys Vlasenko7f432802009-06-28 01:02:24 +0200459static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd)
460{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200461#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
462# if ENABLE_FEATURE_WGET_TIMEOUT
463 unsigned second_cnt;
464# endif
465 struct pollfd polldata;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200466
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200467 polldata.fd = fileno(dfp);
468 polldata.events = POLLIN | POLLPRI;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200469#endif
470 progress_meter(PROGRESS_START);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200471
472 if (G.chunked)
473 goto get_clen;
474
475 /* Loops only if chunked */
476 while (1) {
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100477
478#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
479 /* Must use nonblocking I/O, otherwise fread will loop
480 * and *block* until it reads full buffer,
481 * which messes up progress bar and/or timeout logic.
482 * Because of nonblocking I/O, we need to dance
483 * very carefully around EAGAIN. See explanation at
484 * clearerr() call.
485 */
486 ndelay_on(polldata.fd);
487#endif
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100488 while (1) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200489 int n;
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100490 unsigned rdsz;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200491
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100492 rdsz = sizeof(G.wget_buf);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100493 if (G.got_clen) {
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100494 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100495 if ((int)G.content_len <= 0)
496 break;
497 rdsz = (unsigned)G.content_len;
498 }
499 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100500
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200501#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
502# if ENABLE_FEATURE_WGET_TIMEOUT
503 second_cnt = G.timeout_seconds;
504# endif
505 while (1) {
506 if (safe_poll(&polldata, 1, 1000) != 0)
507 break; /* error, EOF, or data is available */
508# if ENABLE_FEATURE_WGET_TIMEOUT
509 if (second_cnt != 0 && --second_cnt == 0) {
510 progress_meter(PROGRESS_END);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100511 bb_error_msg_and_die("download timed out");
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200512 }
513# endif
514 /* Needed for "stalled" indicator */
515 progress_meter(PROGRESS_BUMP);
516 }
Denys Vlasenkof9af3752011-02-11 22:01:33 +0100517
Denys Vlasenko8766a792011-02-11 21:42:00 +0100518 /* fread internally uses read loop, which in our case
519 * is usually exited when we get EAGAIN.
520 * In this case, libc sets error marker on the stream.
521 * Need to clear it before next fread to avoid possible
522 * rare false positive ferror below. Rare because usually
523 * fread gets more than zero bytes, and we don't fall
524 * into if (n <= 0) ...
525 */
526 clearerr(dfp);
527 errno = 0;
Denys Vlasenkof9af3752011-02-11 22:01:33 +0100528#endif
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100529 n = fread(G.wget_buf, 1, rdsz, dfp);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100530 /* man fread:
531 * If error occurs, or EOF is reached, the return value
532 * is a short item count (or zero).
533 * fread does not distinguish between EOF and error.
534 */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200535 if (n <= 0) {
Denys Vlasenko8766a792011-02-11 21:42:00 +0100536#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
537 if (errno == EAGAIN) /* poll lied, there is no data? */
538 continue; /* yes */
539#endif
540 if (ferror(dfp))
541 bb_perror_msg_and_die(bb_msg_read_error);
542 break; /* EOF, not error */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200543 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100544
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100545 xwrite(output_fd, G.wget_buf, n);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100546
Denys Vlasenko7f432802009-06-28 01:02:24 +0200547#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100548 G.transferred += n;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200549 progress_meter(PROGRESS_BUMP);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200550#endif
Denys Vlasenko9213a552011-02-10 13:23:45 +0100551 if (G.got_clen) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100552 G.content_len -= n;
Denys Vlasenko9213a552011-02-10 13:23:45 +0100553 if (G.content_len == 0)
554 break;
555 }
Denys Vlasenko7f432802009-06-28 01:02:24 +0200556 }
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100557#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
558 clearerr(dfp);
Denys Vlasenko88ad9da2011-02-11 23:06:21 +0100559 ndelay_off(polldata.fd); /* else fgets can get very unhappy */
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100560#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +0200561 if (!G.chunked)
562 break;
563
Denys Vlasenkoc60f4462011-02-11 22:23:23 +0100564 fgets_and_trim(dfp); /* Eat empty line */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200565 get_clen:
Denys Vlasenkof836f012011-02-10 23:02:28 +0100566 fgets_and_trim(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100567 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200568 /* FIXME: error check? */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100569 if (G.content_len == 0)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200570 break; /* all done! */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100571 G.got_clen = 1;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200572 }
573
Denys Vlasenko88ad9da2011-02-11 23:06:21 +0100574 G.chunked = 0; /* make progress meter show 100% even for chunked */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200575 progress_meter(PROGRESS_END);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200576}
577
Denis Vlasenko9b49a5e2007-10-11 10:05:36 +0000578int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
Denis Vlasenkoa60f84e2008-07-05 09:18:54 +0000579int wget_main(int argc UNUSED_PARAM, char **argv)
Eric Andersen96700832000-09-04 15:15:55 +0000580{
Eric Andersen79757c92001-04-05 21:45:54 +0000581 struct host_info server, target;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +0000582 len_and_sockaddr *lsa;
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000583 unsigned opt;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200584 int redir_limit;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200585 char *proxy = NULL;
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000586 char *dir_prefix = NULL;
587#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denis Vlasenko5a2ad692009-03-04 14:13:37 +0000588 char *post_data;
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000589 char *extra_headers = NULL;
Glenn L McGrath514aeab2003-12-19 12:08:56 +0000590 llist_t *headers_llist = NULL;
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000591#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +0200592 FILE *sfp; /* socket to web/ftp server */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000593 FILE *dfp; /* socket to ftp server (data) */
594 char *fname_out; /* where to direct output (-O) */
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000595 int output_fd = -1;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200596 bool use_proxy; /* Use proxies if env vars are set */
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000597 const char *proxy_flag = "on"; /* Use proxies if env vars are set */
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000598 const char *user_agent = "Wget";/* "User-Agent" header field */
Denis Vlasenko77105632007-09-24 15:04:00 +0000599
Denis Vlasenko6ca409e2007-08-12 20:58:27 +0000600 static const char keywords[] ALIGN1 =
Denis Vlasenko990d0f62007-07-24 15:54:42 +0000601 "content-length\0""transfer-encoding\0""chunked\0""location\0";
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000602 enum {
603 KEY_content_length = 1, KEY_transfer_encoding, KEY_chunked, KEY_location
604 };
Bernhard Reutner-Fischer289e86a2006-08-20 20:01:24 +0000605#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denis Vlasenko6ca409e2007-08-12 20:58:27 +0000606 static const char wget_longopts[] ALIGN1 =
Denis Vlasenkobdc88fd2007-07-23 17:14:14 +0000607 /* name, has_arg, val */
608 "continue\0" No_argument "c"
609 "spider\0" No_argument "s"
610 "quiet\0" No_argument "q"
611 "output-document\0" Required_argument "O"
612 "directory-prefix\0" Required_argument "P"
613 "proxy\0" Required_argument "Y"
614 "user-agent\0" Required_argument "U"
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200615#if ENABLE_FEATURE_WGET_TIMEOUT
616 "timeout\0" Required_argument "T"
617#endif
Denis Vlasenko50af9262009-03-02 15:08:06 +0000618 /* Ignored: */
619 // "tries\0" Required_argument "t"
Denis Vlasenko50af9262009-03-02 15:08:06 +0000620 /* Ignored (we always use PASV): */
Denis Vlasenkobdc88fd2007-07-23 17:14:14 +0000621 "passive-ftp\0" No_argument "\xff"
622 "header\0" Required_argument "\xfe"
Denis Vlasenko5a2ad692009-03-04 14:13:37 +0000623 "post-data\0" Required_argument "\xfd"
Bernhard Reutner-Fischer3fdba182010-02-10 19:37:29 +0100624 /* Ignored (we don't do ssl) */
625 "no-check-certificate\0" No_argument "\xfc"
Denis Vlasenko990d0f62007-07-24 15:54:42 +0000626 ;
Denis Vlasenko77105632007-09-24 15:04:00 +0000627#endif
628
629 INIT_G();
630
631#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denis Vlasenkobdc88fd2007-07-23 17:14:14 +0000632 applet_long_options = wget_longopts;
Bernhard Reutner-Fischer8d3a6f72006-05-31 14:11:38 +0000633#endif
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000634 /* server.allocated = target.allocated = NULL; */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200635 opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
636 opt = getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000637 &fname_out, &dir_prefix,
Denis Vlasenko540ab702008-06-29 00:32:35 +0000638 &proxy_flag, &user_agent,
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200639 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
640 NULL /* -t RETRIES */
Denis Vlasenko5e34ff22009-04-21 11:09:40 +0000641 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
642 IF_FEATURE_WGET_LONG_OPTIONS(, &post_data)
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000643 );
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000644#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denis Vlasenko7534e082006-10-23 23:21:58 +0000645 if (headers_llist) {
646 int size = 1;
647 char *cp;
Denis Vlasenko8d9f4952007-04-08 15:08:42 +0000648 llist_t *ll = headers_llist;
Denis Vlasenko7534e082006-10-23 23:21:58 +0000649 while (ll) {
650 size += strlen(ll->data) + 2;
651 ll = ll->link;
652 }
653 extra_headers = cp = xmalloc(size);
Glenn L McGrath514aeab2003-12-19 12:08:56 +0000654 while (headers_llist) {
Denis Vlasenkod50dda82008-06-15 05:40:56 +0000655 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
Eric Andersen96700832000-09-04 15:15:55 +0000656 }
657 }
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000658#endif
Tim Rikerc1ef7bd2006-01-25 00:08:53 +0000659
Denys Vlasenko7f432802009-06-28 01:02:24 +0200660 /* TODO: compat issue: should handle "wget URL1 URL2..." */
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200661
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200662 target.user = NULL;
Eric Andersen79757c92001-04-05 21:45:54 +0000663 parse_url(argv[optind], &target);
Eric Andersen79757c92001-04-05 21:45:54 +0000664
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000665 /* Use the proxy if necessary */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200666 use_proxy = (strcmp(proxy_flag, "off") != 0);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000667 if (use_proxy) {
Robert Griebld7760112002-05-14 23:36:45 +0000668 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200669 if (proxy && proxy[0]) {
Denys Vlasenko81fe2b12010-02-11 04:23:43 +0100670 server.user = NULL;
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000671 parse_url(proxy, &server);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000672 } else {
673 use_proxy = 0;
674 }
Robert Griebld7760112002-05-14 23:36:45 +0000675 }
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200676 if (!use_proxy) {
677 server.port = target.port;
678 if (ENABLE_FEATURE_IPV6) {
679 server.host = xstrdup(target.host);
680 } else {
681 server.host = target.host;
682 }
683 }
684
685 if (ENABLE_FEATURE_IPV6)
686 strip_ipv6_scope_id(target.host);
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000687
Denis Vlasenko818322b2007-09-24 18:27:04 +0000688 /* Guess an output filename, if there was no -O FILE */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000689 if (!(opt & WGET_OPT_OUTNAME)) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000690 fname_out = bb_get_last_path_component_nostrip(target.path);
691 /* handle "wget http://kernel.org//" */
692 if (fname_out[0] == '/' || !fname_out[0])
Denis Vlasenkob6aae0f2007-01-29 22:51:25 +0000693 fname_out = (char*)"index.html";
Denis Vlasenko818322b2007-09-24 18:27:04 +0000694 /* -P DIR is considered only if there was no -O FILE */
695 if (dir_prefix)
Matt Kraai0382eb82001-07-19 19:13:55 +0000696 fname_out = concat_path_file(dir_prefix, fname_out);
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000697 } else {
698 if (LONE_DASH(fname_out)) {
699 /* -O - */
700 output_fd = 1;
701 opt &= ~WGET_OPT_CONTINUE;
702 }
Eric Andersen29edd002000-12-09 16:55:35 +0000703 }
Denis Vlasenko818322b2007-09-24 18:27:04 +0000704#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100705 G.curfile = bb_get_last_path_component_nostrip(fname_out);
Denis Vlasenko818322b2007-09-24 18:27:04 +0000706#endif
707
Denis Vlasenko4e4662c2006-11-23 13:10:23 +0000708 /* Impossible?
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +0000709 if ((opt & WGET_OPT_CONTINUE) && !fname_out)
Denys Vlasenko6331cf02009-11-13 09:08:27 +0100710 bb_error_msg_and_die("can't specify continue (-c) without a filename (-O)");
Denys Vlasenko7f432802009-06-28 01:02:24 +0200711 */
Eric Andersen29edd002000-12-09 16:55:35 +0000712
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000713 /* Determine where to start transfer */
Denis Vlasenko4e4662c2006-11-23 13:10:23 +0000714 if (opt & WGET_OPT_CONTINUE) {
Denis Vlasenko7039a662006-10-08 17:54:47 +0000715 output_fd = open(fname_out, O_WRONLY);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000716 if (output_fd >= 0) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100717 G.beg_range = xlseek(output_fd, 0, SEEK_END);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000718 }
719 /* File doesn't exist. We do not create file here yet.
Denys Vlasenko7f432802009-06-28 01:02:24 +0200720 * We are not sure it exists on remote side */
Eric Andersen96700832000-09-04 15:15:55 +0000721 }
722
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200723 redir_limit = 5;
724 resolve_lsa:
Denis Vlasenko42823d52007-02-04 02:39:08 +0000725 lsa = xhost2sockaddr(server.host, server.port);
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +0000726 if (!(opt & WGET_OPT_QUIET)) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200727 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
728 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
729 free(s);
Eric Andersene6dc4392003-10-31 09:31:46 +0000730 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200731 establish_session:
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000732 if (use_proxy || !target.is_ftp) {
Eric Andersen79757c92001-04-05 21:45:54 +0000733 /*
734 * HTTP session
735 */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200736 char *str;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200737 int status;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200738
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200739 /* Open socket to http server */
740 sfp = open_socket(lsa);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200741
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200742 /* Send HTTP request */
743 if (use_proxy) {
744 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
745 target.is_ftp ? "f" : "ht", target.host,
746 target.path);
747 } else {
748 if (opt & WGET_OPT_POST_DATA)
749 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
750 else
751 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
752 }
Glenn L McGrathe7bdfcc2003-08-28 22:03:19 +0000753
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200754 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
755 target.host, user_agent);
Eric Andersen79757c92001-04-05 21:45:54 +0000756
Denys Vlasenko9213a552011-02-10 13:23:45 +0100757 /* Ask server to close the connection as soon as we are done
758 * (IOW: we do not intend to send more requests)
759 */
760 fprintf(sfp, "Connection: close\r\n");
761
Denis Vlasenko9cade082006-11-21 10:43:02 +0000762#if ENABLE_FEATURE_WGET_AUTHENTICATION
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200763 if (target.user) {
764 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100765 base64enc(target.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200766 }
767 if (use_proxy && server.user) {
768 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100769 base64enc(server.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200770 }
Eric Andersen79757c92001-04-05 21:45:54 +0000771#endif
772
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100773 if (G.beg_range)
774 fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
Denys Vlasenko9213a552011-02-10 13:23:45 +0100775
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000776#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200777 if (extra_headers)
778 fputs(extra_headers, sfp);
Denis Vlasenko5a2ad692009-03-04 14:13:37 +0000779
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200780 if (opt & WGET_OPT_POST_DATA) {
781 char *estr = URL_escape(post_data);
Denys Vlasenko9213a552011-02-10 13:23:45 +0100782 fprintf(sfp,
783 "Content-Type: application/x-www-form-urlencoded\r\n"
784 "Content-Length: %u\r\n"
785 "\r\n"
786 "%s",
787 (int) strlen(estr), estr
788 );
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200789 free(estr);
790 } else
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000791#endif
Denys Vlasenko9213a552011-02-10 13:23:45 +0100792 {
793 fprintf(sfp, "\r\n");
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200794 }
Eric Andersen79757c92001-04-05 21:45:54 +0000795
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +0200796 fflush(sfp);
797
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200798 /*
799 * Retrieve HTTP response line and check for "200" status code.
800 */
Denis Vlasenko023b57d2006-10-15 17:05:55 +0000801 read_response:
Denys Vlasenkof836f012011-02-10 23:02:28 +0100802 fgets_and_trim(sfp);
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000803
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100804 str = G.wget_buf;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200805 str = skip_non_whitespace(str);
806 str = skip_whitespace(str);
807 // FIXME: no error check
808 // xatou wouldn't work: "200 OK"
809 status = atoi(str);
810 switch (status) {
811 case 0:
812 case 100:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100813 while (gethdr(sfp /*, &n*/) != NULL)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200814 /* eat all remaining headers */;
815 goto read_response;
816 case 200:
Denis Vlasenko50b5cac2008-06-22 16:28:02 +0000817/*
818Response 204 doesn't say "null file", it says "metadata
819has changed but data didn't":
820
821"10.2.5 204 No Content
822The server has fulfilled the request but does not need to return
823an entity-body, and might want to return updated metainformation.
824The response MAY include new or updated metainformation in the form
825of entity-headers, which if present SHOULD be associated with
826the requested variant.
827
828If the client is a user agent, it SHOULD NOT change its document
829view from that which caused the request to be sent. This response
830is primarily intended to allow input for actions to take place
831without causing a change to the user agent's active document view,
832although any new or updated metainformation SHOULD be applied
833to the document currently in the user agent's active view.
834
835The 204 response MUST NOT include a message-body, and thus
836is always terminated by the first empty line after the header fields."
837
838However, in real world it was observed that some web servers
839(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
840*/
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200841 case 204:
842 break;
Denys Vlasenkofb132e42010-10-29 11:46:52 +0200843 case 300: /* redirection */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200844 case 301:
845 case 302:
846 case 303:
847 break;
848 case 206:
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100849 if (G.beg_range)
Denis Vlasenko023b57d2006-10-15 17:05:55 +0000850 break;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200851 /* fall through */
852 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100853 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200854 }
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000855
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200856 /*
857 * Retrieve HTTP headers.
858 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100859 while ((str = gethdr(sfp /*, &n*/)) != NULL) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200860 /* gethdr converted "FOO:" string to lowercase */
Matthijs van de Water0d586662009-08-22 20:19:48 +0200861 smalluint key;
862 /* strip trailing whitespace */
863 char *s = strchrnul(str, '\0') - 1;
864 while (s >= str && (*s == ' ' || *s == '\t')) {
865 *s = '\0';
866 s--;
867 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100868 key = index_in_strings(keywords, G.wget_buf) + 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200869 if (key == KEY_content_length) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100870 G.content_len = BB_STRTOOFF(str, NULL, 10);
871 if (G.content_len < 0 || errno) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200872 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
Eric Andersen79757c92001-04-05 21:45:54 +0000873 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200874 G.got_clen = 1;
875 continue;
876 }
877 if (key == KEY_transfer_encoding) {
878 if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
879 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
880 G.chunked = G.got_clen = 1;
881 }
882 if (key == KEY_location && status >= 300) {
883 if (--redir_limit == 0)
884 bb_error_msg_and_die("too many redirections");
885 fclose(sfp);
886 G.got_clen = 0;
887 G.chunked = 0;
888 if (str[0] == '/')
889 /* free(target.allocated); */
890 target.path = /* target.allocated = */ xstrdup(str+1);
891 /* lsa stays the same: it's on the same server */
892 else {
893 parse_url(str, &target);
894 if (!use_proxy) {
895 server.host = target.host;
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200896 /* strip_ipv6_scope_id(target.host); - no! */
897 /* we assume remote never gives us IPv6 addr with scope id */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200898 server.port = target.port;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +0000899 free(lsa);
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200900 goto resolve_lsa;
901 } /* else: lsa stays the same: we use proxy */
Eric Andersen79757c92001-04-05 21:45:54 +0000902 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200903 goto establish_session;
Eric Andersen79757c92001-04-05 21:45:54 +0000904 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200905 }
906// if (status >= 300)
907// bb_error_msg_and_die("bad redirection (no Location: header from server)");
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000908
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200909 /* For HTTP, data is pumped over the same connection */
Eric Andersen79757c92001-04-05 21:45:54 +0000910 dfp = sfp;
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000911
912 } else {
Eric Andersen79757c92001-04-05 21:45:54 +0000913 /*
914 * FTP session
915 */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200916 sfp = prepare_ftp_session(&dfp, &target, lsa);
Eric Andersen96700832000-09-04 15:15:55 +0000917 }
Denis Vlasenko77105632007-09-24 15:04:00 +0000918
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +0000919 if (opt & WGET_OPT_SPIDER) {
920 if (ENABLE_FEATURE_CLEAN_UP)
921 fclose(sfp);
Denis Vlasenko77105632007-09-24 15:04:00 +0000922 return EXIT_SUCCESS;
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +0000923 }
Eric Andersen79757c92001-04-05 21:45:54 +0000924
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000925 if (output_fd < 0) {
926 int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
927 /* compat with wget: -O FILE can overwrite */
928 if (opt & WGET_OPT_OUTNAME)
929 o_flags = O_WRONLY | O_CREAT | O_TRUNC;
930 output_fd = xopen(fname_out, o_flags);
931 }
Denis Vlasenkof8aa1092006-10-01 10:58:54 +0000932
Denys Vlasenko7f432802009-06-28 01:02:24 +0200933 retrieve_file_data(dfp, output_fd);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100934 xclose(output_fd);
Rob Landley19a39402006-06-13 17:10:26 +0000935
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200936 if (dfp != sfp) {
937 /* It's ftp. Close it properly */
Eric Andersen79757c92001-04-05 21:45:54 +0000938 fclose(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100939 if (ftpcmd(NULL, NULL, sfp) != 226)
940 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
941 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
Eric Andersen79757c92001-04-05 21:45:54 +0000942 }
Denis Vlasenko77105632007-09-24 15:04:00 +0000943
944 return EXIT_SUCCESS;
Eric Andersen96700832000-09-04 15:15:55 +0000945}