blob: 673113bfc616fe0375a37cd4733543af8477f6b9 [file] [log] [blame]
Eric Andersen96700832000-09-04 15:15:55 +00001/* vi: set sw=4 ts=4: */
2/*
Eric Andersen79757c92001-04-05 21:45:54 +00003 * wget - retrieve a file using HTTP or FTP
Eric Andersen96700832000-09-04 15:15:55 +00004 *
Eric Andersen4e573f42000-11-14 23:29:24 +00005 * Chip Rosenthal Covad Communications <chip@laserlink.net>
Denys Vlasenko0ef64bd2010-08-16 20:14:46 +02006 * Licensed under GPLv2, see file LICENSE in this source tree.
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +02007 *
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
Denys Vlasenkofb132e42010-10-29 11:46:52 +02009 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
Eric Andersen96700832000-09-04 15:15:55 +000010 */
Denis Vlasenkob6adbf12007-05-26 19:00:18 +000011#include "libbb.h"
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +000012
/* Protocol trace hook. It compiles to nothing by default; enable the
 * bb_error_msg variant (and drop the no-op one) to see every line
 * sent to and received from the server. */
//#define log_io(...) bb_error_msg(__VA_ARGS__)
#define log_io(...) ((void)0)
15
16
Eric Andersen79757c92001-04-05 21:45:54 +000017struct host_info {
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +000018 // May be used if we ever will want to free() all xstrdup()s...
19 /* char *allocated; */
Denis Vlasenko818322b2007-09-24 18:27:04 +000020 const char *path;
21 const char *user;
22 char *host;
23 int port;
24 smallint is_ftp;
Eric Andersen79757c92001-04-05 21:45:54 +000025};
26
Denis Vlasenko77105632007-09-24 15:04:00 +000027
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020028/* Globals */
Denis Vlasenko77105632007-09-24 15:04:00 +000029struct globals {
30 off_t content_len; /* Content-length of the file */
31 off_t beg_range; /* Range at which continue begins */
32#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko77105632007-09-24 15:04:00 +000033 off_t transferred; /* Number of bytes transferred so far */
34 const char *curfile; /* Name of current file being transferred */
Magnus Dammf5914992009-11-08 16:34:43 +010035 bb_progress_t pmt;
Denis Vlasenko77105632007-09-24 15:04:00 +000036#endif
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020037#if ENABLE_FEATURE_WGET_TIMEOUT
38 unsigned timeout_seconds;
39#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +020040 smallint chunked; /* chunked transfer encoding */
41 smallint got_clen; /* got content-length: from server */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010042 /* Local downloads do benefit from big buffer.
43 * With 512 byte buffer, it was measured to be
44 * an order of magnitude slower than with big one.
45 */
46 uint64_t just_to_align_next_member;
47 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
Denys Vlasenko98a4c7c2010-02-04 15:00:15 +010048} FIX_ALIASING;
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010049#define G (*ptr_to_globals)
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020050#define INIT_G() do { \
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010051 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020052 IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \
53} while (0)
Denis Vlasenko77105632007-09-24 15:04:00 +000054
55
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020056/* Must match option string! */
57enum {
58 WGET_OPT_CONTINUE = (1 << 0),
Denys Vlasenkofb132e42010-10-29 11:46:52 +020059 WGET_OPT_SPIDER = (1 << 1),
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020060 WGET_OPT_QUIET = (1 << 2),
61 WGET_OPT_OUTNAME = (1 << 3),
62 WGET_OPT_PREFIX = (1 << 4),
63 WGET_OPT_PROXY = (1 << 5),
64 WGET_OPT_USER_AGENT = (1 << 6),
65 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
66 WGET_OPT_RETRIES = (1 << 8),
67 WGET_OPT_PASSIVE = (1 << 9),
68 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
69 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
70};
71
/* Values accepted by progress_meter() */
enum {
	PROGRESS_START = -1,  /* initialize the meter, then draw it */
	PROGRESS_END   = 0,   /* final redraw, newline, reset the byte counter */
	PROGRESS_BUMP  = 1,   /* plain redraw */
};
Denis Vlasenko9cade082006-11-21 10:43:02 +000077#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko00d84172008-11-24 07:34:42 +000078static void progress_meter(int flag)
Denis Vlasenko47ddd012007-09-24 18:24:17 +000079{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020080 if (option_mask32 & WGET_OPT_QUIET)
81 return;
Denis Vlasenko47ddd012007-09-24 18:24:17 +000082
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020083 if (flag == PROGRESS_START)
Denys Vlasenkod55e1392011-02-11 18:56:13 +010084 bb_progress_init(&G.pmt, G.curfile);
Denis Vlasenko47ddd012007-09-24 18:24:17 +000085
Denys Vlasenkod55e1392011-02-11 18:56:13 +010086 bb_progress_update(&G.pmt, G.beg_range, G.transferred,
Denys Vlasenkoc5bbd5d2010-07-12 03:27:09 +020087 G.chunked ? 0 : G.beg_range + G.transferred + G.content_len);
Denis Vlasenko47ddd012007-09-24 18:24:17 +000088
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020089 if (flag == PROGRESS_END) {
Denys Vlasenko19ced5c2010-06-06 21:53:09 +020090 bb_putchar_stderr('\n');
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +010091 G.transferred = 0;
Denis Vlasenko47ddd012007-09-24 18:24:17 +000092 }
93}
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020094#else
Denis Vlasenko00d84172008-11-24 07:34:42 +000095static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
Eric Andersenb520e082000-10-03 00:21:45 +000096#endif
Eric Andersenc7bda1c2004-03-15 08:29:22 +000097
Denis Vlasenko47ddd012007-09-24 18:24:17 +000098
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Rewrites HOST in place: "[addr%zone]" and "[addr%zone]:port" lose the
 * "%zone" part. Anything else is left untouched.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return;

	/* Remove the IPv6 zone identifier from the host address */
	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}
	if (cp < scope) {
		/* '%' is outside the brackets (e.g. "[::1]:80%x"): there is
		 * no zone id to strip, and copying "forward" onto a later
		 * position would run past the end of the string */
		return;
	}

	/* cp points to "]...", scope points to "%eth0]..." */
	memmove(scope, cp, strlen(cp) + 1);
}
134
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100135#if 0 /* were needed when we used signal-driven progress bar */
/* fread() wrapper which restarts after EINTR.
 * Reads up to NMEMB bytes from STREAM into PTR and returns how many
 * bytes were stored; the count is short on EOF or on any error
 * other than an interrupted call. */
static size_t safe_fread(void *ptr, size_t nmemb, FILE *stream)
{
	char *const start = (char*)ptr;
	char *cur = start;

	for (;;) {
		size_t got;

		clearerr(stream);
		errno = 0;
		got = fread(cur, 1, nmemb, stream);
		cur += got;
		nmemb -= got;

		if (nmemb == 0)
			break; /* request satisfied */
		if (!ferror(stream) || errno != EINTR)
			break; /* EOF or a real error */
	}

	return cur - start;
}
153
/* fgets() wrapper which restarts after EINTR.
 * Stores one line, or at most SIZE-1 bytes, from STREAM into S.
 * Returns S, or NULL on EOF or on any error other than an
 * interrupted call. */
static char *safe_fgets(char *s, int size, FILE *stream)
{
	for (;;) {
		char *line;

		clearerr(stream);
		errno = 0;
		line = fgets(s, size, stream);
		if (line != NULL)
			return line;
		if (!ferror(stream) || errno != EINTR)
			return NULL;
	}
}
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100168#endif
Matt Kraai854125f2001-05-09 19:15:46 +0000169
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/* Base64-encode STR into the shared G.wget_buf and return that buffer.
 * The result is overwritten by the next user of G.wget_buf.
 * Overlong input is silently cut so the output fits. */
static char *base64enc(const char *str)
{
	const size_t max_len = sizeof(G.wget_buf)/4*3 - 10; /* paranoia */
	unsigned len;

	len = strlen(str);
	if (len > max_len)
		len = max_len;
	bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
	return G.wget_buf;
}
#endif
181
/* Cut S (in place) at its first control character, i.e. the first
 * byte below ' '. Bytes with the high bit set are kept.
 * Returns S so the call can be used directly as a printf argument. */
static char* sanitize_string(char *s)
{
	unsigned char *scan;

	for (scan = (unsigned char *) s; *scan >= ' '; scan++)
		continue;
	*scan = '\0';
	return s;
}
190
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000191static FILE *open_socket(len_and_sockaddr *lsa)
192{
193 FILE *fp;
194
195 /* glibc 2.4 seems to try seeking on it - ??! */
196 /* hopefully it understands what ESPIPE means... */
197 fp = fdopen(xconnect_stream(lsa), "r+");
198 if (fp == NULL)
Denys Vlasenkodee0fc92011-02-10 10:01:49 +0100199 bb_perror_msg_and_die(bb_msg_memory_exhausted);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000200
201 return fp;
202}
203
Denys Vlasenkof836f012011-02-10 23:02:28 +0100204/* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
205static char fgets_and_trim(FILE *fp)
206{
207 char c;
208 char *buf_ptr;
209
210 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
211 bb_perror_msg_and_die("error getting response");
212
213 buf_ptr = strchrnul(G.wget_buf, '\n');
214 c = *buf_ptr;
215 *buf_ptr = '\0';
216 buf_ptr = strchrnul(G.wget_buf, '\r');
217 *buf_ptr = '\0';
218
219 log_io("< %s", G.wget_buf);
220
221 return c;
222}
223
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100224static int ftpcmd(const char *s1, const char *s2, FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000225{
226 int result;
227 if (s1) {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100228 if (!s2)
229 s2 = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000230 fprintf(fp, "%s%s\r\n", s1, s2);
231 fflush(fp);
Denys Vlasenkof836f012011-02-10 23:02:28 +0100232 log_io("> %s%s", s1, s2);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000233 }
234
235 do {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100236 fgets_and_trim(fp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100237 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000238
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100239 G.wget_buf[3] = '\0';
240 result = xatoi_positive(G.wget_buf);
241 G.wget_buf[3] = ' ';
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000242 return result;
243}
244
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000245static void parse_url(char *src_url, struct host_info *h)
246{
247 char *url, *p, *sp;
248
249 /* h->allocated = */ url = xstrdup(src_url);
250
251 if (strncmp(url, "http://", 7) == 0) {
252 h->port = bb_lookup_port("http", "tcp", 80);
253 h->host = url + 7;
254 h->is_ftp = 0;
255 } else if (strncmp(url, "ftp://", 6) == 0) {
256 h->port = bb_lookup_port("ftp", "tcp", 21);
257 h->host = url + 6;
258 h->is_ftp = 1;
259 } else
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200260 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000261
262 // FYI:
263 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
264 // 'GET /?var=a/b HTTP 1.0'
265 // and saves 'index.html?var=a%2Fb' (we save 'b')
266 // wget 'http://busybox.net?login=john@doe':
267 // request: 'GET /?login=john@doe HTTP/1.0'
268 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
269 // wget 'http://busybox.net#test/test':
270 // request: 'GET / HTTP/1.0'
271 // saves: 'index.html' (we save 'test')
272 //
273 // We also don't add unique .N suffix if file exists...
274 sp = strchr(h->host, '/');
275 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
276 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
277 if (!sp) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000278 h->path = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000279 } else if (*sp == '/') {
280 *sp = '\0';
281 h->path = sp + 1;
282 } else { // '#' or '?'
283 // http://busybox.net?login=john@doe is a valid URL
284 // memmove converts to:
285 // http:/busybox.nett?login=john@doe...
Denis Vlasenko818322b2007-09-24 18:27:04 +0000286 memmove(h->host - 1, h->host, sp - h->host);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000287 h->host--;
288 sp[-1] = '\0';
289 h->path = sp;
290 }
291
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200292 // We used to set h->user to NULL here, but this interferes
293 // with handling of code 302 ("object was moved")
294
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000295 sp = strrchr(h->host, '@');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000296 if (sp != NULL) {
297 h->user = h->host;
298 *sp = '\0';
299 h->host = sp + 1;
300 }
301
302 sp = h->host;
303}
304
Denys Vlasenkof836f012011-02-10 23:02:28 +0100305static char *gethdr(FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000306{
307 char *s, *hdrval;
308 int c;
309
310 /* *istrunc = 0; */
311
312 /* retrieve header line */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100313 c = fgets_and_trim(fp);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000314
Denys Vlasenkof836f012011-02-10 23:02:28 +0100315 /* end of the headers? */
316 if (G.wget_buf[0] == '\0')
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000317 return NULL;
318
319 /* convert the header name to lower case */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100320 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.'; ++s) {
Denys Vlasenko48363312010-04-04 15:29:32 +0200321 /* tolower for "A-Z", no-op for "0-9a-z-." */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100322 *s |= 0x20;
Denys Vlasenko48363312010-04-04 15:29:32 +0200323 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000324
325 /* verify we are at the end of the header name */
326 if (*s != ':')
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100327 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000328
329 /* locate the start of the header value */
330 *s++ = '\0';
331 hdrval = skip_whitespace(s);
332
Denys Vlasenkof836f012011-02-10 23:02:28 +0100333 if (c != '\n') {
334 /* Rats! The buffer isn't big enough to hold the entire header value */
335 while (c = getc(fp), c != EOF && c != '\n')
336 continue;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000337 }
338
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000339 return hdrval;
340}
341
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
/* URL encode STR, see RFC 2396.
 * ASCII letters, digits and the characters !&'()*-.=_~ are copied
 * as is, every other byte becomes %XX (uppercase hex).
 * Returns a newly allocated string. */
static char *URL_escape(const char *str)
{
	/* worst case: each input byte expands to three */
	char *res = xmalloc(strlen(str) * 3 + 1);
	char *out = res;
	unsigned char c;

	for (; (c = *str) != '\0'; str++) {
		int literal;

		switch (c) {
		case '!': case '&': case '\'': case '(': case ')':
		case '*': case '-': case '.': case '=': case '_':
		case '~':
			literal = 1;
			break;
		default:
			literal = (c >= '0' && c <= '9')
				|| ((c|0x20) >= 'a' && (c|0x20) <= 'z');
			break;
		}

		if (literal) {
			*out++ = c;
			continue;
		}
		*out++ = '%';
		*out++ = bb_hexdigits_upcase[c >> 4];
		*out++ = bb_hexdigits_upcase[c & 0xf];
	}
	*out = '\0';

	return res;
}
#endif
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000379
Denys Vlasenko7f432802009-06-28 01:02:24 +0200380static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
381{
Denys Vlasenko7f432802009-06-28 01:02:24 +0200382 FILE *sfp;
383 char *str;
384 int port;
385
386 if (!target->user)
387 target->user = xstrdup("anonymous:busybox@");
388
389 sfp = open_socket(lsa);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100390 if (ftpcmd(NULL, NULL, sfp) != 220)
391 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200392
393 /*
394 * Splitting username:password pair,
395 * trying to log in
396 */
397 str = strchr(target->user, ':');
398 if (str)
399 *str++ = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100400 switch (ftpcmd("USER ", target->user, sfp)) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200401 case 230:
402 break;
403 case 331:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100404 if (ftpcmd("PASS ", str, sfp) == 230)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200405 break;
406 /* fall through (failed login) */
407 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100408 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200409 }
410
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100411 ftpcmd("TYPE I", NULL, sfp);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200412
413 /*
414 * Querying file size
415 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100416 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
417 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100418 if (G.content_len < 0 || errno) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200419 bb_error_msg_and_die("SIZE value is garbage");
420 }
421 G.got_clen = 1;
422 }
423
424 /*
425 * Entering passive mode
426 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100427 if (ftpcmd("PASV", NULL, sfp) != 227) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200428 pasv_error:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100429 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200430 }
431 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
432 // Server's IP is N1.N2.N3.N4 (we ignore it)
433 // Server's port for data connection is P1*256+P2
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100434 str = strrchr(G.wget_buf, ')');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200435 if (str) str[0] = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100436 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200437 if (!str) goto pasv_error;
438 port = xatou_range(str+1, 0, 255);
439 *str = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100440 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200441 if (!str) goto pasv_error;
442 port += xatou_range(str+1, 0, 255) * 256;
443 set_nport(lsa, htons(port));
444
445 *dfpp = open_socket(lsa);
446
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100447 if (G.beg_range) {
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100448 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
449 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100450 G.content_len -= G.beg_range;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200451 }
452
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100453 if (ftpcmd("RETR ", target->path, sfp) > 150)
454 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200455
456 return sfp;
457}
458
Denys Vlasenko7f432802009-06-28 01:02:24 +0200459static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd)
460{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200461#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
462# if ENABLE_FEATURE_WGET_TIMEOUT
463 unsigned second_cnt;
464# endif
465 struct pollfd polldata;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200466
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200467 polldata.fd = fileno(dfp);
468 polldata.events = POLLIN | POLLPRI;
Denys Vlasenkof9af3752011-02-11 22:01:33 +0100469
470 /* Must use nonblocking I/O, otherwise fread will loop
471 * and *block* until it reads full buffer,
472 * which messes up progress bar and/or timing out.
473 * Because of nonblocking I/O, we need to dance
474 * very carefully around EAGAIN. See explanation at
475 * clearerr() call.
476 */
Denys Vlasenkoda0df472010-08-08 04:21:50 +0200477 ndelay_on(polldata.fd);
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200478#endif
479 progress_meter(PROGRESS_START);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200480
481 if (G.chunked)
482 goto get_clen;
483
484 /* Loops only if chunked */
485 while (1) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100486 while (1) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200487 int n;
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100488 unsigned rdsz;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200489
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100490 rdsz = sizeof(G.wget_buf);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100491 if (G.got_clen) {
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100492 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100493 if ((int)G.content_len <= 0)
494 break;
495 rdsz = (unsigned)G.content_len;
496 }
497 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100498
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200499#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
500# if ENABLE_FEATURE_WGET_TIMEOUT
501 second_cnt = G.timeout_seconds;
502# endif
503 while (1) {
504 if (safe_poll(&polldata, 1, 1000) != 0)
505 break; /* error, EOF, or data is available */
506# if ENABLE_FEATURE_WGET_TIMEOUT
507 if (second_cnt != 0 && --second_cnt == 0) {
508 progress_meter(PROGRESS_END);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100509 bb_error_msg_and_die("download timed out");
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200510 }
511# endif
512 /* Needed for "stalled" indicator */
513 progress_meter(PROGRESS_BUMP);
514 }
Denys Vlasenkof9af3752011-02-11 22:01:33 +0100515
Denys Vlasenko8766a792011-02-11 21:42:00 +0100516 /* fread internally uses read loop, which in our case
517 * is usually exited when we get EAGAIN.
518 * In this case, libc sets error marker on the stream.
519 * Need to clear it before next fread to avoid possible
520 * rare false positive ferror below. Rare because usually
521 * fread gets more than zero bytes, and we don't fall
522 * into if (n <= 0) ...
523 */
524 clearerr(dfp);
525 errno = 0;
Denys Vlasenkof9af3752011-02-11 22:01:33 +0100526#endif
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100527 n = fread(G.wget_buf, 1, rdsz, dfp);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100528 /* man fread:
529 * If error occurs, or EOF is reached, the return value
530 * is a short item count (or zero).
531 * fread does not distinguish between EOF and error.
532 */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200533 if (n <= 0) {
Denys Vlasenko8766a792011-02-11 21:42:00 +0100534#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
535 if (errno == EAGAIN) /* poll lied, there is no data? */
536 continue; /* yes */
537#endif
538 if (ferror(dfp))
539 bb_perror_msg_and_die(bb_msg_read_error);
540 break; /* EOF, not error */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200541 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100542
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100543 xwrite(output_fd, G.wget_buf, n);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100544
Denys Vlasenko7f432802009-06-28 01:02:24 +0200545#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100546 G.transferred += n;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200547 progress_meter(PROGRESS_BUMP);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200548#endif
Denys Vlasenko9213a552011-02-10 13:23:45 +0100549 if (G.got_clen) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100550 G.content_len -= n;
Denys Vlasenko9213a552011-02-10 13:23:45 +0100551 if (G.content_len == 0)
552 break;
553 }
Denys Vlasenko7f432802009-06-28 01:02:24 +0200554 }
555
556 if (!G.chunked)
557 break;
558
Denys Vlasenkof836f012011-02-10 23:02:28 +0100559 fgets_and_trim(dfp); /* This is a newline */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200560 get_clen:
Denys Vlasenkof836f012011-02-10 23:02:28 +0100561 fgets_and_trim(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100562 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200563 /* FIXME: error check? */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100564 if (G.content_len == 0)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200565 break; /* all done! */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100566 G.got_clen = 1;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200567 }
568
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200569 progress_meter(PROGRESS_END);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200570}
571
Denis Vlasenko9b49a5e2007-10-11 10:05:36 +0000572int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
Denis Vlasenkoa60f84e2008-07-05 09:18:54 +0000573int wget_main(int argc UNUSED_PARAM, char **argv)
Eric Andersen96700832000-09-04 15:15:55 +0000574{
Eric Andersen79757c92001-04-05 21:45:54 +0000575 struct host_info server, target;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +0000576 len_and_sockaddr *lsa;
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000577 unsigned opt;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200578 int redir_limit;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200579 char *proxy = NULL;
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000580 char *dir_prefix = NULL;
581#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denis Vlasenko5a2ad692009-03-04 14:13:37 +0000582 char *post_data;
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000583 char *extra_headers = NULL;
Glenn L McGrath514aeab2003-12-19 12:08:56 +0000584 llist_t *headers_llist = NULL;
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000585#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +0200586 FILE *sfp; /* socket to web/ftp server */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000587 FILE *dfp; /* socket to ftp server (data) */
588 char *fname_out; /* where to direct output (-O) */
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000589 int output_fd = -1;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200590 bool use_proxy; /* Use proxies if env vars are set */
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000591 const char *proxy_flag = "on"; /* Use proxies if env vars are set */
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000592 const char *user_agent = "Wget";/* "User-Agent" header field */
Denis Vlasenko77105632007-09-24 15:04:00 +0000593
Denis Vlasenko6ca409e2007-08-12 20:58:27 +0000594 static const char keywords[] ALIGN1 =
Denis Vlasenko990d0f62007-07-24 15:54:42 +0000595 "content-length\0""transfer-encoding\0""chunked\0""location\0";
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000596 enum {
597 KEY_content_length = 1, KEY_transfer_encoding, KEY_chunked, KEY_location
598 };
Bernhard Reutner-Fischer289e86a2006-08-20 20:01:24 +0000599#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denis Vlasenko6ca409e2007-08-12 20:58:27 +0000600 static const char wget_longopts[] ALIGN1 =
Denis Vlasenkobdc88fd2007-07-23 17:14:14 +0000601 /* name, has_arg, val */
602 "continue\0" No_argument "c"
603 "spider\0" No_argument "s"
604 "quiet\0" No_argument "q"
605 "output-document\0" Required_argument "O"
606 "directory-prefix\0" Required_argument "P"
607 "proxy\0" Required_argument "Y"
608 "user-agent\0" Required_argument "U"
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200609#if ENABLE_FEATURE_WGET_TIMEOUT
610 "timeout\0" Required_argument "T"
611#endif
Denis Vlasenko50af9262009-03-02 15:08:06 +0000612 /* Ignored: */
613 // "tries\0" Required_argument "t"
Denis Vlasenko50af9262009-03-02 15:08:06 +0000614 /* Ignored (we always use PASV): */
Denis Vlasenkobdc88fd2007-07-23 17:14:14 +0000615 "passive-ftp\0" No_argument "\xff"
616 "header\0" Required_argument "\xfe"
Denis Vlasenko5a2ad692009-03-04 14:13:37 +0000617 "post-data\0" Required_argument "\xfd"
Bernhard Reutner-Fischer3fdba182010-02-10 19:37:29 +0100618 /* Ignored (we don't do ssl) */
619 "no-check-certificate\0" No_argument "\xfc"
Denis Vlasenko990d0f62007-07-24 15:54:42 +0000620 ;
Denis Vlasenko77105632007-09-24 15:04:00 +0000621#endif
622
623 INIT_G();
624
625#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denis Vlasenkobdc88fd2007-07-23 17:14:14 +0000626 applet_long_options = wget_longopts;
Bernhard Reutner-Fischer8d3a6f72006-05-31 14:11:38 +0000627#endif
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000628 /* server.allocated = target.allocated = NULL; */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200629 opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
630 opt = getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000631 &fname_out, &dir_prefix,
Denis Vlasenko540ab702008-06-29 00:32:35 +0000632 &proxy_flag, &user_agent,
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200633 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
634 NULL /* -t RETRIES */
Denis Vlasenko5e34ff22009-04-21 11:09:40 +0000635 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
636 IF_FEATURE_WGET_LONG_OPTIONS(, &post_data)
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000637 );
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000638#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denis Vlasenko7534e082006-10-23 23:21:58 +0000639 if (headers_llist) {
640 int size = 1;
641 char *cp;
Denis Vlasenko8d9f4952007-04-08 15:08:42 +0000642 llist_t *ll = headers_llist;
Denis Vlasenko7534e082006-10-23 23:21:58 +0000643 while (ll) {
644 size += strlen(ll->data) + 2;
645 ll = ll->link;
646 }
647 extra_headers = cp = xmalloc(size);
Glenn L McGrath514aeab2003-12-19 12:08:56 +0000648 while (headers_llist) {
Denis Vlasenkod50dda82008-06-15 05:40:56 +0000649 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
Eric Andersen96700832000-09-04 15:15:55 +0000650 }
651 }
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000652#endif
Tim Rikerc1ef7bd2006-01-25 00:08:53 +0000653
Denys Vlasenko7f432802009-06-28 01:02:24 +0200654 /* TODO: compat issue: should handle "wget URL1 URL2..." */
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200655
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200656 target.user = NULL;
Eric Andersen79757c92001-04-05 21:45:54 +0000657 parse_url(argv[optind], &target);
Eric Andersen79757c92001-04-05 21:45:54 +0000658
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000659 /* Use the proxy if necessary */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200660 use_proxy = (strcmp(proxy_flag, "off") != 0);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000661 if (use_proxy) {
Robert Griebld7760112002-05-14 23:36:45 +0000662 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200663 if (proxy && proxy[0]) {
Denys Vlasenko81fe2b12010-02-11 04:23:43 +0100664 server.user = NULL;
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000665 parse_url(proxy, &server);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000666 } else {
667 use_proxy = 0;
668 }
Robert Griebld7760112002-05-14 23:36:45 +0000669 }
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200670 if (!use_proxy) {
671 server.port = target.port;
672 if (ENABLE_FEATURE_IPV6) {
673 server.host = xstrdup(target.host);
674 } else {
675 server.host = target.host;
676 }
677 }
678
679 if (ENABLE_FEATURE_IPV6)
680 strip_ipv6_scope_id(target.host);
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000681
Denis Vlasenko818322b2007-09-24 18:27:04 +0000682 /* Guess an output filename, if there was no -O FILE */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000683 if (!(opt & WGET_OPT_OUTNAME)) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000684 fname_out = bb_get_last_path_component_nostrip(target.path);
685 /* handle "wget http://kernel.org//" */
686 if (fname_out[0] == '/' || !fname_out[0])
Denis Vlasenkob6aae0f2007-01-29 22:51:25 +0000687 fname_out = (char*)"index.html";
Denis Vlasenko818322b2007-09-24 18:27:04 +0000688 /* -P DIR is considered only if there was no -O FILE */
689 if (dir_prefix)
Matt Kraai0382eb82001-07-19 19:13:55 +0000690 fname_out = concat_path_file(dir_prefix, fname_out);
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000691 } else {
692 if (LONE_DASH(fname_out)) {
693 /* -O - */
694 output_fd = 1;
695 opt &= ~WGET_OPT_CONTINUE;
696 }
Eric Andersen29edd002000-12-09 16:55:35 +0000697 }
Denis Vlasenko818322b2007-09-24 18:27:04 +0000698#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100699 G.curfile = bb_get_last_path_component_nostrip(fname_out);
Denis Vlasenko818322b2007-09-24 18:27:04 +0000700#endif
701
Denis Vlasenko4e4662c2006-11-23 13:10:23 +0000702 /* Impossible?
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +0000703 if ((opt & WGET_OPT_CONTINUE) && !fname_out)
Denys Vlasenko6331cf02009-11-13 09:08:27 +0100704 bb_error_msg_and_die("can't specify continue (-c) without a filename (-O)");
Denys Vlasenko7f432802009-06-28 01:02:24 +0200705 */
Eric Andersen29edd002000-12-09 16:55:35 +0000706
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000707 /* Determine where to start transfer */
Denis Vlasenko4e4662c2006-11-23 13:10:23 +0000708 if (opt & WGET_OPT_CONTINUE) {
Denis Vlasenko7039a662006-10-08 17:54:47 +0000709 output_fd = open(fname_out, O_WRONLY);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000710 if (output_fd >= 0) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100711 G.beg_range = xlseek(output_fd, 0, SEEK_END);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000712 }
713 /* File doesn't exist. We do not create file here yet.
Denys Vlasenko7f432802009-06-28 01:02:24 +0200714 * We are not sure it exists on remote side */
Eric Andersen96700832000-09-04 15:15:55 +0000715 }
716
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200717 redir_limit = 5;
718 resolve_lsa:
Denis Vlasenko42823d52007-02-04 02:39:08 +0000719 lsa = xhost2sockaddr(server.host, server.port);
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +0000720 if (!(opt & WGET_OPT_QUIET)) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200721 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
722 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
723 free(s);
Eric Andersene6dc4392003-10-31 09:31:46 +0000724 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200725 establish_session:
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000726 if (use_proxy || !target.is_ftp) {
Eric Andersen79757c92001-04-05 21:45:54 +0000727 /*
728 * HTTP session
729 */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200730 char *str;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200731 int status;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200732
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200733 /* Open socket to http server */
734 sfp = open_socket(lsa);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200735
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200736 /* Send HTTP request */
737 if (use_proxy) {
738 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
739 target.is_ftp ? "f" : "ht", target.host,
740 target.path);
741 } else {
742 if (opt & WGET_OPT_POST_DATA)
743 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
744 else
745 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
746 }
Glenn L McGrathe7bdfcc2003-08-28 22:03:19 +0000747
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200748 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
749 target.host, user_agent);
Eric Andersen79757c92001-04-05 21:45:54 +0000750
Denys Vlasenko9213a552011-02-10 13:23:45 +0100751 /* Ask server to close the connection as soon as we are done
752 * (IOW: we do not intend to send more requests)
753 */
754 fprintf(sfp, "Connection: close\r\n");
755
Denis Vlasenko9cade082006-11-21 10:43:02 +0000756#if ENABLE_FEATURE_WGET_AUTHENTICATION
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200757 if (target.user) {
758 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100759 base64enc(target.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200760 }
761 if (use_proxy && server.user) {
762 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100763 base64enc(server.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200764 }
Eric Andersen79757c92001-04-05 21:45:54 +0000765#endif
766
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100767 if (G.beg_range)
768 fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
Denys Vlasenko9213a552011-02-10 13:23:45 +0100769
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000770#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200771 if (extra_headers)
772 fputs(extra_headers, sfp);
Denis Vlasenko5a2ad692009-03-04 14:13:37 +0000773
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200774 if (opt & WGET_OPT_POST_DATA) {
775 char *estr = URL_escape(post_data);
Denys Vlasenko9213a552011-02-10 13:23:45 +0100776 fprintf(sfp,
777 "Content-Type: application/x-www-form-urlencoded\r\n"
778 "Content-Length: %u\r\n"
779 "\r\n"
780 "%s",
781 (int) strlen(estr), estr
782 );
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200783 free(estr);
784 } else
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000785#endif
Denys Vlasenko9213a552011-02-10 13:23:45 +0100786 {
787 fprintf(sfp, "\r\n");
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200788 }
Eric Andersen79757c92001-04-05 21:45:54 +0000789
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +0200790 fflush(sfp);
791
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200792 /*
793 * Retrieve HTTP response line and check for "200" status code.
794 */
Denis Vlasenko023b57d2006-10-15 17:05:55 +0000795 read_response:
Denys Vlasenkof836f012011-02-10 23:02:28 +0100796 fgets_and_trim(sfp);
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000797
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100798 str = G.wget_buf;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200799 str = skip_non_whitespace(str);
800 str = skip_whitespace(str);
801 // FIXME: no error check
802 // xatou wouldn't work: "200 OK"
803 status = atoi(str);
804 switch (status) {
805 case 0:
806 case 100:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100807 while (gethdr(sfp /*, &n*/) != NULL)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200808 /* eat all remaining headers */;
809 goto read_response;
810 case 200:
Denis Vlasenko50b5cac2008-06-22 16:28:02 +0000811/*
812Response 204 doesn't say "null file", it says "metadata
813has changed but data didn't":
814
815"10.2.5 204 No Content
816The server has fulfilled the request but does not need to return
817an entity-body, and might want to return updated metainformation.
818The response MAY include new or updated metainformation in the form
819of entity-headers, which if present SHOULD be associated with
820the requested variant.
821
822If the client is a user agent, it SHOULD NOT change its document
823view from that which caused the request to be sent. This response
824is primarily intended to allow input for actions to take place
825without causing a change to the user agent's active document view,
826although any new or updated metainformation SHOULD be applied
827to the document currently in the user agent's active view.
828
829The 204 response MUST NOT include a message-body, and thus
830is always terminated by the first empty line after the header fields."
831
832However, in real world it was observed that some web servers
833(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
834*/
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200835 case 204:
836 break;
Denys Vlasenkofb132e42010-10-29 11:46:52 +0200837 case 300: /* redirection */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200838 case 301:
839 case 302:
840 case 303:
841 break;
842 case 206:
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100843 if (G.beg_range)
Denis Vlasenko023b57d2006-10-15 17:05:55 +0000844 break;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200845 /* fall through */
846 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100847 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200848 }
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000849
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200850 /*
851 * Retrieve HTTP headers.
852 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100853 while ((str = gethdr(sfp /*, &n*/)) != NULL) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200854 /* gethdr converted "FOO:" string to lowercase */
Matthijs van de Water0d586662009-08-22 20:19:48 +0200855 smalluint key;
856 /* strip trailing whitespace */
857 char *s = strchrnul(str, '\0') - 1;
858 while (s >= str && (*s == ' ' || *s == '\t')) {
859 *s = '\0';
860 s--;
861 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100862 key = index_in_strings(keywords, G.wget_buf) + 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200863 if (key == KEY_content_length) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100864 G.content_len = BB_STRTOOFF(str, NULL, 10);
865 if (G.content_len < 0 || errno) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200866 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
Eric Andersen79757c92001-04-05 21:45:54 +0000867 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200868 G.got_clen = 1;
869 continue;
870 }
871 if (key == KEY_transfer_encoding) {
872 if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
873 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
874 G.chunked = G.got_clen = 1;
875 }
876 if (key == KEY_location && status >= 300) {
877 if (--redir_limit == 0)
878 bb_error_msg_and_die("too many redirections");
879 fclose(sfp);
880 G.got_clen = 0;
881 G.chunked = 0;
882 if (str[0] == '/')
883 /* free(target.allocated); */
884 target.path = /* target.allocated = */ xstrdup(str+1);
885 /* lsa stays the same: it's on the same server */
886 else {
887 parse_url(str, &target);
888 if (!use_proxy) {
889 server.host = target.host;
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200890 /* strip_ipv6_scope_id(target.host); - no! */
891 /* we assume remote never gives us IPv6 addr with scope id */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200892 server.port = target.port;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +0000893 free(lsa);
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200894 goto resolve_lsa;
895 } /* else: lsa stays the same: we use proxy */
Eric Andersen79757c92001-04-05 21:45:54 +0000896 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200897 goto establish_session;
Eric Andersen79757c92001-04-05 21:45:54 +0000898 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200899 }
900// if (status >= 300)
901// bb_error_msg_and_die("bad redirection (no Location: header from server)");
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000902
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200903 /* For HTTP, data is pumped over the same connection */
Eric Andersen79757c92001-04-05 21:45:54 +0000904 dfp = sfp;
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000905
906 } else {
Eric Andersen79757c92001-04-05 21:45:54 +0000907 /*
908 * FTP session
909 */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200910 sfp = prepare_ftp_session(&dfp, &target, lsa);
Eric Andersen96700832000-09-04 15:15:55 +0000911 }
Denis Vlasenko77105632007-09-24 15:04:00 +0000912
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +0000913 if (opt & WGET_OPT_SPIDER) {
914 if (ENABLE_FEATURE_CLEAN_UP)
915 fclose(sfp);
Denis Vlasenko77105632007-09-24 15:04:00 +0000916 return EXIT_SUCCESS;
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +0000917 }
Eric Andersen79757c92001-04-05 21:45:54 +0000918
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000919 if (output_fd < 0) {
920 int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
921 /* compat with wget: -O FILE can overwrite */
922 if (opt & WGET_OPT_OUTNAME)
923 o_flags = O_WRONLY | O_CREAT | O_TRUNC;
924 output_fd = xopen(fname_out, o_flags);
925 }
Denis Vlasenkof8aa1092006-10-01 10:58:54 +0000926
Denys Vlasenko7f432802009-06-28 01:02:24 +0200927 retrieve_file_data(dfp, output_fd);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100928 xclose(output_fd);
Rob Landley19a39402006-06-13 17:10:26 +0000929
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200930 if (dfp != sfp) {
931 /* It's ftp. Close it properly */
Eric Andersen79757c92001-04-05 21:45:54 +0000932 fclose(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100933 if (ftpcmd(NULL, NULL, sfp) != 226)
934 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
935 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
Eric Andersen79757c92001-04-05 21:45:54 +0000936 }
Denis Vlasenko77105632007-09-24 15:04:00 +0000937
938 return EXIT_SUCCESS;
Eric Andersen96700832000-09-04 15:15:55 +0000939}