blob: 48688640a07f72fa22d8bb0c1f56e8a323d2dc75 [file] [log] [blame]
Eric Andersen96700832000-09-04 15:15:55 +00001/* vi: set sw=4 ts=4: */
2/*
Eric Andersen79757c92001-04-05 21:45:54 +00003 * wget - retrieve a file using HTTP or FTP
Eric Andersen96700832000-09-04 15:15:55 +00004 *
Eric Andersen4e573f42000-11-14 23:29:24 +00005 * Chip Rosenthal Covad Communications <chip@laserlink.net>
Denys Vlasenko0ef64bd2010-08-16 20:14:46 +02006 * Licensed under GPLv2, see file LICENSE in this source tree.
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +02007 *
8 * Copyright (C) 2010 Bradley M. Kuhn <bkuhn@ebb.org>
Denys Vlasenkofb132e42010-10-29 11:46:52 +02009 * Kuhn's copyrights are licensed GPLv2-or-later. File as a whole remains GPLv2.
Eric Andersen96700832000-09-04 15:15:55 +000010 */
Denis Vlasenkob6adbf12007-05-26 19:00:18 +000011#include "libbb.h"
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +000012
Denys Vlasenkof836f012011-02-10 23:02:28 +010013//#define log_io(...) bb_error_msg(__VA_ARGS__)
14#define log_io(...) ((void)0)
15
16
Eric Andersen79757c92001-04-05 21:45:54 +000017struct host_info {
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +000018 // May be used if we ever will want to free() all xstrdup()s...
19 /* char *allocated; */
Denis Vlasenko818322b2007-09-24 18:27:04 +000020 const char *path;
21 const char *user;
22 char *host;
23 int port;
24 smallint is_ftp;
Eric Andersen79757c92001-04-05 21:45:54 +000025};
26
Denis Vlasenko77105632007-09-24 15:04:00 +000027
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020028/* Globals */
Denis Vlasenko77105632007-09-24 15:04:00 +000029struct globals {
30 off_t content_len; /* Content-length of the file */
31 off_t beg_range; /* Range at which continue begins */
32#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko77105632007-09-24 15:04:00 +000033 off_t transferred; /* Number of bytes transferred so far */
34 const char *curfile; /* Name of current file being transferred */
Magnus Dammf5914992009-11-08 16:34:43 +010035 bb_progress_t pmt;
Denis Vlasenko77105632007-09-24 15:04:00 +000036#endif
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020037#if ENABLE_FEATURE_WGET_TIMEOUT
38 unsigned timeout_seconds;
39#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +020040 smallint chunked; /* chunked transfer encoding */
41 smallint got_clen; /* got content-length: from server */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010042 /* Local downloads do benefit from big buffer.
43 * With 512 byte buffer, it was measured to be
44 * an order of magnitude slower than with big one.
45 */
46 uint64_t just_to_align_next_member;
47 char wget_buf[CONFIG_FEATURE_COPYBUF_KB*1024];
Denys Vlasenko98a4c7c2010-02-04 15:00:15 +010048} FIX_ALIASING;
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010049#define G (*ptr_to_globals)
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020050#define INIT_G() do { \
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +010051 SET_PTR_TO_GLOBALS(xzalloc(sizeof(G))); \
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020052 IF_FEATURE_WGET_TIMEOUT(G.timeout_seconds = 900;) \
53} while (0)
Denis Vlasenko77105632007-09-24 15:04:00 +000054
55
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020056/* Must match option string! */
57enum {
58 WGET_OPT_CONTINUE = (1 << 0),
Denys Vlasenkofb132e42010-10-29 11:46:52 +020059 WGET_OPT_SPIDER = (1 << 1),
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020060 WGET_OPT_QUIET = (1 << 2),
61 WGET_OPT_OUTNAME = (1 << 3),
62 WGET_OPT_PREFIX = (1 << 4),
63 WGET_OPT_PROXY = (1 << 5),
64 WGET_OPT_USER_AGENT = (1 << 6),
65 WGET_OPT_NETWORK_READ_TIMEOUT = (1 << 7),
66 WGET_OPT_RETRIES = (1 << 8),
67 WGET_OPT_PASSIVE = (1 << 9),
68 WGET_OPT_HEADER = (1 << 10) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
69 WGET_OPT_POST_DATA = (1 << 11) * ENABLE_FEATURE_WGET_LONG_OPTIONS,
70};
71
/* Values accepted as the "flag" argument of progress_meter() */
enum {
	PROGRESS_START = -1,	/* initialize the meter before the first update */
	PROGRESS_END   = 0,	/* last update: finish the line, reset the byte count */
	PROGRESS_BUMP  = 1,	/* ordinary update */
};
Denis Vlasenko9cade082006-11-21 10:43:02 +000077#if ENABLE_FEATURE_WGET_STATUSBAR
Denis Vlasenko00d84172008-11-24 07:34:42 +000078static void progress_meter(int flag)
Denis Vlasenko47ddd012007-09-24 18:24:17 +000079{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020080 if (option_mask32 & WGET_OPT_QUIET)
81 return;
Denis Vlasenko47ddd012007-09-24 18:24:17 +000082
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020083 if (flag == PROGRESS_START)
Denys Vlasenkod55e1392011-02-11 18:56:13 +010084 bb_progress_init(&G.pmt, G.curfile);
Denis Vlasenko47ddd012007-09-24 18:24:17 +000085
Denys Vlasenkod55e1392011-02-11 18:56:13 +010086 bb_progress_update(&G.pmt, G.beg_range, G.transferred,
Denys Vlasenkoc5bbd5d2010-07-12 03:27:09 +020087 G.chunked ? 0 : G.beg_range + G.transferred + G.content_len);
Denis Vlasenko47ddd012007-09-24 18:24:17 +000088
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020089 if (flag == PROGRESS_END) {
Denys Vlasenko19ced5c2010-06-06 21:53:09 +020090 bb_putchar_stderr('\n');
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +010091 G.transferred = 0;
Denis Vlasenko47ddd012007-09-24 18:24:17 +000092 }
93}
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +020094#else
Denis Vlasenko00d84172008-11-24 07:34:42 +000095static ALWAYS_INLINE void progress_meter(int flag UNUSED_PARAM) { }
Eric Andersenb520e082000-10-03 00:21:45 +000096#endif
Eric Andersenc7bda1c2004-03-15 08:29:22 +000097
Denis Vlasenko47ddd012007-09-24 18:24:17 +000098
/* IPv6 knows scoped address types i.e. link and site local addresses. Link
 * local addresses can have a scope identifier to specify the
 * interface/link an address is valid on (e.g. fe80::1%eth0). This scope
 * identifier is only valid on a single node.
 *
 * RFC 4007 says that the scope identifier MUST NOT be sent across the wire,
 * unless all nodes agree on the semantic. Apache e.g. regards zone identifiers
 * in the Host header as invalid requests, see
 * https://issues.apache.org/bugzilla/show_bug.cgi?id=35122
 *
 * Edits HOST in place: "[fe80::1%eth0]:80" becomes "[fe80::1]:80".
 * HOST is left untouched when it does not start with '[', has no '%',
 * is not of the form "[xx]" / "[xx]:nn", or has its '%' after the ']'.
 */
static void strip_ipv6_scope_id(char *host)
{
	char *scope, *cp;

	/* bbox wget actually handles IPv6 addresses without [], like
	 * wget "http://::1/xxx", but this is not standard.
	 * To save code, _here_ we do not support it. */

	if (host[0] != '[')
		return; /* not IPv6 */

	scope = strchr(host, '%');
	if (!scope)
		return;

	/* Remove the IPv6 zone identifier from the host address */
	cp = strchr(host, ']');
	if (!cp || (cp[1] != ':' && cp[1] != '\0')) {
		/* malformed address (not "[xx]:nn" or "[xx]") */
		return;
	}

	/* A '%' located after the closing bracket (e.g. "[::1]:80%x") is
	 * not a zone identifier. Copying "]..." upwards onto it would
	 * overwrite the terminating NUL and run past the end of the string.
	 */
	if (scope > cp)
		return;

	/* cp points to "]...", scope points to "%eth0]..." */
	memmove(scope, cp, strlen(cp) + 1);
}
134
#if 0 /* were needed when we used signal-driven progress bar */
/* Read NMEMB bytes from STREAM into PTR, restarting after EINTR.
 * Returns the byte count actually read; it is short on EOF or on
 * any error other than an interrupted call. */
static size_t safe_fread(void *ptr, size_t nmemb, FILE *stream)
{
	char *dst = (char*)ptr;
	size_t got;

	for (;;) {
		clearerr(stream);
		errno = 0;
		got = fread(dst, 1, nmemb, stream);
		dst += got;
		nmemb -= got;
		/* Retry only if data is still missing because of EINTR */
		if (!nmemb || !ferror(stream) || errno != EINTR)
			break;
	}

	return dst - (char*)ptr;
}

/* Read one line, or at most SIZE-1 bytes, from STREAM into S,
 * restarting after EINTR.
 * Returns S, or NULL on EOF or on a non-interrupt error. */
static char *safe_fgets(char *s, int size, FILE *stream)
{
	for (;;) {
		char *line;

		clearerr(stream);
		errno = 0;
		line = fgets(s, size, stream);
		if (line != NULL || !ferror(stream) || errno != EINTR)
			return line;
	}
}
#endif
Matt Kraai854125f2001-05-09 19:15:46 +0000169
#if ENABLE_FEATURE_WGET_AUTHENTICATION
/* Base64-encode STR into G.wget_buf and return that buffer.
 * The result is overwritten by the next use of G.wget_buf.
 */
static char *base64enc(const char *str)
{
	/* paranoia: never feed the encoder more input than this */
	const unsigned max_len = sizeof(G.wget_buf)/4*3 - 10;
	unsigned len = strlen(str);

	if (len > max_len)
		len = max_len;
	bb_uuencode(G.wget_buf, str, len, bb_uuenc_tbl_base64);
	return G.wget_buf;
}
#endif
181
/* Truncate S in place at its first byte below ' ' (a control character
 * or the terminating NUL) and return S.
 */
static char* sanitize_string(char *s)
{
	char *end = s;

	/* Compare as unsigned char so that bytes >= 0x80 are kept */
	while ((unsigned char)*end >= ' ')
		end++;
	*end = '\0';
	return s;
}
190
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000191static FILE *open_socket(len_and_sockaddr *lsa)
192{
193 FILE *fp;
194
195 /* glibc 2.4 seems to try seeking on it - ??! */
196 /* hopefully it understands what ESPIPE means... */
197 fp = fdopen(xconnect_stream(lsa), "r+");
198 if (fp == NULL)
Denys Vlasenkodee0fc92011-02-10 10:01:49 +0100199 bb_perror_msg_and_die(bb_msg_memory_exhausted);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000200
201 return fp;
202}
203
Denys Vlasenkof836f012011-02-10 23:02:28 +0100204/* Returns '\n' if it was seen, else '\0'. Trims at first '\r' or '\n' */
205static char fgets_and_trim(FILE *fp)
206{
207 char c;
208 char *buf_ptr;
209
210 if (fgets(G.wget_buf, sizeof(G.wget_buf) - 1, fp) == NULL)
211 bb_perror_msg_and_die("error getting response");
212
213 buf_ptr = strchrnul(G.wget_buf, '\n');
214 c = *buf_ptr;
215 *buf_ptr = '\0';
216 buf_ptr = strchrnul(G.wget_buf, '\r');
217 *buf_ptr = '\0';
218
219 log_io("< %s", G.wget_buf);
220
221 return c;
222}
223
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100224static int ftpcmd(const char *s1, const char *s2, FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000225{
226 int result;
227 if (s1) {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100228 if (!s2)
229 s2 = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000230 fprintf(fp, "%s%s\r\n", s1, s2);
231 fflush(fp);
Denys Vlasenkof836f012011-02-10 23:02:28 +0100232 log_io("> %s%s", s1, s2);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000233 }
234
235 do {
Denys Vlasenkof836f012011-02-10 23:02:28 +0100236 fgets_and_trim(fp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100237 } while (!isdigit(G.wget_buf[0]) || G.wget_buf[3] != ' ');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000238
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100239 G.wget_buf[3] = '\0';
240 result = xatoi_positive(G.wget_buf);
241 G.wget_buf[3] = ' ';
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000242 return result;
243}
244
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000245static void parse_url(char *src_url, struct host_info *h)
246{
247 char *url, *p, *sp;
248
249 /* h->allocated = */ url = xstrdup(src_url);
250
251 if (strncmp(url, "http://", 7) == 0) {
252 h->port = bb_lookup_port("http", "tcp", 80);
253 h->host = url + 7;
254 h->is_ftp = 0;
255 } else if (strncmp(url, "ftp://", 6) == 0) {
256 h->port = bb_lookup_port("ftp", "tcp", 21);
257 h->host = url + 6;
258 h->is_ftp = 1;
259 } else
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200260 bb_error_msg_and_die("not an http or ftp url: %s", sanitize_string(url));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000261
262 // FYI:
263 // "Real" wget 'http://busybox.net?var=a/b' sends this request:
264 // 'GET /?var=a/b HTTP 1.0'
265 // and saves 'index.html?var=a%2Fb' (we save 'b')
266 // wget 'http://busybox.net?login=john@doe':
267 // request: 'GET /?login=john@doe HTTP/1.0'
268 // saves: 'index.html?login=john@doe' (we save '?login=john@doe')
269 // wget 'http://busybox.net#test/test':
270 // request: 'GET / HTTP/1.0'
271 // saves: 'index.html' (we save 'test')
272 //
273 // We also don't add unique .N suffix if file exists...
274 sp = strchr(h->host, '/');
275 p = strchr(h->host, '?'); if (!sp || (p && sp > p)) sp = p;
276 p = strchr(h->host, '#'); if (!sp || (p && sp > p)) sp = p;
277 if (!sp) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000278 h->path = "";
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000279 } else if (*sp == '/') {
280 *sp = '\0';
281 h->path = sp + 1;
282 } else { // '#' or '?'
283 // http://busybox.net?login=john@doe is a valid URL
284 // memmove converts to:
285 // http:/busybox.nett?login=john@doe...
Denis Vlasenko818322b2007-09-24 18:27:04 +0000286 memmove(h->host - 1, h->host, sp - h->host);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000287 h->host--;
288 sp[-1] = '\0';
289 h->path = sp;
290 }
291
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200292 // We used to set h->user to NULL here, but this interferes
293 // with handling of code 302 ("object was moved")
294
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000295 sp = strrchr(h->host, '@');
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000296 if (sp != NULL) {
297 h->user = h->host;
298 *sp = '\0';
299 h->host = sp + 1;
300 }
301
302 sp = h->host;
303}
304
Denys Vlasenkof836f012011-02-10 23:02:28 +0100305static char *gethdr(FILE *fp)
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000306{
307 char *s, *hdrval;
308 int c;
309
310 /* *istrunc = 0; */
311
312 /* retrieve header line */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100313 c = fgets_and_trim(fp);
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000314
Denys Vlasenkof836f012011-02-10 23:02:28 +0100315 /* end of the headers? */
316 if (G.wget_buf[0] == '\0')
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000317 return NULL;
318
319 /* convert the header name to lower case */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100320 for (s = G.wget_buf; isalnum(*s) || *s == '-' || *s == '.'; ++s) {
Denys Vlasenko48363312010-04-04 15:29:32 +0200321 /* tolower for "A-Z", no-op for "0-9a-z-." */
Denys Vlasenkof836f012011-02-10 23:02:28 +0100322 *s |= 0x20;
Denys Vlasenko48363312010-04-04 15:29:32 +0200323 }
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000324
325 /* verify we are at the end of the header name */
326 if (*s != ':')
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100327 bb_error_msg_and_die("bad header line: %s", sanitize_string(G.wget_buf));
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000328
329 /* locate the start of the header value */
330 *s++ = '\0';
331 hdrval = skip_whitespace(s);
332
Denys Vlasenkof836f012011-02-10 23:02:28 +0100333 if (c != '\n') {
334 /* Rats! The buffer isn't big enough to hold the entire header value */
335 while (c = getc(fp), c != EOF && c != '\n')
336 continue;
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000337 }
338
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000339 return hdrval;
340}
341
#if ENABLE_FEATURE_WGET_LONG_OPTIONS
/* URL-encode STR (see RFC 2396) into a newly allocated string.
 * ASCII letters, digits and the characters !&'()*-.=_~ are copied
 * as is; every other byte becomes %XX with upper-case hex digits.
 */
static char *URL_escape(const char *str)
{
	/* Worst case: each input byte expands to three output bytes */
	char *res = xmalloc(strlen(str) * 3 + 1);
	char *dst = res;

	for (;;) {
		unsigned char c = *str++;
		unsigned char lower = c | 0x20;
		int verbatim;

		switch (c) {
		case '\0':
		case '!':
		case '&':
		case '\'':
		case '(':
		case ')':
		case '*':
		case '-':
		case '.':
		case '=':
		case '_':
		case '~':
			verbatim = 1;
			break;
		default:
			verbatim = (c >= '0' && c <= '9')
				|| (lower >= 'a' && lower <= 'z');
			break;
		}

		if (!verbatim) {
			*dst++ = '%';
			*dst++ = bb_hexdigits_upcase[c >> 4];
			*dst++ = bb_hexdigits_upcase[c & 0xf];
			continue;
		}

		/* The terminating NUL is copied too, and ends the loop */
		*dst++ = c;
		if (c == '\0')
			return res;
	}
}
#endif
Denis Vlasenko47ddd012007-09-24 18:24:17 +0000379
Denys Vlasenko7f432802009-06-28 01:02:24 +0200380static FILE* prepare_ftp_session(FILE **dfpp, struct host_info *target, len_and_sockaddr *lsa)
381{
Denys Vlasenko7f432802009-06-28 01:02:24 +0200382 FILE *sfp;
383 char *str;
384 int port;
385
386 if (!target->user)
387 target->user = xstrdup("anonymous:busybox@");
388
389 sfp = open_socket(lsa);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100390 if (ftpcmd(NULL, NULL, sfp) != 220)
391 bb_error_msg_and_die("%s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200392
393 /*
394 * Splitting username:password pair,
395 * trying to log in
396 */
397 str = strchr(target->user, ':');
398 if (str)
399 *str++ = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100400 switch (ftpcmd("USER ", target->user, sfp)) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200401 case 230:
402 break;
403 case 331:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100404 if (ftpcmd("PASS ", str, sfp) == 230)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200405 break;
406 /* fall through (failed login) */
407 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100408 bb_error_msg_and_die("ftp login: %s", sanitize_string(G.wget_buf + 4));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200409 }
410
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100411 ftpcmd("TYPE I", NULL, sfp);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200412
413 /*
414 * Querying file size
415 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100416 if (ftpcmd("SIZE ", target->path, sfp) == 213) {
417 G.content_len = BB_STRTOOFF(G.wget_buf + 4, NULL, 10);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100418 if (G.content_len < 0 || errno) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200419 bb_error_msg_and_die("SIZE value is garbage");
420 }
421 G.got_clen = 1;
422 }
423
424 /*
425 * Entering passive mode
426 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100427 if (ftpcmd("PASV", NULL, sfp) != 227) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200428 pasv_error:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100429 bb_error_msg_and_die("bad response to %s: %s", "PASV", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200430 }
431 // Response is "227 garbageN1,N2,N3,N4,P1,P2[)garbage]
432 // Server's IP is N1.N2.N3.N4 (we ignore it)
433 // Server's port for data connection is P1*256+P2
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100434 str = strrchr(G.wget_buf, ')');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200435 if (str) str[0] = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100436 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200437 if (!str) goto pasv_error;
438 port = xatou_range(str+1, 0, 255);
439 *str = '\0';
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100440 str = strrchr(G.wget_buf, ',');
Denys Vlasenko7f432802009-06-28 01:02:24 +0200441 if (!str) goto pasv_error;
442 port += xatou_range(str+1, 0, 255) * 256;
443 set_nport(lsa, htons(port));
444
445 *dfpp = open_socket(lsa);
446
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100447 if (G.beg_range) {
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100448 sprintf(G.wget_buf, "REST %"OFF_FMT"u", G.beg_range);
449 if (ftpcmd(G.wget_buf, NULL, sfp) == 350)
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100450 G.content_len -= G.beg_range;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200451 }
452
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100453 if (ftpcmd("RETR ", target->path, sfp) > 150)
454 bb_error_msg_and_die("bad response to %s: %s", "RETR", sanitize_string(G.wget_buf));
Denys Vlasenko7f432802009-06-28 01:02:24 +0200455
456 return sfp;
457}
458
Denys Vlasenko7f432802009-06-28 01:02:24 +0200459static void NOINLINE retrieve_file_data(FILE *dfp, int output_fd)
460{
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200461#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
462# if ENABLE_FEATURE_WGET_TIMEOUT
463 unsigned second_cnt;
464# endif
465 struct pollfd polldata;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200466
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200467 polldata.fd = fileno(dfp);
468 polldata.events = POLLIN | POLLPRI;
Denys Vlasenkoda0df472010-08-08 04:21:50 +0200469 ndelay_on(polldata.fd);
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200470#endif
471 progress_meter(PROGRESS_START);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200472
473 if (G.chunked)
474 goto get_clen;
475
476 /* Loops only if chunked */
477 while (1) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100478 while (1) {
Denys Vlasenko7f432802009-06-28 01:02:24 +0200479 int n;
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100480 unsigned rdsz;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200481
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100482 rdsz = sizeof(G.wget_buf);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100483 if (G.got_clen) {
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100484 if (G.content_len < (off_t)sizeof(G.wget_buf)) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100485 if ((int)G.content_len <= 0)
486 break;
487 rdsz = (unsigned)G.content_len;
488 }
489 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100490
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200491#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
492# if ENABLE_FEATURE_WGET_TIMEOUT
493 second_cnt = G.timeout_seconds;
494# endif
495 while (1) {
496 if (safe_poll(&polldata, 1, 1000) != 0)
497 break; /* error, EOF, or data is available */
498# if ENABLE_FEATURE_WGET_TIMEOUT
499 if (second_cnt != 0 && --second_cnt == 0) {
500 progress_meter(PROGRESS_END);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100501 bb_error_msg_and_die("download timed out");
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200502 }
503# endif
504 /* Needed for "stalled" indicator */
505 progress_meter(PROGRESS_BUMP);
506 }
507#endif
Denys Vlasenko8766a792011-02-11 21:42:00 +0100508 /* fread internally uses read loop, which in our case
509 * is usually exited when we get EAGAIN.
510 * In this case, libc sets error marker on the stream.
511 * Need to clear it before next fread to avoid possible
512 * rare false positive ferror below. Rare because usually
513 * fread gets more than zero bytes, and we don't fall
514 * into if (n <= 0) ...
515 */
516 clearerr(dfp);
517 errno = 0;
Denys Vlasenko0fac2f72011-02-10 09:55:05 +0100518 n = fread(G.wget_buf, 1, rdsz, dfp);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100519 /* man fread:
520 * If error occurs, or EOF is reached, the return value
521 * is a short item count (or zero).
522 * fread does not distinguish between EOF and error.
523 */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200524 if (n <= 0) {
Denys Vlasenko8766a792011-02-11 21:42:00 +0100525#if ENABLE_FEATURE_WGET_STATUSBAR || ENABLE_FEATURE_WGET_TIMEOUT
526 if (errno == EAGAIN) /* poll lied, there is no data? */
527 continue; /* yes */
528#endif
529 if (ferror(dfp))
530 bb_perror_msg_and_die(bb_msg_read_error);
531 break; /* EOF, not error */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200532 }
Denys Vlasenko8766a792011-02-11 21:42:00 +0100533
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100534 xwrite(output_fd, G.wget_buf, n);
Denys Vlasenko8766a792011-02-11 21:42:00 +0100535
Denys Vlasenko7f432802009-06-28 01:02:24 +0200536#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100537 G.transferred += n;
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200538 progress_meter(PROGRESS_BUMP);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200539#endif
Denys Vlasenko9213a552011-02-10 13:23:45 +0100540 if (G.got_clen) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100541 G.content_len -= n;
Denys Vlasenko9213a552011-02-10 13:23:45 +0100542 if (G.content_len == 0)
543 break;
544 }
Denys Vlasenko7f432802009-06-28 01:02:24 +0200545 }
546
547 if (!G.chunked)
548 break;
549
Denys Vlasenkof836f012011-02-10 23:02:28 +0100550 fgets_and_trim(dfp); /* This is a newline */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200551 get_clen:
Denys Vlasenkof836f012011-02-10 23:02:28 +0100552 fgets_and_trim(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100553 G.content_len = STRTOOFF(G.wget_buf, NULL, 16);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200554 /* FIXME: error check? */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100555 if (G.content_len == 0)
Denys Vlasenko7f432802009-06-28 01:02:24 +0200556 break; /* all done! */
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100557 G.got_clen = 1;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200558 }
559
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200560 progress_meter(PROGRESS_END);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200561}
562
Denis Vlasenko9b49a5e2007-10-11 10:05:36 +0000563int wget_main(int argc, char **argv) MAIN_EXTERNALLY_VISIBLE;
Denis Vlasenkoa60f84e2008-07-05 09:18:54 +0000564int wget_main(int argc UNUSED_PARAM, char **argv)
Eric Andersen96700832000-09-04 15:15:55 +0000565{
Eric Andersen79757c92001-04-05 21:45:54 +0000566 struct host_info server, target;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +0000567 len_and_sockaddr *lsa;
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000568 unsigned opt;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200569 int redir_limit;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200570 char *proxy = NULL;
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000571 char *dir_prefix = NULL;
572#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denis Vlasenko5a2ad692009-03-04 14:13:37 +0000573 char *post_data;
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000574 char *extra_headers = NULL;
Glenn L McGrath514aeab2003-12-19 12:08:56 +0000575 llist_t *headers_llist = NULL;
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000576#endif
Denys Vlasenko7f432802009-06-28 01:02:24 +0200577 FILE *sfp; /* socket to web/ftp server */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000578 FILE *dfp; /* socket to ftp server (data) */
579 char *fname_out; /* where to direct output (-O) */
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000580 int output_fd = -1;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200581 bool use_proxy; /* Use proxies if env vars are set */
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000582 const char *proxy_flag = "on"; /* Use proxies if env vars are set */
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000583 const char *user_agent = "Wget";/* "User-Agent" header field */
Denis Vlasenko77105632007-09-24 15:04:00 +0000584
Denis Vlasenko6ca409e2007-08-12 20:58:27 +0000585 static const char keywords[] ALIGN1 =
Denis Vlasenko990d0f62007-07-24 15:54:42 +0000586 "content-length\0""transfer-encoding\0""chunked\0""location\0";
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000587 enum {
588 KEY_content_length = 1, KEY_transfer_encoding, KEY_chunked, KEY_location
589 };
Bernhard Reutner-Fischer289e86a2006-08-20 20:01:24 +0000590#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denis Vlasenko6ca409e2007-08-12 20:58:27 +0000591 static const char wget_longopts[] ALIGN1 =
Denis Vlasenkobdc88fd2007-07-23 17:14:14 +0000592 /* name, has_arg, val */
593 "continue\0" No_argument "c"
594 "spider\0" No_argument "s"
595 "quiet\0" No_argument "q"
596 "output-document\0" Required_argument "O"
597 "directory-prefix\0" Required_argument "P"
598 "proxy\0" Required_argument "Y"
599 "user-agent\0" Required_argument "U"
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200600#if ENABLE_FEATURE_WGET_TIMEOUT
601 "timeout\0" Required_argument "T"
602#endif
Denis Vlasenko50af9262009-03-02 15:08:06 +0000603 /* Ignored: */
604 // "tries\0" Required_argument "t"
Denis Vlasenko50af9262009-03-02 15:08:06 +0000605 /* Ignored (we always use PASV): */
Denis Vlasenkobdc88fd2007-07-23 17:14:14 +0000606 "passive-ftp\0" No_argument "\xff"
607 "header\0" Required_argument "\xfe"
Denis Vlasenko5a2ad692009-03-04 14:13:37 +0000608 "post-data\0" Required_argument "\xfd"
Bernhard Reutner-Fischer3fdba182010-02-10 19:37:29 +0100609 /* Ignored (we don't do ssl) */
610 "no-check-certificate\0" No_argument "\xfc"
Denis Vlasenko990d0f62007-07-24 15:54:42 +0000611 ;
Denis Vlasenko77105632007-09-24 15:04:00 +0000612#endif
613
614 INIT_G();
615
616#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denis Vlasenkobdc88fd2007-07-23 17:14:14 +0000617 applet_long_options = wget_longopts;
Bernhard Reutner-Fischer8d3a6f72006-05-31 14:11:38 +0000618#endif
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000619 /* server.allocated = target.allocated = NULL; */
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200620 opt_complementary = "-1" IF_FEATURE_WGET_TIMEOUT(":T+") IF_FEATURE_WGET_LONG_OPTIONS(":\xfe::");
621 opt = getopt32(argv, "csqO:P:Y:U:T:" /*ignored:*/ "t:",
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000622 &fname_out, &dir_prefix,
Denis Vlasenko540ab702008-06-29 00:32:35 +0000623 &proxy_flag, &user_agent,
Bradley M. Kuhnc97131c2010-08-08 02:51:20 +0200624 IF_FEATURE_WGET_TIMEOUT(&G.timeout_seconds) IF_NOT_FEATURE_WGET_TIMEOUT(NULL),
625 NULL /* -t RETRIES */
Denis Vlasenko5e34ff22009-04-21 11:09:40 +0000626 IF_FEATURE_WGET_LONG_OPTIONS(, &headers_llist)
627 IF_FEATURE_WGET_LONG_OPTIONS(, &post_data)
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000628 );
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000629#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denis Vlasenko7534e082006-10-23 23:21:58 +0000630 if (headers_llist) {
631 int size = 1;
632 char *cp;
Denis Vlasenko8d9f4952007-04-08 15:08:42 +0000633 llist_t *ll = headers_llist;
Denis Vlasenko7534e082006-10-23 23:21:58 +0000634 while (ll) {
635 size += strlen(ll->data) + 2;
636 ll = ll->link;
637 }
638 extra_headers = cp = xmalloc(size);
Glenn L McGrath514aeab2003-12-19 12:08:56 +0000639 while (headers_llist) {
Denis Vlasenkod50dda82008-06-15 05:40:56 +0000640 cp += sprintf(cp, "%s\r\n", (char*)llist_pop(&headers_llist));
Eric Andersen96700832000-09-04 15:15:55 +0000641 }
642 }
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000643#endif
Tim Rikerc1ef7bd2006-01-25 00:08:53 +0000644
Denys Vlasenko7f432802009-06-28 01:02:24 +0200645 /* TODO: compat issue: should handle "wget URL1 URL2..." */
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200646
Vladimir Dronnikovbe168b12009-10-05 02:18:01 +0200647 target.user = NULL;
Eric Andersen79757c92001-04-05 21:45:54 +0000648 parse_url(argv[optind], &target);
Eric Andersen79757c92001-04-05 21:45:54 +0000649
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000650 /* Use the proxy if necessary */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200651 use_proxy = (strcmp(proxy_flag, "off") != 0);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000652 if (use_proxy) {
Robert Griebld7760112002-05-14 23:36:45 +0000653 proxy = getenv(target.is_ftp ? "ftp_proxy" : "http_proxy");
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200654 if (proxy && proxy[0]) {
Denys Vlasenko81fe2b12010-02-11 04:23:43 +0100655 server.user = NULL;
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000656 parse_url(proxy, &server);
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000657 } else {
658 use_proxy = 0;
659 }
Robert Griebld7760112002-05-14 23:36:45 +0000660 }
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200661 if (!use_proxy) {
662 server.port = target.port;
663 if (ENABLE_FEATURE_IPV6) {
664 server.host = xstrdup(target.host);
665 } else {
666 server.host = target.host;
667 }
668 }
669
670 if (ENABLE_FEATURE_IPV6)
671 strip_ipv6_scope_id(target.host);
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000672
Denis Vlasenko818322b2007-09-24 18:27:04 +0000673 /* Guess an output filename, if there was no -O FILE */
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000674 if (!(opt & WGET_OPT_OUTNAME)) {
Denis Vlasenko818322b2007-09-24 18:27:04 +0000675 fname_out = bb_get_last_path_component_nostrip(target.path);
676 /* handle "wget http://kernel.org//" */
677 if (fname_out[0] == '/' || !fname_out[0])
Denis Vlasenkob6aae0f2007-01-29 22:51:25 +0000678 fname_out = (char*)"index.html";
Denis Vlasenko818322b2007-09-24 18:27:04 +0000679 /* -P DIR is considered only if there was no -O FILE */
680 if (dir_prefix)
Matt Kraai0382eb82001-07-19 19:13:55 +0000681 fname_out = concat_path_file(dir_prefix, fname_out);
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000682 } else {
683 if (LONE_DASH(fname_out)) {
684 /* -O - */
685 output_fd = 1;
686 opt &= ~WGET_OPT_CONTINUE;
687 }
Eric Andersen29edd002000-12-09 16:55:35 +0000688 }
Denis Vlasenko818322b2007-09-24 18:27:04 +0000689#if ENABLE_FEATURE_WGET_STATUSBAR
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100690 G.curfile = bb_get_last_path_component_nostrip(fname_out);
Denis Vlasenko818322b2007-09-24 18:27:04 +0000691#endif
692
Denis Vlasenko4e4662c2006-11-23 13:10:23 +0000693 /* Impossible?
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +0000694 if ((opt & WGET_OPT_CONTINUE) && !fname_out)
Denys Vlasenko6331cf02009-11-13 09:08:27 +0100695 bb_error_msg_and_die("can't specify continue (-c) without a filename (-O)");
Denys Vlasenko7f432802009-06-28 01:02:24 +0200696 */
Eric Andersen29edd002000-12-09 16:55:35 +0000697
Bernhard Reutner-Fischer7e8a53a2007-04-10 09:37:29 +0000698 /* Determine where to start transfer */
Denis Vlasenko4e4662c2006-11-23 13:10:23 +0000699 if (opt & WGET_OPT_CONTINUE) {
Denis Vlasenko7039a662006-10-08 17:54:47 +0000700 output_fd = open(fname_out, O_WRONLY);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000701 if (output_fd >= 0) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100702 G.beg_range = xlseek(output_fd, 0, SEEK_END);
Denis Vlasenkoa94554d2006-09-23 17:49:09 +0000703 }
704 /* File doesn't exist. We do not create file here yet.
Denys Vlasenko7f432802009-06-28 01:02:24 +0200705 * We are not sure it exists on remote side */
Eric Andersen96700832000-09-04 15:15:55 +0000706 }
707
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200708 redir_limit = 5;
709 resolve_lsa:
Denis Vlasenko42823d52007-02-04 02:39:08 +0000710 lsa = xhost2sockaddr(server.host, server.port);
Denis Vlasenkoa552eeb2006-09-26 09:22:12 +0000711 if (!(opt & WGET_OPT_QUIET)) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200712 char *s = xmalloc_sockaddr2dotted(&lsa->u.sa);
713 fprintf(stderr, "Connecting to %s (%s)\n", server.host, s);
714 free(s);
Eric Andersene6dc4392003-10-31 09:31:46 +0000715 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200716 establish_session:
Glenn L McGrathf1c4b112004-02-22 00:27:34 +0000717 if (use_proxy || !target.is_ftp) {
Eric Andersen79757c92001-04-05 21:45:54 +0000718 /*
719 * HTTP session
720 */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200721 char *str;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200722 int status;
Denys Vlasenko7f432802009-06-28 01:02:24 +0200723
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200724 /* Open socket to http server */
725 sfp = open_socket(lsa);
Denys Vlasenko7f432802009-06-28 01:02:24 +0200726
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200727 /* Send HTTP request */
728 if (use_proxy) {
729 fprintf(sfp, "GET %stp://%s/%s HTTP/1.1\r\n",
730 target.is_ftp ? "f" : "ht", target.host,
731 target.path);
732 } else {
733 if (opt & WGET_OPT_POST_DATA)
734 fprintf(sfp, "POST /%s HTTP/1.1\r\n", target.path);
735 else
736 fprintf(sfp, "GET /%s HTTP/1.1\r\n", target.path);
737 }
Glenn L McGrathe7bdfcc2003-08-28 22:03:19 +0000738
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200739 fprintf(sfp, "Host: %s\r\nUser-Agent: %s\r\n",
740 target.host, user_agent);
Eric Andersen79757c92001-04-05 21:45:54 +0000741
Denys Vlasenko9213a552011-02-10 13:23:45 +0100742 /* Ask server to close the connection as soon as we are done
743 * (IOW: we do not intend to send more requests)
744 */
745 fprintf(sfp, "Connection: close\r\n");
746
Denis Vlasenko9cade082006-11-21 10:43:02 +0000747#if ENABLE_FEATURE_WGET_AUTHENTICATION
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200748 if (target.user) {
749 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n"+6,
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100750 base64enc(target.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200751 }
752 if (use_proxy && server.user) {
753 fprintf(sfp, "Proxy-Authorization: Basic %s\r\n",
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100754 base64enc(server.user));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200755 }
Eric Andersen79757c92001-04-05 21:45:54 +0000756#endif
757
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100758 if (G.beg_range)
759 fprintf(sfp, "Range: bytes=%"OFF_FMT"u-\r\n", G.beg_range);
Denys Vlasenko9213a552011-02-10 13:23:45 +0100760
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000761#if ENABLE_FEATURE_WGET_LONG_OPTIONS
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200762 if (extra_headers)
763 fputs(extra_headers, sfp);
Denis Vlasenko5a2ad692009-03-04 14:13:37 +0000764
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200765 if (opt & WGET_OPT_POST_DATA) {
766 char *estr = URL_escape(post_data);
Denys Vlasenko9213a552011-02-10 13:23:45 +0100767 fprintf(sfp,
768 "Content-Type: application/x-www-form-urlencoded\r\n"
769 "Content-Length: %u\r\n"
770 "\r\n"
771 "%s",
772 (int) strlen(estr), estr
773 );
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200774 free(estr);
775 } else
Denis Vlasenkoc8400a22006-10-25 00:33:44 +0000776#endif
Denys Vlasenko9213a552011-02-10 13:23:45 +0100777 {
778 fprintf(sfp, "\r\n");
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200779 }
Eric Andersen79757c92001-04-05 21:45:54 +0000780
Nguyễn Thái Ngọc Duyebec11d2010-09-23 15:18:41 +0200781 fflush(sfp);
782
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200783 /*
784 * Retrieve HTTP response line and check for "200" status code.
785 */
Denis Vlasenko023b57d2006-10-15 17:05:55 +0000786 read_response:
Denys Vlasenkof836f012011-02-10 23:02:28 +0100787 fgets_and_trim(sfp);
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000788
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100789 str = G.wget_buf;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200790 str = skip_non_whitespace(str);
791 str = skip_whitespace(str);
792 // FIXME: no error check
793 // xatou wouldn't work: "200 OK"
794 status = atoi(str);
795 switch (status) {
796 case 0:
797 case 100:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100798 while (gethdr(sfp /*, &n*/) != NULL)
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200799 /* eat all remaining headers */;
800 goto read_response;
801 case 200:
Denis Vlasenko50b5cac2008-06-22 16:28:02 +0000802/*
803Response 204 doesn't say "null file", it says "metadata
804has changed but data didn't":
805
806"10.2.5 204 No Content
807The server has fulfilled the request but does not need to return
808an entity-body, and might want to return updated metainformation.
809The response MAY include new or updated metainformation in the form
810of entity-headers, which if present SHOULD be associated with
811the requested variant.
812
813If the client is a user agent, it SHOULD NOT change its document
814view from that which caused the request to be sent. This response
815is primarily intended to allow input for actions to take place
816without causing a change to the user agent's active document view,
817although any new or updated metainformation SHOULD be applied
818to the document currently in the user agent's active view.
819
820The 204 response MUST NOT include a message-body, and thus
821is always terminated by the first empty line after the header fields."
822
823However, in real world it was observed that some web servers
824(e.g. Boa/0.94.14rc21) simply use code 204 when file size is zero.
825*/
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200826 case 204:
827 break;
Denys Vlasenkofb132e42010-10-29 11:46:52 +0200828 case 300: /* redirection */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200829 case 301:
830 case 302:
831 case 303:
832 break;
833 case 206:
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100834 if (G.beg_range)
Denis Vlasenko023b57d2006-10-15 17:05:55 +0000835 break;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200836 /* fall through */
837 default:
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100838 bb_error_msg_and_die("server returned error: %s", sanitize_string(G.wget_buf));
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200839 }
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000840
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200841 /*
842 * Retrieve HTTP headers.
843 */
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100844 while ((str = gethdr(sfp /*, &n*/)) != NULL) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200845 /* gethdr converted "FOO:" string to lowercase */
Matthijs van de Water0d586662009-08-22 20:19:48 +0200846 smalluint key;
847 /* strip trailing whitespace */
848 char *s = strchrnul(str, '\0') - 1;
849 while (s >= str && (*s == ' ' || *s == '\t')) {
850 *s = '\0';
851 s--;
852 }
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100853 key = index_in_strings(keywords, G.wget_buf) + 1;
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200854 if (key == KEY_content_length) {
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100855 G.content_len = BB_STRTOOFF(str, NULL, 10);
856 if (G.content_len < 0 || errno) {
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200857 bb_error_msg_and_die("content-length %s is garbage", sanitize_string(str));
Eric Andersen79757c92001-04-05 21:45:54 +0000858 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200859 G.got_clen = 1;
860 continue;
861 }
862 if (key == KEY_transfer_encoding) {
863 if (index_in_strings(keywords, str_tolower(str)) + 1 != KEY_chunked)
864 bb_error_msg_and_die("transfer encoding '%s' is not supported", sanitize_string(str));
865 G.chunked = G.got_clen = 1;
866 }
867 if (key == KEY_location && status >= 300) {
868 if (--redir_limit == 0)
869 bb_error_msg_and_die("too many redirections");
870 fclose(sfp);
871 G.got_clen = 0;
872 G.chunked = 0;
873 if (str[0] == '/')
874 /* free(target.allocated); */
875 target.path = /* target.allocated = */ xstrdup(str+1);
876 /* lsa stays the same: it's on the same server */
877 else {
878 parse_url(str, &target);
879 if (!use_proxy) {
880 server.host = target.host;
Denys Vlasenko7d5ddf12009-06-30 20:36:27 +0200881 /* strip_ipv6_scope_id(target.host); - no! */
882 /* we assume remote never gives us IPv6 addr with scope id */
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200883 server.port = target.port;
Denis Vlasenko6536a9b2007-01-12 10:35:23 +0000884 free(lsa);
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200885 goto resolve_lsa;
886 } /* else: lsa stays the same: we use proxy */
Eric Andersen79757c92001-04-05 21:45:54 +0000887 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200888 goto establish_session;
Eric Andersen79757c92001-04-05 21:45:54 +0000889 }
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200890 }
891// if (status >= 300)
892// bb_error_msg_and_die("bad redirection (no Location: header from server)");
Eric Andersenc7bda1c2004-03-15 08:29:22 +0000893
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200894 /* For HTTP, data is pumped over the same connection */
Eric Andersen79757c92001-04-05 21:45:54 +0000895 dfp = sfp;
Denis Vlasenko96e9d3c2006-10-07 14:28:55 +0000896
897 } else {
Eric Andersen79757c92001-04-05 21:45:54 +0000898 /*
899 * FTP session
900 */
Denys Vlasenko7f432802009-06-28 01:02:24 +0200901 sfp = prepare_ftp_session(&dfp, &target, lsa);
Eric Andersen96700832000-09-04 15:15:55 +0000902 }
Denis Vlasenko77105632007-09-24 15:04:00 +0000903
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +0000904 if (opt & WGET_OPT_SPIDER) {
905 if (ENABLE_FEATURE_CLEAN_UP)
906 fclose(sfp);
Denis Vlasenko77105632007-09-24 15:04:00 +0000907 return EXIT_SUCCESS;
Bernhard Reutner-Fischer2e75dcc2007-04-05 10:31:47 +0000908 }
Eric Andersen79757c92001-04-05 21:45:54 +0000909
Denis Vlasenkoa36535b2007-09-27 15:07:23 +0000910 if (output_fd < 0) {
911 int o_flags = O_WRONLY | O_CREAT | O_TRUNC | O_EXCL;
912 /* compat with wget: -O FILE can overwrite */
913 if (opt & WGET_OPT_OUTNAME)
914 o_flags = O_WRONLY | O_CREAT | O_TRUNC;
915 output_fd = xopen(fname_out, o_flags);
916 }
Denis Vlasenkof8aa1092006-10-01 10:58:54 +0000917
Denys Vlasenko7f432802009-06-28 01:02:24 +0200918 retrieve_file_data(dfp, output_fd);
Denys Vlasenkoa3aa3e32009-12-11 12:36:10 +0100919 xclose(output_fd);
Rob Landley19a39402006-06-13 17:10:26 +0000920
Denys Vlasenkof1fab092009-06-28 03:33:57 +0200921 if (dfp != sfp) {
922 /* It's ftp. Close it properly */
Eric Andersen79757c92001-04-05 21:45:54 +0000923 fclose(dfp);
Denys Vlasenkodf4e16c2011-02-10 06:29:06 +0100924 if (ftpcmd(NULL, NULL, sfp) != 226)
925 bb_error_msg_and_die("ftp error: %s", sanitize_string(G.wget_buf + 4));
926 /* ftpcmd("QUIT", NULL, sfp); - why bother? */
Eric Andersen79757c92001-04-05 21:45:54 +0000927 }
Denis Vlasenko77105632007-09-24 15:04:00 +0000928
929 return EXIT_SUCCESS;
Eric Andersen96700832000-09-04 15:15:55 +0000930}