blob: 9c1de2bcd2d5dbf73160eb7d32b8cf29cbb5136c [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * nanohttp.c: minimalist HTTP GET implementation to fetch external subsets.
3 * focuses on size, streamability, reentrancy and portability
4 *
5 * This is clearly not a general purpose HTTP implementation
6 * If you look for one, check:
7 * http://www.w3.org/Library/
8 *
9 * See Copyright for the status of this software.
10 *
11 * Daniel.Veillard@w3.org
12 */
13
14/* TODO add compression support, Send the Accept- , and decompress on the
15 fly with ZLIB if found at compile-time */
16
17#ifdef WIN32
18#define INCLUDE_WINSOCK
19#include "win32config.h"
20#else
21#include "config.h"
22#endif
23
24#include <libxml/xmlversion.h>
25
26#ifdef LIBXML_HTTP_ENABLED
27#include <stdio.h>
28#include <string.h>
29
30#ifdef HAVE_STDLIB_H
31#include <stdlib.h>
32#endif
33#ifdef HAVE_UNISTD_H
34#include <unistd.h>
35#endif
36#ifdef HAVE_SYS_SOCKET_H
37#include <sys/socket.h>
38#endif
39#ifdef HAVE_NETINET_IN_H
40#include <netinet/in.h>
41#endif
42#ifdef HAVE_ARPA_INET_H
43#include <arpa/inet.h>
44#endif
45#ifdef HAVE_NETDB_H
46#include <netdb.h>
47#endif
48#ifdef HAVE_FCNTL_H
49#include <fcntl.h>
50#endif
51#ifdef HAVE_ERRNO_H
52#include <errno.h>
53#endif
54#ifdef HAVE_SYS_TIME_H
55#include <sys/time.h>
56#endif
57#ifdef HAVE_SYS_SELECT_H
58#include <sys/select.h>
59#endif
60#ifdef HAVE_STRINGS_H
61#include <strings.h>
62#endif
63#ifdef SUPPORT_IP6
64#include <resolv.h>
65#endif
66
67#ifdef VMS
68#include <stropts>
69#define SOCKLEN_T unsigned int
70#define SOCKET int
71#endif
72
73#include <libxml/xmlmemory.h>
74#include <libxml/parser.h> /* for xmlStr(n)casecmp() */
75#include <libxml/nanohttp.h>
76
77/**
78 * A couple portability macros
79 */
80#ifndef _WINSOCKAPI_
81#define closesocket(s) close(s)
82#define SOCKET int
83#endif
84
85#ifdef STANDALONE
86#define DEBUG_HTTP
87#define xmlStrncasecmp(a, b, n) strncasecmp((char *)a, (char *)b, n)
88#define xmlStrcasecmpi(a, b) strcasecmp((char *)a, (char *)b)
89#endif
90
/*
 * Tunables: maximum number of redirections followed for one request and
 * the amount of data requested from the socket per recv() call.
 */
enum {
    XML_NANO_HTTP_MAX_REDIR = 10,
    XML_NANO_HTTP_CHUNK = 4096
};

/*
 * Values stored in the "state" field of an HTTP context.
 */
enum {
    XML_NANO_HTTP_CLOSED = 0,
    XML_NANO_HTTP_WRITE = 1,
    XML_NANO_HTTP_READ = 2,
    XML_NANO_HTTP_NONE = 4
};
99
100typedef struct xmlNanoHTTPCtxt {
101 char *protocol; /* the protocol name */
102 char *hostname; /* the host name */
103 int port; /* the port */
104 char *path; /* the path within the URL */
105 SOCKET fd; /* the file descriptor for the socket */
106 int state; /* WRITE / READ / CLOSED */
107 char *out; /* buffer sent (zero terminated) */
108 char *outptr; /* index within the buffer sent */
109 char *in; /* the receiving buffer */
110 char *content; /* the start of the content */
111 char *inptr; /* the next byte to read from network */
112 char *inrptr; /* the next byte to give back to the client */
113 int inlen; /* len of the input buffer */
114 int last; /* return code for last operation */
115 int returnValue; /* the protocol return value */
116 char *contentType; /* the MIME type for the input */
117 char *location; /* the new URL in case of redirect */
118 char *authHeader; /* contents of {WWW,Proxy}-Authenticate header */
119} xmlNanoHTTPCtxt, *xmlNanoHTTPCtxtPtr;
120
/* Module-wide state shared by every HTTP context. */
static int initialized = 0;         /* non-zero once the layer is initialized */
static char *proxy = NULL;          /* the proxy host name, NULL if none */
static int proxyPort = 0;           /* the proxy port if any */
static unsigned int timeout = 60;   /* the select() timeout in seconds */
125
/**
 * socket_errno:
 *
 * Portability helper giving the error code of the last socket operation.
 *
 * Returns WSAGetLastError() when built against Winsock, errno otherwise.
 */
static int socket_errno(void) {
    int code;

#ifdef _WINSOCKAPI_
    code = WSAGetLastError();
#else
    code = errno;
#endif
    return(code);
}
136
137/**
138 * xmlNanoHTTPInit:
139 *
140 * Initialize the HTTP protocol layer.
141 * Currently it just checks for proxy informations
142 */
143
144void
145xmlNanoHTTPInit(void) {
146 const char *env;
147#ifdef _WINSOCKAPI_
148 WSADATA wsaData;
149#endif
150
151 if (initialized)
152 return;
153
154#ifdef _WINSOCKAPI_
155 if (WSAStartup(MAKEWORD(1, 1), &wsaData) != 0)
156 return;
157#endif
158
159 if (proxy == NULL) {
160 proxyPort = 80;
161 env = getenv("no_proxy");
162 if (env != NULL)
163 goto done;
164 env = getenv("http_proxy");
165 if (env != NULL) {
166 xmlNanoHTTPScanProxy(env);
167 goto done;
168 }
169 env = getenv("HTTP_PROXY");
170 if (env != NULL) {
171 xmlNanoHTTPScanProxy(env);
172 goto done;
173 }
174 }
175done:
176 initialized = 1;
177}
178
179/**
180 * xmlNanoHTTPClenup:
181 *
182 * Cleanup the HTTP protocol layer.
183 */
184
185void
186xmlNanoHTTPCleanup(void) {
187 if (proxy != NULL)
188 xmlFree(proxy);
189#ifdef _WINSOCKAPI_
190 if (initialized)
191 WSACleanup();
192#endif
193 initialized = 0;
194 return;
195}
196
197/**
Owen Taylor3473f882001-02-23 17:55:21 +0000198 * xmlNanoHTTPScanURL:
199 * @ctxt: an HTTP context
200 * @URL: The URL used to initialize the context
201 *
202 * (Re)Initialize an HTTP context by parsing the URL and finding
203 * the protocol host port and path it indicates.
204 */
205
206static void
207xmlNanoHTTPScanURL(xmlNanoHTTPCtxtPtr ctxt, const char *URL) {
208 const char *cur = URL;
209 char buf[4096];
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000210 int indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000211 int port = 0;
212
213 if (ctxt->protocol != NULL) {
214 xmlFree(ctxt->protocol);
215 ctxt->protocol = NULL;
216 }
217 if (ctxt->hostname != NULL) {
218 xmlFree(ctxt->hostname);
219 ctxt->hostname = NULL;
220 }
221 if (ctxt->path != NULL) {
222 xmlFree(ctxt->path);
223 ctxt->path = NULL;
224 }
225 if (URL == NULL) return;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000226 buf[indx] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000227 while (*cur != 0) {
228 if ((cur[0] == ':') && (cur[1] == '/') && (cur[2] == '/')) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000229 buf[indx] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000230 ctxt->protocol = xmlMemStrdup(buf);
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000231 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000232 cur += 3;
233 break;
234 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000235 buf[indx++] = *cur++;
Owen Taylor3473f882001-02-23 17:55:21 +0000236 }
237 if (*cur == 0) return;
238
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000239 buf[indx] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000240 while (1) {
241 if (cur[0] == ':') {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000242 buf[indx] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000243 ctxt->hostname = xmlMemStrdup(buf);
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000244 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000245 cur += 1;
246 while ((*cur >= '0') && (*cur <= '9')) {
247 port *= 10;
248 port += *cur - '0';
249 cur++;
250 }
251 if (port != 0) ctxt->port = port;
252 while ((cur[0] != '/') && (*cur != 0))
253 cur++;
254 break;
255 }
256 if ((*cur == '/') || (*cur == 0)) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000257 buf[indx] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000258 ctxt->hostname = xmlMemStrdup(buf);
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000259 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000260 break;
261 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000262 buf[indx++] = *cur++;
Owen Taylor3473f882001-02-23 17:55:21 +0000263 }
264 if (*cur == 0)
265 ctxt->path = xmlMemStrdup("/");
266 else {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000267 indx = 0;
268 buf[indx] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000269 while (*cur != 0)
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000270 buf[indx++] = *cur++;
271 buf[indx] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000272 ctxt->path = xmlMemStrdup(buf);
273 }
274}
275
276/**
277 * xmlNanoHTTPScanProxy:
278 * @URL: The proxy URL used to initialize the proxy context
279 *
280 * (Re)Initialize the HTTP Proxy context by parsing the URL and finding
281 * the protocol host port it indicates.
282 * Should be like http://myproxy/ or http://myproxy:3128/
283 * A NULL URL cleans up proxy informations.
284 */
285
286void
287xmlNanoHTTPScanProxy(const char *URL) {
288 const char *cur = URL;
289 char buf[4096];
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000290 int indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000291 int port = 0;
292
293 if (proxy != NULL) {
294 xmlFree(proxy);
295 proxy = NULL;
296 }
297 if (proxyPort != 0) {
298 proxyPort = 0;
299 }
300#ifdef DEBUG_HTTP
301 if (URL == NULL)
302 xmlGenericError(xmlGenericErrorContext,
303 "Removing HTTP proxy info\n");
304 else
305 xmlGenericError(xmlGenericErrorContext,
306 "Using HTTP proxy %s\n", URL);
307#endif
308 if (URL == NULL) return;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000309 buf[indx] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000310 while (*cur != 0) {
311 if ((cur[0] == ':') && (cur[1] == '/') && (cur[2] == '/')) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000312 buf[indx] = 0;
313 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000314 cur += 3;
315 break;
316 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000317 buf[indx++] = *cur++;
Owen Taylor3473f882001-02-23 17:55:21 +0000318 }
319 if (*cur == 0) return;
320
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000321 buf[indx] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000322 while (1) {
323 if (cur[0] == ':') {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000324 buf[indx] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000325 proxy = xmlMemStrdup(buf);
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000326 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000327 cur += 1;
328 while ((*cur >= '0') && (*cur <= '9')) {
329 port *= 10;
330 port += *cur - '0';
331 cur++;
332 }
333 if (port != 0) proxyPort = port;
334 while ((cur[0] != '/') && (*cur != 0))
335 cur++;
336 break;
337 }
338 if ((*cur == '/') || (*cur == 0)) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000339 buf[indx] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000340 proxy = xmlMemStrdup(buf);
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000341 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000342 break;
343 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000344 buf[indx++] = *cur++;
Owen Taylor3473f882001-02-23 17:55:21 +0000345 }
346}
347
348/**
349 * xmlNanoHTTPNewCtxt:
350 * @URL: The URL used to initialize the context
351 *
352 * Allocate and initialize a new HTTP context.
353 *
354 * Returns an HTTP context or NULL in case of error.
355 */
356
357static xmlNanoHTTPCtxtPtr
358xmlNanoHTTPNewCtxt(const char *URL) {
359 xmlNanoHTTPCtxtPtr ret;
360
361 ret = (xmlNanoHTTPCtxtPtr) xmlMalloc(sizeof(xmlNanoHTTPCtxt));
362 if (ret == NULL) return(NULL);
363
364 memset(ret, 0, sizeof(xmlNanoHTTPCtxt));
365 ret->port = 80;
366 ret->returnValue = 0;
367 ret->fd = -1;
368
369 xmlNanoHTTPScanURL(ret, URL);
370
371 return(ret);
372}
373
374/**
375 * xmlNanoHTTPFreeCtxt:
376 * @ctxt: an HTTP context
377 *
378 * Frees the context after closing the connection.
379 */
380
381static void
382xmlNanoHTTPFreeCtxt(xmlNanoHTTPCtxtPtr ctxt) {
383 if (ctxt == NULL) return;
384 if (ctxt->hostname != NULL) xmlFree(ctxt->hostname);
385 if (ctxt->protocol != NULL) xmlFree(ctxt->protocol);
386 if (ctxt->path != NULL) xmlFree(ctxt->path);
387 if (ctxt->out != NULL) xmlFree(ctxt->out);
388 if (ctxt->in != NULL) xmlFree(ctxt->in);
389 if (ctxt->contentType != NULL) xmlFree(ctxt->contentType);
390 if (ctxt->location != NULL) xmlFree(ctxt->location);
391 if (ctxt->authHeader != NULL) xmlFree(ctxt->authHeader);
392 ctxt->state = XML_NANO_HTTP_NONE;
393 if (ctxt->fd >= 0) closesocket(ctxt->fd);
394 ctxt->fd = -1;
395 xmlFree(ctxt);
396}
397
398/**
399 * xmlNanoHTTPSend:
400 * @ctxt: an HTTP context
401 *
402 * Send the input needed to initiate the processing on the server side
403 */
404
405static void
406xmlNanoHTTPSend(xmlNanoHTTPCtxtPtr ctxt) {
407 if (ctxt->state & XML_NANO_HTTP_WRITE) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000408 unsigned int total_sent = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000409 while (total_sent <strlen(ctxt->outptr)) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000410 unsigned int nsent = send(ctxt->fd, ctxt->outptr+total_sent,
411 strlen(ctxt->outptr)-total_sent, 0);
Owen Taylor3473f882001-02-23 17:55:21 +0000412 if (nsent>0)
413 total_sent += nsent;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000414 }
Owen Taylor3473f882001-02-23 17:55:21 +0000415 ctxt->last = total_sent;
416 }
417}
418
419/**
420 * xmlNanoHTTPRecv:
421 * @ctxt: an HTTP context
422 *
423 * Read information coming from the HTTP connection.
424 * This is a blocking call (but it blocks in select(), not read()).
425 *
426 * Returns the number of byte read or -1 in case of error.
427 */
428
429static int
430xmlNanoHTTPRecv(xmlNanoHTTPCtxtPtr ctxt) {
431 fd_set rfd;
432 struct timeval tv;
433
434
435 while (ctxt->state & XML_NANO_HTTP_READ) {
436 if (ctxt->in == NULL) {
437 ctxt->in = (char *) xmlMalloc(65000 * sizeof(char));
438 if (ctxt->in == NULL) {
439 ctxt->last = -1;
440 return(-1);
441 }
442 ctxt->inlen = 65000;
443 ctxt->inptr = ctxt->content = ctxt->inrptr = ctxt->in;
444 }
445 if (ctxt->inrptr > ctxt->in + XML_NANO_HTTP_CHUNK) {
446 int delta = ctxt->inrptr - ctxt->in;
447 int len = ctxt->inptr - ctxt->inrptr;
448
449 memmove(ctxt->in, ctxt->inrptr, len);
450 ctxt->inrptr -= delta;
451 ctxt->content -= delta;
452 ctxt->inptr -= delta;
453 }
454 if ((ctxt->in + ctxt->inlen) < (ctxt->inptr + XML_NANO_HTTP_CHUNK)) {
455 int d_inptr = ctxt->inptr - ctxt->in;
456 int d_content = ctxt->content - ctxt->in;
457 int d_inrptr = ctxt->inrptr - ctxt->in;
458
459 ctxt->inlen *= 2;
460 ctxt->in = (char *) xmlRealloc(ctxt->in, ctxt->inlen);
461 if (ctxt->in == NULL) {
462 ctxt->last = -1;
463 return(-1);
464 }
465 ctxt->inptr = ctxt->in + d_inptr;
466 ctxt->content = ctxt->in + d_content;
467 ctxt->inrptr = ctxt->in + d_inrptr;
468 }
469 ctxt->last = recv(ctxt->fd, ctxt->inptr, XML_NANO_HTTP_CHUNK, 0);
470 if (ctxt->last > 0) {
471 ctxt->inptr += ctxt->last;
472 return(ctxt->last);
473 }
474 if (ctxt->last == 0) {
475 return(0);
476 }
477 if (ctxt->last == -1) {
478 switch (socket_errno()) {
479 case EINPROGRESS:
480 case EWOULDBLOCK:
481#if defined(EAGAIN) && EAGAIN != EWOULDBLOCK
482 case EAGAIN:
483#endif
484 break;
485 default:
486 return(0);
487 }
488 }
489
490 tv.tv_sec = timeout;
491 tv.tv_usec = 0;
492 FD_ZERO(&rfd);
493 FD_SET(ctxt->fd, &rfd);
494
495 if (select(ctxt->fd+1, &rfd, NULL, NULL, &tv)<1)
496 return(0);
497 }
498 return(0);
499}
500
501/**
502 * xmlNanoHTTPReadLine:
503 * @ctxt: an HTTP context
504 *
505 * Read one line in the HTTP server output, usually for extracting
506 * the HTTP protocol informations from the answer header.
507 *
508 * Returns a newly allocated string with a copy of the line, or NULL
509 * which indicate the end of the input.
510 */
511
512static char *
513xmlNanoHTTPReadLine(xmlNanoHTTPCtxtPtr ctxt) {
514 char buf[4096];
515 char *bp = buf;
516
517 while (bp - buf < 4095) {
518 if (ctxt->inrptr == ctxt->inptr) {
519 if (xmlNanoHTTPRecv(ctxt) == 0) {
520 if (bp == buf)
521 return(NULL);
522 else
523 *bp = 0;
524 return(xmlMemStrdup(buf));
525 }
526 }
527 *bp = *ctxt->inrptr++;
528 if (*bp == '\n') {
529 *bp = 0;
530 return(xmlMemStrdup(buf));
531 }
532 if (*bp != '\r')
533 bp++;
534 }
535 buf[4095] = 0;
536 return(xmlMemStrdup(buf));
537}
538
539
540/**
541 * xmlNanoHTTPScanAnswer:
542 * @ctxt: an HTTP context
543 * @line: an HTTP header line
544 *
545 * Try to extract useful informations from the server answer.
546 * We currently parse and process:
547 * - The HTTP revision/ return code
548 * - The Content-Type
549 * - The Location for redirrect processing.
550 *
551 * Returns -1 in case of failure, the file descriptor number otherwise
552 */
553
554static void
555xmlNanoHTTPScanAnswer(xmlNanoHTTPCtxtPtr ctxt, const char *line) {
556 const char *cur = line;
557
558 if (line == NULL) return;
559
560 if (!strncmp(line, "HTTP/", 5)) {
561 int version = 0;
562 int ret = 0;
563
564 cur += 5;
565 while ((*cur >= '0') && (*cur <= '9')) {
566 version *= 10;
567 version += *cur - '0';
568 cur++;
569 }
570 if (*cur == '.') {
571 cur++;
572 if ((*cur >= '0') && (*cur <= '9')) {
573 version *= 10;
574 version += *cur - '0';
575 cur++;
576 }
577 while ((*cur >= '0') && (*cur <= '9'))
578 cur++;
579 } else
580 version *= 10;
581 if ((*cur != ' ') && (*cur != '\t')) return;
582 while ((*cur == ' ') || (*cur == '\t')) cur++;
583 if ((*cur < '0') || (*cur > '9')) return;
584 while ((*cur >= '0') && (*cur <= '9')) {
585 ret *= 10;
586 ret += *cur - '0';
587 cur++;
588 }
589 if ((*cur != 0) && (*cur != ' ') && (*cur != '\t')) return;
590 ctxt->returnValue = ret;
591 } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Content-Type:", 13)) {
592 cur += 13;
593 while ((*cur == ' ') || (*cur == '\t')) cur++;
594 if (ctxt->contentType != NULL)
595 xmlFree(ctxt->contentType);
596 ctxt->contentType = xmlMemStrdup(cur);
597 } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"ContentType:", 12)) {
598 cur += 12;
599 if (ctxt->contentType != NULL) return;
600 while ((*cur == ' ') || (*cur == '\t')) cur++;
601 ctxt->contentType = xmlMemStrdup(cur);
602 } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Location:", 9)) {
603 cur += 9;
604 while ((*cur == ' ') || (*cur == '\t')) cur++;
605 if (ctxt->location != NULL)
606 xmlFree(ctxt->location);
607 ctxt->location = xmlMemStrdup(cur);
608 } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"WWW-Authenticate:", 17)) {
609 cur += 17;
610 while ((*cur == ' ') || (*cur == '\t')) cur++;
611 if (ctxt->authHeader != NULL)
612 xmlFree(ctxt->authHeader);
613 ctxt->authHeader = xmlMemStrdup(cur);
614 } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Proxy-Authenticate:", 19)) {
615 cur += 19;
616 while ((*cur == ' ') || (*cur == '\t')) cur++;
617 if (ctxt->authHeader != NULL)
618 xmlFree(ctxt->authHeader);
619 ctxt->authHeader = xmlMemStrdup(cur);
620 }
621}
622
623/**
624 * xmlNanoHTTPConnectAttempt:
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000625 * @addr: a socket adress structure
Owen Taylor3473f882001-02-23 17:55:21 +0000626 *
627 * Attempt a connection to the given IP:port endpoint. It forces
628 * non-blocking semantic on the socket, and allow 60 seconds for
629 * the host to answer.
630 *
631 * Returns -1 in case of failure, the file descriptor number otherwise
632 */
633
634static int
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000635xmlNanoHTTPConnectAttempt(struct sockaddr *addr)
Owen Taylor3473f882001-02-23 17:55:21 +0000636{
637 SOCKET s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
638 fd_set wfd;
639 struct timeval tv;
640 int status;
641
642 if (s==-1) {
643#ifdef DEBUG_HTTP
644 perror("socket");
645#endif
646 return(-1);
647 }
648
649#ifdef _WINSOCKAPI_
650 {
651 u_long one = 1;
652
653 status = ioctlsocket(s, FIONBIO, &one) == SOCKET_ERROR ? -1 : 0;
654 }
655#else /* _WINSOCKAPI_ */
656#if defined(VMS)
657 {
658 int enable = 1;
659 status = ioctl(s, FIONBIO, &enable);
660 }
661#else /* VMS */
662 if ((status = fcntl(s, F_GETFL, 0)) != -1) {
663#ifdef O_NONBLOCK
664 status |= O_NONBLOCK;
665#else /* O_NONBLOCK */
666#ifdef F_NDELAY
667 status |= F_NDELAY;
668#endif /* F_NDELAY */
669#endif /* !O_NONBLOCK */
670 status = fcntl(s, F_SETFL, status);
671 }
672 if (status < 0) {
673#ifdef DEBUG_HTTP
674 perror("nonblocking");
675#endif
676 closesocket(s);
677 return(-1);
678 }
679#endif /* !VMS */
680#endif /* !_WINSOCKAPI_ */
681
682
683 if ((connect(s, addr, sizeof(*addr))==-1)) {
684 switch (socket_errno()) {
685 case EINPROGRESS:
686 case EWOULDBLOCK:
687 break;
688 default:
689 perror("connect");
690 closesocket(s);
691 return(-1);
692 }
693 }
694
695 tv.tv_sec = timeout;
696 tv.tv_usec = 0;
697
698 FD_ZERO(&wfd);
699 FD_SET(s, &wfd);
700
701 switch(select(s+1, NULL, &wfd, NULL, &tv))
702 {
703 case 0:
704 /* Time out */
705 closesocket(s);
706 return(-1);
707 case -1:
708 /* Ermm.. ?? */
709#ifdef DEBUG_HTTP
710 perror("select");
711#endif
712 closesocket(s);
713 return(-1);
714 }
715
716 if ( FD_ISSET(s, &wfd) ) {
717 SOCKLEN_T len;
718 len = sizeof(status);
719 if (getsockopt(s, SOL_SOCKET, SO_ERROR, (char*)&status, &len) < 0 ) {
720 /* Solaris error code */
721 return (-1);
722 }
723 if ( status ) {
724 closesocket(s);
725 errno = status;
726 return (-1);
727 }
728 } else {
729 /* pbm */
730 return (-1);
731 }
732
733 return(s);
734}
735
/**
 * xmlNanoHTTPConnectHost:
 * @host:  the host name
 * @port:  the port number
 *
 * Attempt a connection to the given host:port endpoint. It tries
 * the multiple IP provided by the DNS if available, in order, and
 * stops at the first address that accepts the connection. The scan
 * also stops at an address of an unsupported family or of an
 * unexpected size.
 *
 * Returns -1 in case of failure, the file descriptor number otherwise
 */

static int
xmlNanoHTTPConnectHost(const char *host, int port)
{
    struct hostent *h;
    struct sockaddr *addr;
    struct sockaddr_in sockin;
#ifdef SUPPORT_IP6
    struct sockaddr_in6 sockin6;
#endif
    int i;
    int s;

#if defined(SUPPORT_IP6) && defined(RES_USE_INET6)
    if (!(_res.options & RES_INIT))
        res_init();
    _res.options |= RES_USE_INET6;
#endif
    h=gethostbyname(host);
    if (h==NULL)
    {
#ifdef DEBUG_HTTP
        xmlGenericError(xmlGenericErrorContext,"unable to resolve '%s'.\n", host);
#endif
        return(-1);
    }

    for(i=0; h->h_addr_list[i]; i++)
    {
        if (h->h_addrtype == AF_INET) {
            /* A records (IPv4) */
            /* never copy more than the destination can hold */
            if ((h->h_length < 0) ||
                ((size_t) h->h_length > sizeof(sockin.sin_addr)))
                break;
            memset(&sockin, 0, sizeof(sockin));
            memcpy(&sockin.sin_addr, h->h_addr_list[i], h->h_length);
            sockin.sin_family = h->h_addrtype;
            sockin.sin_port = htons((unsigned short) port);
            addr = (struct sockaddr *)&sockin;
#ifdef SUPPORT_IP6
        } else if (h->h_addrtype == AF_INET6) {
            /* AAAA records (IPv6) */
            if ((h->h_length < 0) ||
                ((size_t) h->h_length > sizeof(sockin6.sin6_addr)))
                break;
            memset(&sockin6, 0, sizeof(sockin6));
            memcpy(&sockin6.sin6_addr, h->h_addr_list[i], h->h_length);
            sockin6.sin6_family = h->h_addrtype;
            sockin6.sin6_port = htons((unsigned short) port);
            addr = (struct sockaddr *)&sockin6;
#endif
        } else
            break; /* for */

        s = xmlNanoHTTPConnectAttempt(addr);
        if (s != -1)
            return(s);
    }

#ifdef DEBUG_HTTP
    xmlGenericError(xmlGenericErrorContext,
            "unable to connect to '%s'.\n", host);
#endif
    return(-1);
}
807
808
/**
 * xmlNanoHTTPOpen:
 * @URL:  The URL to load
 * @contentType:  if available the Content-Type information will be
 *                returned at that location
 *
 * Open a connection to the indicated resource via HTTP GET. This is
 * xmlNanoHTTPMethod() with the default method, no input and no extra
 * headers; *@contentType is cleared before the request is issued.
 *
 * Returns NULL in case of failure, otherwise a request handler.
 *     The contentType, if provided must be freed by the caller
 */

void*
xmlNanoHTTPOpen(const char *URL, char **contentType) {
    void *handle;

    if (contentType != NULL)
        *contentType = NULL;
    handle = xmlNanoHTTPMethod(URL, NULL, NULL, contentType, NULL);
    return(handle);
}
827
828/**
829 * xmlNanoHTTPRead:
830 * @ctx: the HTTP context
831 * @dest: a buffer
832 * @len: the buffer length
833 *
834 * This function tries to read @len bytes from the existing HTTP connection
835 * and saves them in @dest. This is a blocking call.
836 *
837 * Returns the number of byte read. 0 is an indication of an end of connection.
838 * -1 indicates a parameter error.
839 */
840int
841xmlNanoHTTPRead(void *ctx, void *dest, int len) {
842 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
843
844 if (ctx == NULL) return(-1);
845 if (dest == NULL) return(-1);
846 if (len <= 0) return(0);
847
848 while (ctxt->inptr - ctxt->inrptr < len) {
849 if (xmlNanoHTTPRecv(ctxt) == 0) break;
850 }
851 if (ctxt->inptr - ctxt->inrptr < len)
852 len = ctxt->inptr - ctxt->inrptr;
853 memcpy(dest, ctxt->inrptr, len);
854 ctxt->inrptr += len;
855 return(len);
856}
857
858/**
859 * xmlNanoHTTPClose:
860 * @ctx: the HTTP context
861 *
862 * This function closes an HTTP context, it ends up the connection and
863 * free all data related to it.
864 */
865void
866xmlNanoHTTPClose(void *ctx) {
867 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
868
869 if (ctx == NULL) return;
870
871 xmlNanoHTTPFreeCtxt(ctxt);
872}
873
874/**
875 * xmlNanoHTTPMethod:
876 * @URL: The URL to load
877 * @method: the HTTP method to use
878 * @input: the input string if any
879 * @contentType: the Content-Type information IN and OUT
880 * @headers: the extra headers
881 *
882 * This function try to open a connection to the indicated resource
883 * via HTTP using the given @method, adding the given extra headers
884 * and the input buffer for the request content.
885 *
886 * Returns NULL in case of failure, otherwise a request handler.
887 * The contentType, if provided must be freed by the caller
888 */
889
890void*
891xmlNanoHTTPMethod(const char *URL, const char *method, const char *input,
892 char **contentType, const char *headers) {
893 xmlNanoHTTPCtxtPtr ctxt;
894 char *bp, *p;
895 int blen, ilen, ret;
896 int head;
897 int nbRedirects = 0;
898 char *redirURL = NULL;
899
900 if (URL == NULL) return(NULL);
901 if (method == NULL) method = "GET";
902 xmlNanoHTTPInit();
903
904retry:
905 if (redirURL == NULL)
906 ctxt = xmlNanoHTTPNewCtxt(URL);
907 else {
908 ctxt = xmlNanoHTTPNewCtxt(redirURL);
909 xmlFree(redirURL);
910 redirURL = NULL;
911 }
912
913 if ((ctxt->protocol == NULL) || (strcmp(ctxt->protocol, "http"))) {
914 xmlNanoHTTPFreeCtxt(ctxt);
915 if (redirURL != NULL) xmlFree(redirURL);
916 return(NULL);
917 }
918 if (ctxt->hostname == NULL) {
919 xmlNanoHTTPFreeCtxt(ctxt);
920 return(NULL);
921 }
922 if (proxy) {
923 blen = strlen(ctxt->hostname) * 2 + 16;
924 ret = xmlNanoHTTPConnectHost(proxy, proxyPort);
925 }
926 else {
927 blen = strlen(ctxt->hostname);
928 ret = xmlNanoHTTPConnectHost(ctxt->hostname, ctxt->port);
929 }
930 if (ret < 0) {
931 xmlNanoHTTPFreeCtxt(ctxt);
932 return(NULL);
933 }
934 ctxt->fd = ret;
935
936 if (input != NULL) {
937 ilen = strlen(input);
938 blen += ilen + 32;
939 }
940 else
941 ilen = 0;
942 if (headers != NULL)
943 blen += strlen(headers);
944 if (contentType && *contentType)
945 blen += strlen(*contentType) + 16;
946 blen += strlen(method) + strlen(ctxt->path) + 23;
947 bp = xmlMalloc(blen);
948 if (proxy) {
949 if (ctxt->port != 80) {
950 sprintf(bp, "%s http://%s:%d%s", method, ctxt->hostname,
951 ctxt->port, ctxt->path);
952 }
953 else
954 sprintf(bp, "%s http://%s%s", method, ctxt->hostname, ctxt->path);
955 }
956 else
957 sprintf(bp, "%s %s", method, ctxt->path);
958 p = bp + strlen(bp);
959 sprintf(p, " HTTP/1.0\r\nHost: %s\r\n", ctxt->hostname);
960 p += strlen(p);
961 if (contentType != NULL && *contentType) {
962 sprintf(p, "Content-Type: %s\r\n", *contentType);
963 p += strlen(p);
964 }
965 if (headers != NULL) {
966 strcpy(p, headers);
967 p += strlen(p);
968 }
969 if (input != NULL)
970 sprintf(p, "Content-Length: %d\r\n\r\n%s", ilen, input);
971 else
972 strcpy(p, "\r\n");
973#ifdef DEBUG_HTTP
974 xmlGenericError(xmlGenericErrorContext,
975 "-> %s%s", proxy? "(Proxy) " : "", bp);
976 if ((blen -= strlen(bp)+1) < 0)
977 xmlGenericError(xmlGenericErrorContext,
978 "ERROR: overflowed buffer by %d bytes\n", -blen);
979#endif
980 ctxt->outptr = ctxt->out = bp;
981 ctxt->state = XML_NANO_HTTP_WRITE;
982 xmlNanoHTTPSend(ctxt);
983 ctxt->state = XML_NANO_HTTP_READ;
984 head = 1;
985
986 while ((p = xmlNanoHTTPReadLine(ctxt)) != NULL) {
987 if (head && (*p == 0)) {
988 head = 0;
989 ctxt->content = ctxt->inrptr;
990 xmlFree(p);
991 break;
992 }
993 xmlNanoHTTPScanAnswer(ctxt, p);
994
995#ifdef DEBUG_HTTP
996 xmlGenericError(xmlGenericErrorContext, "<- %s\n", p);
997#endif
998 xmlFree(p);
999 }
1000
1001 if ((ctxt->location != NULL) && (ctxt->returnValue >= 300) &&
1002 (ctxt->returnValue < 400)) {
1003#ifdef DEBUG_HTTP
1004 xmlGenericError(xmlGenericErrorContext,
1005 "\nRedirect to: %s\n", ctxt->location);
1006#endif
1007 while (xmlNanoHTTPRecv(ctxt)) ;
1008 if (nbRedirects < XML_NANO_HTTP_MAX_REDIR) {
1009 nbRedirects++;
1010 redirURL = xmlMemStrdup(ctxt->location);
1011 xmlNanoHTTPFreeCtxt(ctxt);
1012 goto retry;
1013 }
1014 xmlNanoHTTPFreeCtxt(ctxt);
1015#ifdef DEBUG_HTTP
1016 xmlGenericError(xmlGenericErrorContext,
1017 "Too many redirects, aborting ...\n");
1018#endif
1019 return(NULL);
1020
1021 }
1022
1023 if (contentType != NULL) {
1024 if (ctxt->contentType != NULL)
1025 *contentType = xmlMemStrdup(ctxt->contentType);
1026 else
1027 *contentType = NULL;
1028 }
1029
1030#ifdef DEBUG_HTTP
1031 if (ctxt->contentType != NULL)
1032 xmlGenericError(xmlGenericErrorContext,
1033 "\nCode %d, content-type '%s'\n\n",
1034 ctxt->returnValue, ctxt->contentType);
1035 else
1036 xmlGenericError(xmlGenericErrorContext,
1037 "\nCode %d, no content-type\n\n",
1038 ctxt->returnValue);
1039#endif
1040
1041 return((void *) ctxt);
1042}
1043
/**
 * xmlNanoHTTPFetch:
 * @URL:  The URL to load
 * @filename:  the filename where the content should be saved, "-"
 *             selects the standard output
 * @contentType:  if available the Content-Type information will be
 *                returned at that location
 *
 * This function try to fetch the indicated resource via HTTP GET
 * and save it's content in the file. An existing file is truncated.
 *
 * Returns -1 in case of failure, 0 in case of success. The contentType,
 *     if provided must be freed by the caller; it is released and reset
 *     to NULL when the file cannot be opened or written.
 */
int
xmlNanoHTTPFetch(const char *URL, const char *filename, char **contentType) {
    void *ctxt;
    char buf[4096];
    int fd;
    int len;
    int opened = 0;
    int ret = 0;

    if (filename == NULL) {
        if (contentType != NULL) *contentType = NULL;
        return(-1);
    }

    ctxt = xmlNanoHTTPOpen(URL, contentType);
    if (ctxt == NULL) return(-1);

    if (!strcmp(filename, "-"))
        fd = 1;         /* standard output; descriptor 0 is the input */
    else {
        /* truncate so that no stale data survives past the new content */
        fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 00644);
        if (fd < 0)
            ret = -1;
        else
            opened = 1;
    }

    while ((ret == 0) &&
           ((len = xmlNanoHTTPRead(ctxt, buf, sizeof(buf))) > 0)) {
        int done = 0;

        /* write() may be partial: loop until the chunk is flushed */
        while (done < len) {
            int n = write(fd, buf + done, len - done);

            if (n <= 0) {
                ret = -1;
                break;
            }
            done += n;
        }
    }

    xmlNanoHTTPClose(ctxt);
    /* only close what was opened here, never the standard output */
    if (opened)
        close(fd);
    if ((ret < 0) && (contentType != NULL) && (*contentType != NULL)) {
        xmlFree(*contentType);
        *contentType = NULL;
    }
    return(ret);
}
1089
/**
 * xmlNanoHTTPSave:
 * @ctxt:  the HTTP context
 * @filename:  the filename where the content should be saved, "-"
 *             selects the standard output
 *
 * This function saves the output of the HTTP transaction to a file.
 * An existing file is truncated.
 * It closes and free the context at the end, on failure as well
 * (except when @ctxt itself is NULL).
 *
 * Returns -1 in case of failure, 0 in case of success.
 */
int
xmlNanoHTTPSave(void *ctxt, const char *filename) {
    char buf[4096];
    int fd;
    int len;
    int opened = 0;
    int ret = 0;

    if (ctxt == NULL) return(-1);

    if (filename == NULL) {
        xmlNanoHTTPClose(ctxt);
        return(-1);
    }

    if (!strcmp(filename, "-"))
        fd = 1;         /* standard output; descriptor 0 is the input */
    else {
        /* O_CREAT requires the permission argument */
        fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 00644);
        if (fd < 0) {
            xmlNanoHTTPClose(ctxt);
            return(-1);
        }
        opened = 1;
    }

    while ((ret == 0) &&
           ((len = xmlNanoHTTPRead(ctxt, buf, sizeof(buf))) > 0)) {
        int done = 0;

        /* write() may be partial: loop until the chunk is flushed */
        while (done < len) {
            int n = write(fd, buf + done, len - done);

            if (n <= 0) {
                ret = -1;
                break;
            }
            done += n;
        }
    }

    xmlNanoHTTPClose(ctxt);
    /* release the descriptor opened here, never the standard output */
    if (opened)
        close(fd);
    return(ret);
}
1125
1126/**
1127 * xmlNanoHTTPReturnCode:
1128 * @ctx: the HTTP context
1129 *
1130 * Returns the HTTP return code for the request.
1131 */
1132int
1133xmlNanoHTTPReturnCode(void *ctx) {
1134 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1135
1136 if (ctxt == NULL) return(-1);
1137
1138 return(ctxt->returnValue);
1139}
1140
1141/**
1142 * xmlNanoHTTPAuthHeader:
1143 * @ctx: the HTTP context
1144 *
1145 * Returns the stashed value of the WWW-Authenticate or Proxy-Authenticate
1146 * header.
1147 */
1148const char *
1149xmlNanoHTTPAuthHeader(void *ctx) {
1150 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1151
1152 if (ctxt == NULL) return(NULL);
1153
1154 return(ctxt->authHeader);
1155}
1156
1157#ifdef STANDALONE
1158int main(int argc, char **argv) {
1159 char *contentType = NULL;
1160
1161 if (argv[1] != NULL) {
1162 if (argv[2] != NULL)
1163 xmlNanoHTTPFetch(argv[1], argv[2], &contentType);
1164 else
1165 xmlNanoHTTPFetch(argv[1], "-", &contentType);
1166 if (contentType != NULL) xmlFree(contentType);
1167 } else {
1168 xmlGenericError(xmlGenericErrorContext,
1169 "%s: minimal HTTP GET implementation\n", argv[0]);
1170 xmlGenericError(xmlGenericErrorContext,
1171 "\tusage %s [ URL [ filename ] ]\n", argv[0]);
1172 }
1173 xmlNanoHTTPCleanup();
1174 xmlMemoryDump();
1175 return(0);
1176}
1177#endif /* STANDALONE */
1178#else /* !LIBXML_HTTP_ENABLED */
1179#ifdef STANDALONE
1180#include <stdio.h>
1181int main(int argc, char **argv) {
1182 xmlGenericError(xmlGenericErrorContext,
1183 "%s : HTTP support not compiled in\n", argv[0]);
1184 return(0);
1185}
1186#endif /* STANDALONE */
1187#endif /* LIBXML_HTTP_ENABLED */