blob: 4991e0bb90788b87a17f73d256130daba1fdcd84 [file] [log] [blame]
Owen Taylor3473f882001-02-23 17:55:21 +00001/*
2 * nanohttp.c: minimalist HTTP GET implementation to fetch external subsets.
3 * focuses on size, streamability, reentrancy and portability
4 *
5 * This is clearly not a general purpose HTTP implementation
6 * If you look for one, check:
7 * http://www.w3.org/Library/
8 *
9 * See Copyright for the status of this software.
10 *
11 * Daniel.Veillard@w3.org
12 */
13
14/* TODO add compression support, Send the Accept- , and decompress on the
15 fly with ZLIB if found at compile-time */
16
Daniel Veillardf3afa7d2001-06-09 13:52:58 +000017#define NEED_SOCKETS
Bjorn Reese70a9da52001-04-21 16:57:29 +000018#include "libxml.h"
Owen Taylor3473f882001-02-23 17:55:21 +000019
20#ifdef LIBXML_HTTP_ENABLED
Owen Taylor3473f882001-02-23 17:55:21 +000021#include <string.h>
22
23#ifdef HAVE_STDLIB_H
24#include <stdlib.h>
25#endif
26#ifdef HAVE_UNISTD_H
27#include <unistd.h>
28#endif
29#ifdef HAVE_SYS_SOCKET_H
30#include <sys/socket.h>
31#endif
32#ifdef HAVE_NETINET_IN_H
33#include <netinet/in.h>
34#endif
35#ifdef HAVE_ARPA_INET_H
36#include <arpa/inet.h>
37#endif
38#ifdef HAVE_NETDB_H
39#include <netdb.h>
40#endif
41#ifdef HAVE_FCNTL_H
42#include <fcntl.h>
43#endif
44#ifdef HAVE_ERRNO_H
45#include <errno.h>
46#endif
47#ifdef HAVE_SYS_TIME_H
48#include <sys/time.h>
49#endif
50#ifdef HAVE_SYS_SELECT_H
51#include <sys/select.h>
52#endif
53#ifdef HAVE_STRINGS_H
54#include <strings.h>
55#endif
56#ifdef SUPPORT_IP6
57#include <resolv.h>
58#endif
59
60#ifdef VMS
61#include <stropts>
62#define SOCKLEN_T unsigned int
63#define SOCKET int
64#endif
65
66#include <libxml/xmlmemory.h>
67#include <libxml/parser.h> /* for xmlStr(n)casecmp() */
68#include <libxml/nanohttp.h>
69
/*
 * Portability shims.
 *
 * When the Winsock API is not in use, the Winsock spellings used in this
 * file (SOCKET, closesocket) are mapped onto their BSD socket equivalents.
 */
#ifndef _WINSOCKAPI_
#define SOCKET int
#define closesocket(s) close(s)
#endif

/*
 * Standalone build: turn on debug traces and replace the libxml
 * case-insensitive comparison helpers with the C library ones.
 * Macro arguments are parenthesized so that expression arguments
 * expand safely.
 */
#ifdef STANDALONE
#define DEBUG_HTTP
#define xmlStrncasecmp(a, b, n) \
    strncasecmp((const char *)(a), (const char *)(b), (n))
#define xmlStrcasecmpi(a, b) \
    strcasecmp((const char *)(a), (const char *)(b))
#endif
83
/* Upper bound on the number of redirections followed for one request. */
enum { XML_NANO_HTTP_MAX_REDIR = 10 };

/* Number of bytes requested from the socket by each recv() call. */
enum { XML_NANO_HTTP_CHUNK = 4096 };

/* Values stored in the state field of an HTTP context. */
enum {
    XML_NANO_HTTP_CLOSED = 0,
    XML_NANO_HTTP_WRITE = 1,
    XML_NANO_HTTP_READ = 2,
    XML_NANO_HTTP_NONE = 4
};
92
93typedef struct xmlNanoHTTPCtxt {
94 char *protocol; /* the protocol name */
95 char *hostname; /* the host name */
96 int port; /* the port */
97 char *path; /* the path within the URL */
98 SOCKET fd; /* the file descriptor for the socket */
99 int state; /* WRITE / READ / CLOSED */
100 char *out; /* buffer sent (zero terminated) */
101 char *outptr; /* index within the buffer sent */
102 char *in; /* the receiving buffer */
103 char *content; /* the start of the content */
104 char *inptr; /* the next byte to read from network */
105 char *inrptr; /* the next byte to give back to the client */
106 int inlen; /* len of the input buffer */
107 int last; /* return code for last operation */
108 int returnValue; /* the protocol return value */
109 char *contentType; /* the MIME type for the input */
110 char *location; /* the new URL in case of redirect */
111 char *authHeader; /* contents of {WWW,Proxy}-Authenticate header */
112} xmlNanoHTTPCtxt, *xmlNanoHTTPCtxtPtr;
113
/* Module-wide state shared by all HTTP contexts. */
static int initialized = 0;        /* non-zero once xmlNanoHTTPInit() has run */
static char *proxy = NULL;         /* proxy host name, NULL when none is set */
static int proxyPort = 0;          /* proxy port, meaningful when proxy is set */
static unsigned int timeout = 60;  /* select() timeout, in seconds */
118
/**
 * socket_errno:
 *
 * Portability wrapper giving the error code of the last socket call.
 *
 * Returns WSAGetLastError() when built against Winsock, errno otherwise.
 */
static int
socket_errno(void)
{
#ifndef _WINSOCKAPI_
    return(errno);
#else
    return(WSAGetLastError());
#endif
}
129
130/**
131 * xmlNanoHTTPInit:
132 *
133 * Initialize the HTTP protocol layer.
134 * Currently it just checks for proxy informations
135 */
136
137void
138xmlNanoHTTPInit(void) {
139 const char *env;
140#ifdef _WINSOCKAPI_
141 WSADATA wsaData;
142#endif
143
144 if (initialized)
145 return;
146
147#ifdef _WINSOCKAPI_
148 if (WSAStartup(MAKEWORD(1, 1), &wsaData) != 0)
149 return;
150#endif
151
152 if (proxy == NULL) {
153 proxyPort = 80;
154 env = getenv("no_proxy");
155 if (env != NULL)
156 goto done;
157 env = getenv("http_proxy");
158 if (env != NULL) {
159 xmlNanoHTTPScanProxy(env);
160 goto done;
161 }
162 env = getenv("HTTP_PROXY");
163 if (env != NULL) {
164 xmlNanoHTTPScanProxy(env);
165 goto done;
166 }
167 }
168done:
169 initialized = 1;
170}
171
172/**
173 * xmlNanoHTTPClenup:
174 *
175 * Cleanup the HTTP protocol layer.
176 */
177
178void
179xmlNanoHTTPCleanup(void) {
180 if (proxy != NULL)
181 xmlFree(proxy);
182#ifdef _WINSOCKAPI_
183 if (initialized)
184 WSACleanup();
185#endif
186 initialized = 0;
187 return;
188}
189
190/**
Owen Taylor3473f882001-02-23 17:55:21 +0000191 * xmlNanoHTTPScanURL:
192 * @ctxt: an HTTP context
193 * @URL: The URL used to initialize the context
194 *
195 * (Re)Initialize an HTTP context by parsing the URL and finding
196 * the protocol host port and path it indicates.
197 */
198
199static void
200xmlNanoHTTPScanURL(xmlNanoHTTPCtxtPtr ctxt, const char *URL) {
201 const char *cur = URL;
202 char buf[4096];
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000203 int indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000204 int port = 0;
205
206 if (ctxt->protocol != NULL) {
207 xmlFree(ctxt->protocol);
208 ctxt->protocol = NULL;
209 }
210 if (ctxt->hostname != NULL) {
211 xmlFree(ctxt->hostname);
212 ctxt->hostname = NULL;
213 }
214 if (ctxt->path != NULL) {
215 xmlFree(ctxt->path);
216 ctxt->path = NULL;
217 }
218 if (URL == NULL) return;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000219 buf[indx] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000220 while (*cur != 0) {
221 if ((cur[0] == ':') && (cur[1] == '/') && (cur[2] == '/')) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000222 buf[indx] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000223 ctxt->protocol = xmlMemStrdup(buf);
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000224 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000225 cur += 3;
226 break;
227 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000228 buf[indx++] = *cur++;
Owen Taylor3473f882001-02-23 17:55:21 +0000229 }
230 if (*cur == 0) return;
231
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000232 buf[indx] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000233 while (1) {
234 if (cur[0] == ':') {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000235 buf[indx] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000236 ctxt->hostname = xmlMemStrdup(buf);
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000237 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000238 cur += 1;
239 while ((*cur >= '0') && (*cur <= '9')) {
240 port *= 10;
241 port += *cur - '0';
242 cur++;
243 }
244 if (port != 0) ctxt->port = port;
245 while ((cur[0] != '/') && (*cur != 0))
246 cur++;
247 break;
248 }
249 if ((*cur == '/') || (*cur == 0)) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000250 buf[indx] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000251 ctxt->hostname = xmlMemStrdup(buf);
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000252 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000253 break;
254 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000255 buf[indx++] = *cur++;
Owen Taylor3473f882001-02-23 17:55:21 +0000256 }
257 if (*cur == 0)
258 ctxt->path = xmlMemStrdup("/");
259 else {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000260 indx = 0;
261 buf[indx] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000262 while (*cur != 0)
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000263 buf[indx++] = *cur++;
264 buf[indx] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000265 ctxt->path = xmlMemStrdup(buf);
266 }
267}
268
269/**
270 * xmlNanoHTTPScanProxy:
271 * @URL: The proxy URL used to initialize the proxy context
272 *
273 * (Re)Initialize the HTTP Proxy context by parsing the URL and finding
274 * the protocol host port it indicates.
275 * Should be like http://myproxy/ or http://myproxy:3128/
276 * A NULL URL cleans up proxy informations.
277 */
278
279void
280xmlNanoHTTPScanProxy(const char *URL) {
281 const char *cur = URL;
282 char buf[4096];
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000283 int indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000284 int port = 0;
285
286 if (proxy != NULL) {
287 xmlFree(proxy);
288 proxy = NULL;
289 }
290 if (proxyPort != 0) {
291 proxyPort = 0;
292 }
293#ifdef DEBUG_HTTP
294 if (URL == NULL)
295 xmlGenericError(xmlGenericErrorContext,
296 "Removing HTTP proxy info\n");
297 else
298 xmlGenericError(xmlGenericErrorContext,
299 "Using HTTP proxy %s\n", URL);
300#endif
301 if (URL == NULL) return;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000302 buf[indx] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000303 while (*cur != 0) {
304 if ((cur[0] == ':') && (cur[1] == '/') && (cur[2] == '/')) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000305 buf[indx] = 0;
306 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000307 cur += 3;
308 break;
309 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000310 buf[indx++] = *cur++;
Owen Taylor3473f882001-02-23 17:55:21 +0000311 }
312 if (*cur == 0) return;
313
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000314 buf[indx] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000315 while (1) {
316 if (cur[0] == ':') {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000317 buf[indx] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000318 proxy = xmlMemStrdup(buf);
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000319 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000320 cur += 1;
321 while ((*cur >= '0') && (*cur <= '9')) {
322 port *= 10;
323 port += *cur - '0';
324 cur++;
325 }
326 if (port != 0) proxyPort = port;
327 while ((cur[0] != '/') && (*cur != 0))
328 cur++;
329 break;
330 }
331 if ((*cur == '/') || (*cur == 0)) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000332 buf[indx] = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000333 proxy = xmlMemStrdup(buf);
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000334 indx = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000335 break;
336 }
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000337 buf[indx++] = *cur++;
Owen Taylor3473f882001-02-23 17:55:21 +0000338 }
339}
340
341/**
342 * xmlNanoHTTPNewCtxt:
343 * @URL: The URL used to initialize the context
344 *
345 * Allocate and initialize a new HTTP context.
346 *
347 * Returns an HTTP context or NULL in case of error.
348 */
349
350static xmlNanoHTTPCtxtPtr
351xmlNanoHTTPNewCtxt(const char *URL) {
352 xmlNanoHTTPCtxtPtr ret;
353
354 ret = (xmlNanoHTTPCtxtPtr) xmlMalloc(sizeof(xmlNanoHTTPCtxt));
355 if (ret == NULL) return(NULL);
356
357 memset(ret, 0, sizeof(xmlNanoHTTPCtxt));
358 ret->port = 80;
359 ret->returnValue = 0;
360 ret->fd = -1;
361
362 xmlNanoHTTPScanURL(ret, URL);
363
364 return(ret);
365}
366
367/**
368 * xmlNanoHTTPFreeCtxt:
369 * @ctxt: an HTTP context
370 *
371 * Frees the context after closing the connection.
372 */
373
374static void
375xmlNanoHTTPFreeCtxt(xmlNanoHTTPCtxtPtr ctxt) {
376 if (ctxt == NULL) return;
377 if (ctxt->hostname != NULL) xmlFree(ctxt->hostname);
378 if (ctxt->protocol != NULL) xmlFree(ctxt->protocol);
379 if (ctxt->path != NULL) xmlFree(ctxt->path);
380 if (ctxt->out != NULL) xmlFree(ctxt->out);
381 if (ctxt->in != NULL) xmlFree(ctxt->in);
382 if (ctxt->contentType != NULL) xmlFree(ctxt->contentType);
383 if (ctxt->location != NULL) xmlFree(ctxt->location);
384 if (ctxt->authHeader != NULL) xmlFree(ctxt->authHeader);
385 ctxt->state = XML_NANO_HTTP_NONE;
386 if (ctxt->fd >= 0) closesocket(ctxt->fd);
387 ctxt->fd = -1;
388 xmlFree(ctxt);
389}
390
391/**
392 * xmlNanoHTTPSend:
393 * @ctxt: an HTTP context
394 *
395 * Send the input needed to initiate the processing on the server side
396 */
397
398static void
399xmlNanoHTTPSend(xmlNanoHTTPCtxtPtr ctxt) {
400 if (ctxt->state & XML_NANO_HTTP_WRITE) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000401 unsigned int total_sent = 0;
Owen Taylor3473f882001-02-23 17:55:21 +0000402 while (total_sent <strlen(ctxt->outptr)) {
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000403 unsigned int nsent = send(ctxt->fd, ctxt->outptr+total_sent,
404 strlen(ctxt->outptr)-total_sent, 0);
Owen Taylor3473f882001-02-23 17:55:21 +0000405 if (nsent>0)
406 total_sent += nsent;
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000407 }
Owen Taylor3473f882001-02-23 17:55:21 +0000408 ctxt->last = total_sent;
409 }
410}
411
412/**
413 * xmlNanoHTTPRecv:
414 * @ctxt: an HTTP context
415 *
416 * Read information coming from the HTTP connection.
417 * This is a blocking call (but it blocks in select(), not read()).
418 *
419 * Returns the number of byte read or -1 in case of error.
420 */
421
422static int
423xmlNanoHTTPRecv(xmlNanoHTTPCtxtPtr ctxt) {
424 fd_set rfd;
425 struct timeval tv;
426
427
428 while (ctxt->state & XML_NANO_HTTP_READ) {
429 if (ctxt->in == NULL) {
430 ctxt->in = (char *) xmlMalloc(65000 * sizeof(char));
431 if (ctxt->in == NULL) {
432 ctxt->last = -1;
433 return(-1);
434 }
435 ctxt->inlen = 65000;
436 ctxt->inptr = ctxt->content = ctxt->inrptr = ctxt->in;
437 }
438 if (ctxt->inrptr > ctxt->in + XML_NANO_HTTP_CHUNK) {
439 int delta = ctxt->inrptr - ctxt->in;
440 int len = ctxt->inptr - ctxt->inrptr;
441
442 memmove(ctxt->in, ctxt->inrptr, len);
443 ctxt->inrptr -= delta;
444 ctxt->content -= delta;
445 ctxt->inptr -= delta;
446 }
447 if ((ctxt->in + ctxt->inlen) < (ctxt->inptr + XML_NANO_HTTP_CHUNK)) {
448 int d_inptr = ctxt->inptr - ctxt->in;
449 int d_content = ctxt->content - ctxt->in;
450 int d_inrptr = ctxt->inrptr - ctxt->in;
451
452 ctxt->inlen *= 2;
453 ctxt->in = (char *) xmlRealloc(ctxt->in, ctxt->inlen);
454 if (ctxt->in == NULL) {
455 ctxt->last = -1;
456 return(-1);
457 }
458 ctxt->inptr = ctxt->in + d_inptr;
459 ctxt->content = ctxt->in + d_content;
460 ctxt->inrptr = ctxt->in + d_inrptr;
461 }
462 ctxt->last = recv(ctxt->fd, ctxt->inptr, XML_NANO_HTTP_CHUNK, 0);
463 if (ctxt->last > 0) {
464 ctxt->inptr += ctxt->last;
465 return(ctxt->last);
466 }
467 if (ctxt->last == 0) {
468 return(0);
469 }
470 if (ctxt->last == -1) {
471 switch (socket_errno()) {
472 case EINPROGRESS:
473 case EWOULDBLOCK:
474#if defined(EAGAIN) && EAGAIN != EWOULDBLOCK
475 case EAGAIN:
476#endif
477 break;
478 default:
479 return(0);
480 }
481 }
482
483 tv.tv_sec = timeout;
484 tv.tv_usec = 0;
485 FD_ZERO(&rfd);
486 FD_SET(ctxt->fd, &rfd);
487
488 if (select(ctxt->fd+1, &rfd, NULL, NULL, &tv)<1)
489 return(0);
490 }
491 return(0);
492}
493
494/**
495 * xmlNanoHTTPReadLine:
496 * @ctxt: an HTTP context
497 *
498 * Read one line in the HTTP server output, usually for extracting
499 * the HTTP protocol informations from the answer header.
500 *
501 * Returns a newly allocated string with a copy of the line, or NULL
502 * which indicate the end of the input.
503 */
504
505static char *
506xmlNanoHTTPReadLine(xmlNanoHTTPCtxtPtr ctxt) {
507 char buf[4096];
508 char *bp = buf;
509
510 while (bp - buf < 4095) {
511 if (ctxt->inrptr == ctxt->inptr) {
512 if (xmlNanoHTTPRecv(ctxt) == 0) {
513 if (bp == buf)
514 return(NULL);
515 else
516 *bp = 0;
517 return(xmlMemStrdup(buf));
518 }
519 }
520 *bp = *ctxt->inrptr++;
521 if (*bp == '\n') {
522 *bp = 0;
523 return(xmlMemStrdup(buf));
524 }
525 if (*bp != '\r')
526 bp++;
527 }
528 buf[4095] = 0;
529 return(xmlMemStrdup(buf));
530}
531
532
533/**
534 * xmlNanoHTTPScanAnswer:
535 * @ctxt: an HTTP context
536 * @line: an HTTP header line
537 *
538 * Try to extract useful informations from the server answer.
539 * We currently parse and process:
540 * - The HTTP revision/ return code
541 * - The Content-Type
542 * - The Location for redirrect processing.
543 *
544 * Returns -1 in case of failure, the file descriptor number otherwise
545 */
546
547static void
548xmlNanoHTTPScanAnswer(xmlNanoHTTPCtxtPtr ctxt, const char *line) {
549 const char *cur = line;
550
551 if (line == NULL) return;
552
553 if (!strncmp(line, "HTTP/", 5)) {
554 int version = 0;
555 int ret = 0;
556
557 cur += 5;
558 while ((*cur >= '0') && (*cur <= '9')) {
559 version *= 10;
560 version += *cur - '0';
561 cur++;
562 }
563 if (*cur == '.') {
564 cur++;
565 if ((*cur >= '0') && (*cur <= '9')) {
566 version *= 10;
567 version += *cur - '0';
568 cur++;
569 }
570 while ((*cur >= '0') && (*cur <= '9'))
571 cur++;
572 } else
573 version *= 10;
574 if ((*cur != ' ') && (*cur != '\t')) return;
575 while ((*cur == ' ') || (*cur == '\t')) cur++;
576 if ((*cur < '0') || (*cur > '9')) return;
577 while ((*cur >= '0') && (*cur <= '9')) {
578 ret *= 10;
579 ret += *cur - '0';
580 cur++;
581 }
582 if ((*cur != 0) && (*cur != ' ') && (*cur != '\t')) return;
583 ctxt->returnValue = ret;
584 } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Content-Type:", 13)) {
585 cur += 13;
586 while ((*cur == ' ') || (*cur == '\t')) cur++;
587 if (ctxt->contentType != NULL)
588 xmlFree(ctxt->contentType);
589 ctxt->contentType = xmlMemStrdup(cur);
590 } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"ContentType:", 12)) {
591 cur += 12;
592 if (ctxt->contentType != NULL) return;
593 while ((*cur == ' ') || (*cur == '\t')) cur++;
594 ctxt->contentType = xmlMemStrdup(cur);
595 } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Location:", 9)) {
596 cur += 9;
597 while ((*cur == ' ') || (*cur == '\t')) cur++;
598 if (ctxt->location != NULL)
599 xmlFree(ctxt->location);
600 ctxt->location = xmlMemStrdup(cur);
601 } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"WWW-Authenticate:", 17)) {
602 cur += 17;
603 while ((*cur == ' ') || (*cur == '\t')) cur++;
604 if (ctxt->authHeader != NULL)
605 xmlFree(ctxt->authHeader);
606 ctxt->authHeader = xmlMemStrdup(cur);
607 } else if (!xmlStrncasecmp(BAD_CAST line, BAD_CAST"Proxy-Authenticate:", 19)) {
608 cur += 19;
609 while ((*cur == ' ') || (*cur == '\t')) cur++;
610 if (ctxt->authHeader != NULL)
611 xmlFree(ctxt->authHeader);
612 ctxt->authHeader = xmlMemStrdup(cur);
613 }
614}
615
616/**
617 * xmlNanoHTTPConnectAttempt:
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000618 * @addr: a socket adress structure
Owen Taylor3473f882001-02-23 17:55:21 +0000619 *
620 * Attempt a connection to the given IP:port endpoint. It forces
621 * non-blocking semantic on the socket, and allow 60 seconds for
622 * the host to answer.
623 *
624 * Returns -1 in case of failure, the file descriptor number otherwise
625 */
626
627static int
Daniel Veillard56a4cb82001-03-24 17:00:36 +0000628xmlNanoHTTPConnectAttempt(struct sockaddr *addr)
Owen Taylor3473f882001-02-23 17:55:21 +0000629{
630 SOCKET s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
631 fd_set wfd;
632 struct timeval tv;
633 int status;
634
635 if (s==-1) {
636#ifdef DEBUG_HTTP
637 perror("socket");
638#endif
639 return(-1);
640 }
641
642#ifdef _WINSOCKAPI_
643 {
644 u_long one = 1;
645
646 status = ioctlsocket(s, FIONBIO, &one) == SOCKET_ERROR ? -1 : 0;
647 }
648#else /* _WINSOCKAPI_ */
649#if defined(VMS)
650 {
651 int enable = 1;
652 status = ioctl(s, FIONBIO, &enable);
653 }
654#else /* VMS */
655 if ((status = fcntl(s, F_GETFL, 0)) != -1) {
656#ifdef O_NONBLOCK
657 status |= O_NONBLOCK;
658#else /* O_NONBLOCK */
659#ifdef F_NDELAY
660 status |= F_NDELAY;
661#endif /* F_NDELAY */
662#endif /* !O_NONBLOCK */
663 status = fcntl(s, F_SETFL, status);
664 }
665 if (status < 0) {
666#ifdef DEBUG_HTTP
667 perror("nonblocking");
668#endif
669 closesocket(s);
670 return(-1);
671 }
672#endif /* !VMS */
673#endif /* !_WINSOCKAPI_ */
674
675
676 if ((connect(s, addr, sizeof(*addr))==-1)) {
677 switch (socket_errno()) {
678 case EINPROGRESS:
679 case EWOULDBLOCK:
680 break;
681 default:
682 perror("connect");
683 closesocket(s);
684 return(-1);
685 }
686 }
687
688 tv.tv_sec = timeout;
689 tv.tv_usec = 0;
690
691 FD_ZERO(&wfd);
692 FD_SET(s, &wfd);
693
694 switch(select(s+1, NULL, &wfd, NULL, &tv))
695 {
696 case 0:
697 /* Time out */
698 closesocket(s);
699 return(-1);
700 case -1:
701 /* Ermm.. ?? */
702#ifdef DEBUG_HTTP
703 perror("select");
704#endif
705 closesocket(s);
706 return(-1);
707 }
708
709 if ( FD_ISSET(s, &wfd) ) {
710 SOCKLEN_T len;
711 len = sizeof(status);
712 if (getsockopt(s, SOL_SOCKET, SO_ERROR, (char*)&status, &len) < 0 ) {
713 /* Solaris error code */
714 return (-1);
715 }
716 if ( status ) {
717 closesocket(s);
718 errno = status;
719 return (-1);
720 }
721 } else {
722 /* pbm */
723 return (-1);
724 }
725
726 return(s);
727}
728
/**
 * xmlNanoHTTPConnectHost:
 * @host:  the host name
 * @port:  the port number
 *
 * Attempt a connection to the given host:port endpoint. It tries
 * each of the addresses returned by the resolver in turn and keeps
 * the first one that connects. Addresses whose length does not fit
 * the expected address structure are skipped.
 *
 * Returns -1 in case of failure, the file descriptor number otherwise
 */

static int
xmlNanoHTTPConnectHost(const char *host, int port)
{
    struct hostent *h;
    struct sockaddr *addr;
    struct in_addr ia;
    struct sockaddr_in sockin;
#ifdef SUPPORT_IP6
    struct in6_addr ia6;
    struct sockaddr_in6 sockin6;
#endif
    int i;
    int s;

#if defined(SUPPORT_IP6) && defined(RES_USE_INET6)
    if (!(_res.options & RES_INIT))
        res_init();
    _res.options |= RES_USE_INET6;
#endif
    h=gethostbyname(host);
    if (h==NULL)
    {
#ifdef DEBUG_HTTP
        xmlGenericError(xmlGenericErrorContext,"unable to resolve '%s'.\n", host);
#endif
        return(-1);
    }

    for(i=0; h->h_addr_list[i]; i++)
    {
        if (h->h_addrtype == AF_INET) {
            /* A records (IPv4) */
            if ((unsigned int) h->h_length > sizeof(ia))
                continue;
            memset(&ia, 0, sizeof(ia));
            memset(&sockin, 0, sizeof(sockin));
            memcpy(&ia, h->h_addr_list[i], h->h_length);
            sockin.sin_family = h->h_addrtype;
            sockin.sin_addr = ia;
            sockin.sin_port = htons(port);
            addr = (struct sockaddr *)&sockin;
#ifdef SUPPORT_IP6
        } else if (h->h_addrtype == AF_INET6) {
            /* AAAA records (IPv6) */
            if ((unsigned int) h->h_length > sizeof(ia6))
                continue;
            memset(&ia6, 0, sizeof(ia6));
            memset(&sockin6, 0, sizeof(sockin6));
            memcpy(&ia6, h->h_addr_list[i], h->h_length);
            /* struct sockaddr_in6 members carry the sin6_ prefix. */
            sockin6.sin6_family = h->h_addrtype;
            sockin6.sin6_addr = ia6;
            sockin6.sin6_port = htons(port);
            addr = (struct sockaddr *)&sockin6;
#endif
        } else
            break; /* for */

        s = xmlNanoHTTPConnectAttempt(addr);
        if (s != -1)
            return(s);
    }

#ifdef DEBUG_HTTP
    xmlGenericError(xmlGenericErrorContext,
            "unable to connect to '%s'.\n", host);
#endif
    return(-1);
}
800
801
/**
 * xmlNanoHTTPOpen:
 * @URL:  The URL to load
 * @contentType:  if available the Content-Type information will be
 *                returned at that location
 *
 * This function tries to open a connection to the indicated resource
 * via HTTP GET. @contentType, when provided, is reset to NULL before
 * the request is made.
 *
 * Returns NULL in case of failure, otherwise a request handler.
 *     The contentType, if provided must be freed by the caller
 */

void*
xmlNanoHTTPOpen(const char *URL, char **contentType)
{
    void *handle;

    if (contentType != NULL)
        *contentType = NULL;

    handle = xmlNanoHTTPMethod(URL, NULL, NULL, contentType, NULL);
    return(handle);
}
820
/**
 * xmlNanoHTTPOpenRedir:
 * @URL:  The URL to load
 * @contentType:  if available the Content-Type information will be
 *                returned at that location
 * @redir:  if available the redirected URL will be returned
 *
 * This function tries to open a connection to the indicated resource
 * via HTTP GET. Both output locations, when provided, are reset to
 * NULL before the request is made.
 *
 * Returns NULL in case of failure, otherwise a request handler.
 *     The contentType, if provided must be freed by the caller
 */

void*
xmlNanoHTTPOpenRedir(const char *URL, char **contentType, char **redir)
{
    void *handle;

    if (contentType != NULL)
        *contentType = NULL;
    if (redir != NULL)
        *redir = NULL;

    handle = xmlNanoHTTPMethodRedir(URL, NULL, NULL, contentType, redir, NULL);
    return(handle);
}
841
842/**
843 * xmlNanoHTTPRead:
844 * @ctx: the HTTP context
845 * @dest: a buffer
846 * @len: the buffer length
847 *
848 * This function tries to read @len bytes from the existing HTTP connection
849 * and saves them in @dest. This is a blocking call.
850 *
851 * Returns the number of byte read. 0 is an indication of an end of connection.
852 * -1 indicates a parameter error.
853 */
854int
855xmlNanoHTTPRead(void *ctx, void *dest, int len) {
856 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
857
858 if (ctx == NULL) return(-1);
859 if (dest == NULL) return(-1);
860 if (len <= 0) return(0);
861
862 while (ctxt->inptr - ctxt->inrptr < len) {
863 if (xmlNanoHTTPRecv(ctxt) == 0) break;
864 }
865 if (ctxt->inptr - ctxt->inrptr < len)
866 len = ctxt->inptr - ctxt->inrptr;
867 memcpy(dest, ctxt->inrptr, len);
868 ctxt->inrptr += len;
869 return(len);
870}
871
872/**
873 * xmlNanoHTTPClose:
874 * @ctx: the HTTP context
875 *
876 * This function closes an HTTP context, it ends up the connection and
877 * free all data related to it.
878 */
879void
880xmlNanoHTTPClose(void *ctx) {
881 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
882
883 if (ctx == NULL) return;
884
885 xmlNanoHTTPFreeCtxt(ctxt);
886}
887
888/**
Daniel Veillard9403a042001-05-28 11:00:53 +0000889 * xmlNanoHTTPMethodRedir:
Owen Taylor3473f882001-02-23 17:55:21 +0000890 * @URL: The URL to load
891 * @method: the HTTP method to use
892 * @input: the input string if any
893 * @contentType: the Content-Type information IN and OUT
Daniel Veillard9403a042001-05-28 11:00:53 +0000894 * @redir: the redirected URL OUT
Owen Taylor3473f882001-02-23 17:55:21 +0000895 * @headers: the extra headers
896 *
897 * This function try to open a connection to the indicated resource
898 * via HTTP using the given @method, adding the given extra headers
899 * and the input buffer for the request content.
900 *
901 * Returns NULL in case of failure, otherwise a request handler.
Daniel Veillard9403a042001-05-28 11:00:53 +0000902 * The contentType, or redir, if provided must be freed by the caller
Owen Taylor3473f882001-02-23 17:55:21 +0000903 */
904
905void*
Daniel Veillard9403a042001-05-28 11:00:53 +0000906xmlNanoHTTPMethodRedir(const char *URL, const char *method, const char *input,
907 char **contentType, char **redir, const char *headers) {
Owen Taylor3473f882001-02-23 17:55:21 +0000908 xmlNanoHTTPCtxtPtr ctxt;
909 char *bp, *p;
910 int blen, ilen, ret;
911 int head;
912 int nbRedirects = 0;
913 char *redirURL = NULL;
914
915 if (URL == NULL) return(NULL);
916 if (method == NULL) method = "GET";
917 xmlNanoHTTPInit();
918
919retry:
920 if (redirURL == NULL)
921 ctxt = xmlNanoHTTPNewCtxt(URL);
922 else {
923 ctxt = xmlNanoHTTPNewCtxt(redirURL);
Owen Taylor3473f882001-02-23 17:55:21 +0000924 }
925
926 if ((ctxt->protocol == NULL) || (strcmp(ctxt->protocol, "http"))) {
927 xmlNanoHTTPFreeCtxt(ctxt);
928 if (redirURL != NULL) xmlFree(redirURL);
929 return(NULL);
930 }
931 if (ctxt->hostname == NULL) {
932 xmlNanoHTTPFreeCtxt(ctxt);
Daniel Veillard9403a042001-05-28 11:00:53 +0000933 if (redirURL != NULL) xmlFree(redirURL);
Owen Taylor3473f882001-02-23 17:55:21 +0000934 return(NULL);
935 }
936 if (proxy) {
937 blen = strlen(ctxt->hostname) * 2 + 16;
938 ret = xmlNanoHTTPConnectHost(proxy, proxyPort);
939 }
940 else {
941 blen = strlen(ctxt->hostname);
942 ret = xmlNanoHTTPConnectHost(ctxt->hostname, ctxt->port);
943 }
944 if (ret < 0) {
945 xmlNanoHTTPFreeCtxt(ctxt);
Daniel Veillard9403a042001-05-28 11:00:53 +0000946 if (redirURL != NULL) xmlFree(redirURL);
Owen Taylor3473f882001-02-23 17:55:21 +0000947 return(NULL);
948 }
949 ctxt->fd = ret;
950
951 if (input != NULL) {
952 ilen = strlen(input);
953 blen += ilen + 32;
954 }
955 else
956 ilen = 0;
957 if (headers != NULL)
958 blen += strlen(headers);
959 if (contentType && *contentType)
960 blen += strlen(*contentType) + 16;
961 blen += strlen(method) + strlen(ctxt->path) + 23;
962 bp = xmlMalloc(blen);
963 if (proxy) {
964 if (ctxt->port != 80) {
965 sprintf(bp, "%s http://%s:%d%s", method, ctxt->hostname,
966 ctxt->port, ctxt->path);
967 }
968 else
969 sprintf(bp, "%s http://%s%s", method, ctxt->hostname, ctxt->path);
970 }
971 else
972 sprintf(bp, "%s %s", method, ctxt->path);
973 p = bp + strlen(bp);
974 sprintf(p, " HTTP/1.0\r\nHost: %s\r\n", ctxt->hostname);
975 p += strlen(p);
976 if (contentType != NULL && *contentType) {
977 sprintf(p, "Content-Type: %s\r\n", *contentType);
978 p += strlen(p);
979 }
980 if (headers != NULL) {
981 strcpy(p, headers);
982 p += strlen(p);
983 }
984 if (input != NULL)
985 sprintf(p, "Content-Length: %d\r\n\r\n%s", ilen, input);
986 else
987 strcpy(p, "\r\n");
988#ifdef DEBUG_HTTP
989 xmlGenericError(xmlGenericErrorContext,
990 "-> %s%s", proxy? "(Proxy) " : "", bp);
991 if ((blen -= strlen(bp)+1) < 0)
992 xmlGenericError(xmlGenericErrorContext,
993 "ERROR: overflowed buffer by %d bytes\n", -blen);
994#endif
995 ctxt->outptr = ctxt->out = bp;
996 ctxt->state = XML_NANO_HTTP_WRITE;
997 xmlNanoHTTPSend(ctxt);
998 ctxt->state = XML_NANO_HTTP_READ;
999 head = 1;
1000
1001 while ((p = xmlNanoHTTPReadLine(ctxt)) != NULL) {
1002 if (head && (*p == 0)) {
1003 head = 0;
1004 ctxt->content = ctxt->inrptr;
1005 xmlFree(p);
1006 break;
1007 }
1008 xmlNanoHTTPScanAnswer(ctxt, p);
1009
1010#ifdef DEBUG_HTTP
1011 xmlGenericError(xmlGenericErrorContext, "<- %s\n", p);
1012#endif
1013 xmlFree(p);
1014 }
1015
1016 if ((ctxt->location != NULL) && (ctxt->returnValue >= 300) &&
1017 (ctxt->returnValue < 400)) {
1018#ifdef DEBUG_HTTP
1019 xmlGenericError(xmlGenericErrorContext,
1020 "\nRedirect to: %s\n", ctxt->location);
1021#endif
1022 while (xmlNanoHTTPRecv(ctxt)) ;
1023 if (nbRedirects < XML_NANO_HTTP_MAX_REDIR) {
1024 nbRedirects++;
Daniel Veillard9403a042001-05-28 11:00:53 +00001025 if (redirURL != NULL)
1026 xmlFree(redirURL);
Owen Taylor3473f882001-02-23 17:55:21 +00001027 redirURL = xmlMemStrdup(ctxt->location);
1028 xmlNanoHTTPFreeCtxt(ctxt);
1029 goto retry;
1030 }
1031 xmlNanoHTTPFreeCtxt(ctxt);
Daniel Veillard9403a042001-05-28 11:00:53 +00001032 if (redirURL != NULL) xmlFree(redirURL);
Owen Taylor3473f882001-02-23 17:55:21 +00001033#ifdef DEBUG_HTTP
1034 xmlGenericError(xmlGenericErrorContext,
1035 "Too many redirects, aborting ...\n");
1036#endif
1037 return(NULL);
1038
1039 }
1040
1041 if (contentType != NULL) {
1042 if (ctxt->contentType != NULL)
1043 *contentType = xmlMemStrdup(ctxt->contentType);
1044 else
1045 *contentType = NULL;
1046 }
1047
Daniel Veillard9403a042001-05-28 11:00:53 +00001048 if ((redir != NULL) && (redirURL != NULL)) {
1049 *redir = redirURL;
1050 } else {
1051 if (redirURL != NULL)
1052 xmlFree(redirURL);
1053 if (redir != NULL)
1054 *redir = NULL;
1055 }
1056
Owen Taylor3473f882001-02-23 17:55:21 +00001057#ifdef DEBUG_HTTP
1058 if (ctxt->contentType != NULL)
1059 xmlGenericError(xmlGenericErrorContext,
1060 "\nCode %d, content-type '%s'\n\n",
1061 ctxt->returnValue, ctxt->contentType);
1062 else
1063 xmlGenericError(xmlGenericErrorContext,
1064 "\nCode %d, no content-type\n\n",
1065 ctxt->returnValue);
1066#endif
1067
1068 return((void *) ctxt);
1069}
1070
/**
 * xmlNanoHTTPMethod:
 * @URL:  The URL to load
 * @method:  the HTTP method to use
 * @input:  the input string if any
 * @contentType:  the Content-Type information IN and OUT
 * @headers:  the extra headers
 *
 * This function tries to open a connection to the indicated resource
 * via HTTP using the given @method, adding the given extra headers
 * and the input buffer for the request content. It is
 * xmlNanoHTTPMethodRedir() without reporting the redirected URL.
 *
 * Returns NULL in case of failure, otherwise a request handler.
 *     The contentType, if provided must be freed by the caller
 */

void*
xmlNanoHTTPMethod(const char *URL, const char *method, const char *input,
                  char **contentType, const char *headers)
{
    char **noRedir = NULL;

    return(xmlNanoHTTPMethodRedir(URL, method, input, contentType,
                                  noRedir, headers));
}
1093
/**
 * xmlNanoHTTPFetch:
 * @URL:  The URL to load
 * @filename:  the filename where the content should be saved, or "-"
 *             to write the content to the standard output
 * @contentType:  if available the Content-Type information will be
 *                returned at that location
 *
 * This function tries to fetch the indicated resource via HTTP GET
 * and save its content in the file.
 *
 * Returns -1 in case of failure, 0 in case of success. The contentType,
 *     if provided must be freed by the caller; it is released and reset
 *     to NULL here when the file cannot be opened or written.
 */
int
xmlNanoHTTPFetch(const char *URL, const char *filename, char **contentType) {
    void *ctxt;
    char buf[4096];
    int fd;
    int len;
    int ret = 0;
    int toStdout = 0;

    ctxt = xmlNanoHTTPOpen(URL, contentType);
    if (ctxt == NULL) return(-1);

    if (filename == NULL)
        fd = -1;
    else if (!strcmp(filename, "-")) {
        /* Descriptor 1 is the standard output; 0 would be the input. */
        fd = 1;
        toStdout = 1;
    } else
        fd = open(filename, O_CREAT | O_WRONLY, 00644);

    if (fd < 0) {
        ret = -1;
    } else {
        while ((ret == 0) &&
               ((len = xmlNanoHTTPRead(ctxt, buf, sizeof(buf))) > 0)) {
            int done = 0;

            /* write() may be partial or fail: loop and report errors. */
            while (done < len) {
                int n = (int) write(fd, buf + done, len - done);

                if (n <= 0) {
                    ret = -1;
                    break;
                }
                done += n;
            }
        }
        /* The standard output belongs to the process: leave it open. */
        if (!toStdout)
            close(fd);
    }

    xmlNanoHTTPClose(ctxt);
    if ((ret < 0) && (contentType != NULL) && (*contentType != NULL)) {
        xmlFree(*contentType);
        *contentType = NULL;
    }
    return(ret);
}
1139
/**
 * xmlNanoHTTPSave:
 * @ctxt:  the HTTP context
 * @filename:  the filename where the content should be saved, or "-"
 *             to write the content to the standard output
 *
 * This function saves the output of the HTTP transaction to a file.
 * It closes and frees the context at the end, whether or not the save
 * succeeded (except when @ctxt itself is NULL).
 *
 * Returns -1 in case of failure, 0 in case of success.
 */
int
xmlNanoHTTPSave(void *ctxt, const char *filename) {
    char buf[4096];
    int fd;
    int len;
    int ret = 0;
    int toStdout = 0;

    if (ctxt == NULL) return(-1);

    if (filename == NULL)
        fd = -1;
    else if (!strcmp(filename, "-")) {
        /* Descriptor 1 is the standard output; 0 would be the input. */
        fd = 1;
        toStdout = 1;
    } else {
        /* O_CREAT requires the permission argument. */
        fd = open(filename, O_CREAT | O_WRONLY, 00644);
    }

    if (fd < 0) {
        xmlNanoHTTPClose(ctxt);
        return(-1);
    }

    while ((ret == 0) &&
           ((len = xmlNanoHTTPRead(ctxt, buf, sizeof(buf))) > 0)) {
        int done = 0;

        /* write() may be partial or fail: loop and report errors. */
        while (done < len) {
            int n = (int) write(fd, buf + done, len - done);

            if (n <= 0) {
                ret = -1;
                break;
            }
            done += n;
        }
    }

    xmlNanoHTTPClose(ctxt);
    /* Release the descriptor opened here; stdout is left to the process. */
    if (!toStdout)
        close(fd);
    return(ret);
}
1175
1176/**
1177 * xmlNanoHTTPReturnCode:
1178 * @ctx: the HTTP context
1179 *
1180 * Returns the HTTP return code for the request.
1181 */
1182int
1183xmlNanoHTTPReturnCode(void *ctx) {
1184 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1185
1186 if (ctxt == NULL) return(-1);
1187
1188 return(ctxt->returnValue);
1189}
1190
1191/**
1192 * xmlNanoHTTPAuthHeader:
1193 * @ctx: the HTTP context
1194 *
1195 * Returns the stashed value of the WWW-Authenticate or Proxy-Authenticate
1196 * header.
1197 */
1198const char *
1199xmlNanoHTTPAuthHeader(void *ctx) {
1200 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1201
1202 if (ctxt == NULL) return(NULL);
1203
1204 return(ctxt->authHeader);
1205}
1206
1207#ifdef STANDALONE
1208int main(int argc, char **argv) {
1209 char *contentType = NULL;
1210
1211 if (argv[1] != NULL) {
1212 if (argv[2] != NULL)
1213 xmlNanoHTTPFetch(argv[1], argv[2], &contentType);
1214 else
1215 xmlNanoHTTPFetch(argv[1], "-", &contentType);
1216 if (contentType != NULL) xmlFree(contentType);
1217 } else {
1218 xmlGenericError(xmlGenericErrorContext,
1219 "%s: minimal HTTP GET implementation\n", argv[0]);
1220 xmlGenericError(xmlGenericErrorContext,
1221 "\tusage %s [ URL [ filename ] ]\n", argv[0]);
1222 }
1223 xmlNanoHTTPCleanup();
1224 xmlMemoryDump();
1225 return(0);
1226}
1227#endif /* STANDALONE */
1228#else /* !LIBXML_HTTP_ENABLED */
1229#ifdef STANDALONE
1230#include <stdio.h>
1231int main(int argc, char **argv) {
1232 xmlGenericError(xmlGenericErrorContext,
1233 "%s : HTTP support not compiled in\n", argv[0]);
1234 return(0);
1235}
1236#endif /* STANDALONE */
1237#endif /* LIBXML_HTTP_ENABLED */