blob: f3e4827dae420218a07c8e62480c183564ebdd8a [file] [log] [blame]
/*
 * nanohttp.c: minimalist HTTP GET implementation to fetch external subsets.
 *             It focuses on size, streamability, reentrancy and portability.
 *
 * This is clearly not a general purpose HTTP implementation.
 * If you are looking for one, check:
 *         http://www.w3.org/Library/
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
13
/* TODO: add compression support: send the Accept-Encoding header and
   decompress on the fly with zlib if it is found at compile time. */
16
Daniel Veillard3c558c31999-12-22 11:30:41 +000017#ifdef WIN32
Daniel Veillard0142b842000-01-14 14:45:24 +000018#define INCLUDE_WINSOCK
Daniel Veillard3c558c31999-12-22 11:30:41 +000019#include "win32config.h"
20#else
Daniel Veillard4ecf39f1999-09-22 12:14:03 +000021#include "config.h"
22#endif
23
Daniel Veillard3c558c31999-12-22 11:30:41 +000024
Daniel Veillard4ecf39f1999-09-22 12:14:03 +000025#include <stdio.h>
26#include <string.h>
27
28#ifdef HAVE_STDLIB_H
29#include <stdlib.h>
30#endif
31#ifdef HAVE_UNISTD_H
32#include <unistd.h>
33#endif
34#ifdef HAVE_SYS_SOCKET_H
35#include <sys/socket.h>
36#endif
37#ifdef HAVE_NETINET_IN_H
38#include <netinet/in.h>
39#endif
40#ifdef HAVE_ARPA_INET_H
41#include <arpa/inet.h>
42#endif
43#ifdef HAVE_NETDB_H
44#include <netdb.h>
45#endif
46#ifdef HAVE_FCNTL_H
47#include <fcntl.h>
48#endif
49#ifdef HAVE_ERRNO_H
50#include <errno.h>
51#endif
52#ifdef HAVE_SYS_TIME_H
53#include <sys/time.h>
54#endif
55#ifdef HAVE_SYS_SELECT_H
56#include <sys/select.h>
57#endif
58
59#include "xmlmemory.h"
Daniel Veillard00fdf371999-10-08 09:40:39 +000060#include "nanohttp.h"
Daniel Veillard4ecf39f1999-09-22 12:14:03 +000061
/* A standalone build always enables the debugging traces. */
#ifdef STANDALONE
#define DEBUG_HTTP
#endif

/* Upper bound on the number of redirections followed for one request. */
#define XML_NANO_HTTP_MAX_REDIR	10

/* Number of bytes requested from the socket by each read. */
#define XML_NANO_HTTP_CHUNK	4096

/* Values stored in the state field of an HTTP context. */
#define XML_NANO_HTTP_CLOSED	0
#define XML_NANO_HTTP_WRITE	1
#define XML_NANO_HTTP_READ	2
#define XML_NANO_HTTP_NONE	4
74
/*
 * Per-request state: the components of the parsed URL, the socket,
 * the outgoing request and the incoming answer with its cursors.
 */
typedef struct xmlNanoHTTPCtxt {
    /* target of the request, as extracted from the URL */
    char *protocol;	/* the protocol name */
    char *hostname;	/* the host name */
    int port;		/* the port */
    char *path;		/* the path within the URL */

    /* connection */
    int fd;		/* the file descriptor for the socket */
    int state;		/* one of the XML_NANO_HTTP_xxx state values */

    /* request being sent */
    char *out;		/* buffer sent (zero terminated) */
    char *outptr;	/* current position within the buffer sent */

    /* answer being received */
    char *in;		/* the receiving buffer */
    char *content;	/* the start of the content */
    char *inptr;	/* where the next byte read from the network goes */
    char *inrptr;	/* the next byte to give back to the client */
    int inlen;		/* allocated size of the receiving buffer */

    /* results */
    int last;		/* return code for last operation */
    int returnValue;	/* the protocol return value */
    char *contentType;	/* the MIME type for the input */
    char *location;	/* the new URL in case of redirect */
} xmlNanoHTTPCtxt, *xmlNanoHTTPCtxtPtr;
94
/* Module-wide state shared by all the HTTP contexts. */
static int initialized = 0;	/* set once xmlNanoHTTPInit() has run */
static char *proxy = NULL;	/* the proxy name if any */
static int proxyPort = 0;	/* the proxy port if any */
98
99/**
100 * xmlNanoHTTPInit:
101 *
102 * Initialize the HTTP protocol layer.
103 * Currently it just checks for proxy informations
104 */
105
106void
107xmlNanoHTTPInit(void) {
108 const char *env;
109
110 if (initialized)
111 return;
112
113 if (proxy == NULL) {
114 proxyPort = 80;
115 env = getenv("no_proxy");
116 if (env != NULL)
117 goto done;
118 env = getenv("http_proxy");
119 if (env != NULL) {
120 xmlNanoHTTPScanProxy(env);
121 goto done;
122 }
123 env = getenv("HTTP_PROXY");
124 if (env != NULL) {
125 xmlNanoHTTPScanProxy(env);
126 goto done;
127 }
128 }
129done:
130 initialized = 1;
131}
132
133/**
134 * xmlNanoHTTPClenup:
135 *
136 * Cleanup the HTTP protocol layer.
137 */
138
139void
140xmlNanoHTTPCleanup(void) {
141 if (proxy != NULL)
142 xmlFree(proxy);
143 initialized = 0;
144 return;
145}
146
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000147/**
148 * xmlNanoHTTPScanURL:
149 * @ctxt: an HTTP context
150 * @URL: The URL used to initialize the context
151 *
152 * (Re)Initialize an HTTP context by parsing the URL and finding
153 * the protocol host port and path it indicates.
154 */
155
156static void
157xmlNanoHTTPScanURL(xmlNanoHTTPCtxtPtr ctxt, const char *URL) {
158 const char *cur = URL;
159 char buf[4096];
160 int index = 0;
161 int port = 0;
162
163 if (ctxt->protocol != NULL) {
164 xmlFree(ctxt->protocol);
165 ctxt->protocol = NULL;
166 }
167 if (ctxt->hostname != NULL) {
168 xmlFree(ctxt->hostname);
169 ctxt->hostname = NULL;
170 }
171 if (ctxt->path != NULL) {
172 xmlFree(ctxt->path);
173 ctxt->path = NULL;
174 }
Daniel Veillarde41f2b72000-01-30 20:00:07 +0000175 if (URL == NULL) return;
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000176 buf[index] = 0;
177 while (*cur != 0) {
178 if ((cur[0] == ':') && (cur[1] == '/') && (cur[2] == '/')) {
179 buf[index] = 0;
180 ctxt->protocol = xmlMemStrdup(buf);
181 index = 0;
182 cur += 3;
183 break;
184 }
185 buf[index++] = *cur++;
186 }
187 if (*cur == 0) return;
188
189 buf[index] = 0;
190 while (1) {
191 if (cur[0] == ':') {
192 buf[index] = 0;
193 ctxt->hostname = xmlMemStrdup(buf);
194 index = 0;
195 cur += 1;
196 while ((*cur >= '0') && (*cur <= '9')) {
197 port *= 10;
198 port += *cur - '0';
199 cur++;
200 }
201 if (port != 0) ctxt->port = port;
202 while ((cur[0] != '/') && (*cur != 0))
203 cur++;
204 break;
205 }
206 if ((*cur == '/') || (*cur == 0)) {
207 buf[index] = 0;
208 ctxt->hostname = xmlMemStrdup(buf);
209 index = 0;
210 break;
211 }
212 buf[index++] = *cur++;
213 }
214 if (*cur == 0)
215 ctxt->path = xmlMemStrdup("/");
216 else {
Daniel Veillard726e8792000-01-30 20:04:29 +0000217 index = 0;
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000218 buf[index] = 0;
Daniel Veillard726e8792000-01-30 20:04:29 +0000219 while (*cur != 0)
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000220 buf[index++] = *cur++;
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000221 buf[index] = 0;
222 ctxt->path = xmlMemStrdup(buf);
223 }
224}
225
226/**
Daniel Veillarde41f2b72000-01-30 20:00:07 +0000227 * xmlNanoHTTPScanProxy:
228 * @URL: The proxy URL used to initialize the proxy context
229 *
230 * (Re)Initialize the HTTP Proxy context by parsing the URL and finding
231 * the protocol host port it indicates.
232 * Should be like http://myproxy/ or http://myproxy:3128/
233 * A NULL URL cleans up proxy informations.
234 */
235
236void
237xmlNanoHTTPScanProxy(const char *URL) {
238 const char *cur = URL;
239 char buf[4096];
240 int index = 0;
241 int port = 0;
242
243 if (proxy != NULL) {
244 xmlFree(proxy);
245 proxy = NULL;
246 }
247 if (proxyPort != 0) {
248 proxyPort = 0;
249 }
250#ifdef DEBUG_HTTP
251 if (URL == NULL)
252 printf("Removing HTTP proxy info\n");
253 else
254 printf("Using HTTP proxy %s\n", URL);
255#endif
256 if (URL == NULL) return;
257 buf[index] = 0;
258 while (*cur != 0) {
259 if ((cur[0] == ':') && (cur[1] == '/') && (cur[2] == '/')) {
260 buf[index] = 0;
261 index = 0;
262 cur += 3;
263 break;
264 }
265 buf[index++] = *cur++;
266 }
267 if (*cur == 0) return;
268
269 buf[index] = 0;
270 while (1) {
271 if (cur[0] == ':') {
272 buf[index] = 0;
273 proxy = xmlMemStrdup(buf);
274 index = 0;
275 cur += 1;
276 while ((*cur >= '0') && (*cur <= '9')) {
277 port *= 10;
278 port += *cur - '0';
279 cur++;
280 }
281 if (port != 0) proxyPort = port;
282 while ((cur[0] != '/') && (*cur != 0))
283 cur++;
284 break;
285 }
286 if ((*cur == '/') || (*cur == 0)) {
287 buf[index] = 0;
288 proxy = xmlMemStrdup(buf);
289 index = 0;
290 break;
291 }
292 buf[index++] = *cur++;
293 }
294}
295
296/**
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000297 * xmlNanoHTTPNewCtxt:
298 * @URL: The URL used to initialize the context
299 *
300 * Allocate and initialize a new HTTP context.
301 *
302 * Returns an HTTP context or NULL in case of error.
303 */
304
305static xmlNanoHTTPCtxtPtr
306xmlNanoHTTPNewCtxt(const char *URL) {
307 xmlNanoHTTPCtxtPtr ret;
308
309 ret = (xmlNanoHTTPCtxtPtr) xmlMalloc(sizeof(xmlNanoHTTPCtxt));
310 if (ret == NULL) return(NULL);
311
312 memset(ret, 0, sizeof(xmlNanoHTTPCtxt));
313 ret->port = 80;
314 ret->returnValue = 0;
315
316 xmlNanoHTTPScanURL(ret, URL);
317
318 return(ret);
319}
320
321/**
322 * xmlNanoHTTPFreeCtxt:
323 * @ctxt: an HTTP context
324 *
325 * Frees the context after closing the connection.
326 */
327
328static void
329xmlNanoHTTPFreeCtxt(xmlNanoHTTPCtxtPtr ctxt) {
330 if (ctxt == NULL) return;
331 if (ctxt->hostname != NULL) xmlFree(ctxt->hostname);
332 if (ctxt->protocol != NULL) xmlFree(ctxt->protocol);
333 if (ctxt->path != NULL) xmlFree(ctxt->path);
334 if (ctxt->out != NULL) xmlFree(ctxt->out);
335 if (ctxt->in != NULL) xmlFree(ctxt->in);
336 if (ctxt->contentType != NULL) xmlFree(ctxt->contentType);
337 if (ctxt->location != NULL) xmlFree(ctxt->location);
338 ctxt->state = XML_NANO_HTTP_NONE;
339 if (ctxt->fd >= 0) close(ctxt->fd);
340 ctxt->fd = -1;
341 xmlFree(ctxt);
342}
343
344/**
345 * xmlNanoHTTPSend:
346 * @ctxt: an HTTP context
347 *
348 * Send the input needed to initiate the processing on the server side
349 */
350
351static void
352xmlNanoHTTPSend(xmlNanoHTTPCtxtPtr ctxt) {
353 if (ctxt->state & XML_NANO_HTTP_WRITE)
354 ctxt->last = write(ctxt->fd, ctxt->outptr, strlen(ctxt->outptr));
355}
356
357/**
358 * xmlNanoHTTPRecv:
359 * @ctxt: an HTTP context
360 *
361 * Read information coming from the HTTP connection.
362 * This is a blocking call (but it blocks in select(), not read()).
363 *
364 * Returns the number of byte read or -1 in case of error.
365 */
366
367static int
368xmlNanoHTTPRecv(xmlNanoHTTPCtxtPtr ctxt) {
369 fd_set rfd;
370 struct timeval tv;
371
372
373 while (ctxt->state & XML_NANO_HTTP_READ) {
374 if (ctxt->in == NULL) {
375 ctxt->in = (char *) xmlMalloc(65000 * sizeof(char));
376 if (ctxt->in == NULL) {
377 ctxt->last = -1;
378 return(-1);
379 }
380 ctxt->inlen = 65000;
381 ctxt->inptr = ctxt->content = ctxt->inrptr = ctxt->in;
382 }
383 if (ctxt->inrptr > ctxt->in + XML_NANO_HTTP_CHUNK) {
384 int delta = ctxt->inrptr - ctxt->in;
385 int len = ctxt->inptr - ctxt->inrptr;
386
387 memmove(ctxt->in, ctxt->inrptr, len);
388 ctxt->inrptr -= delta;
389 ctxt->content -= delta;
390 ctxt->inptr -= delta;
391 }
392 if ((ctxt->in + ctxt->inlen) < (ctxt->inptr + XML_NANO_HTTP_CHUNK)) {
393 int d_inptr = ctxt->inptr - ctxt->in;
394 int d_content = ctxt->content - ctxt->in;
395 int d_inrptr = ctxt->inrptr - ctxt->in;
396
397 ctxt->inlen *= 2;
398 ctxt->in = (char *) xmlRealloc(ctxt->in, ctxt->inlen);
399 if (ctxt->in == NULL) {
400 ctxt->last = -1;
401 return(-1);
402 }
403 ctxt->inptr = ctxt->in + d_inptr;
404 ctxt->content = ctxt->in + d_content;
405 ctxt->inrptr = ctxt->in + d_inrptr;
406 }
407 ctxt->last = read(ctxt->fd, ctxt->inptr, XML_NANO_HTTP_CHUNK);
408 if (ctxt->last > 0) {
409 ctxt->inptr += ctxt->last;
410 return(ctxt->last);
411 }
412 if (ctxt->last == 0) {
413 return(0);
414 }
415#ifdef EWOULDBLOCK
416 if ((ctxt->last == -1) && (errno != EWOULDBLOCK)) {
417 return(0);
418 }
419#endif
420 tv.tv_sec=10;
421 tv.tv_usec=0;
422 FD_ZERO(&rfd);
423 FD_SET(ctxt->fd, &rfd);
424
425 if(select(ctxt->fd+1, &rfd, NULL, NULL, &tv)<1)
426 return(0);
427 }
428 return(0);
429}
430
431/**
432 * xmlNanoHTTPReadLine:
433 * @ctxt: an HTTP context
434 *
435 * Read one line in the HTTP server output, usually for extracting
436 * the HTTP protocol informations from the answer header.
437 *
438 * Returns a newly allocated string with a copy of the line, or NULL
439 * which indicate the end of the input.
440 */
441
442static char *
443xmlNanoHTTPReadLine(xmlNanoHTTPCtxtPtr ctxt) {
444 char buf[4096];
445 char *bp=buf;
446
447 while(bp - buf < 4095) {
448 if(ctxt->inrptr == ctxt->inptr) {
449 if (xmlNanoHTTPRecv(ctxt) == 0) {
450 if (bp == buf)
451 return(NULL);
452 else
453 *bp = 0;
454 return(xmlMemStrdup(buf));
455 }
456 }
457 *bp = *ctxt->inrptr++;
458 if(*bp == '\n') {
459 *bp = 0;
460 return(xmlMemStrdup(buf));
461 }
462 if(*bp != '\r')
463 bp++;
464 }
465 buf[4095] = 0;
466 return(xmlMemStrdup(buf));
467}
468
469
470/**
471 * xmlNanoHTTPScanAnswer:
472 * @ctxt: an HTTP context
473 * @line: an HTTP header line
474 *
475 * Try to extract useful informations from the server answer.
476 * We currently parse and process:
477 * - The HTTP revision/ return code
478 * - The Content-Type
479 * - The Location for redirrect processing.
480 *
481 * Returns -1 in case of failure, the file descriptor number otherwise
482 */
483
484static void
485xmlNanoHTTPScanAnswer(xmlNanoHTTPCtxtPtr ctxt, const char *line) {
486 const char *cur = line;
487
488 if (line == NULL) return;
489
490 if (!strncmp(line, "HTTP/", 5)) {
491 int version = 0;
492 int ret = 0;
493
494 cur += 5;
495 while ((*cur >= '0') && (*cur <= '9')) {
496 version *= 10;
497 version += *cur - '0';
498 cur++;
499 }
500 if (*cur == '.') {
501 cur++;
502 if ((*cur >= '0') && (*cur <= '9')) {
503 version *= 10;
504 version += *cur - '0';
505 cur++;
506 }
507 while ((*cur >= '0') && (*cur <= '9'))
508 cur++;
509 } else
510 version *= 10;
511 if ((*cur != ' ') && (*cur != '\t')) return;
512 while ((*cur == ' ') || (*cur == '\t')) cur++;
513 if ((*cur < '0') || (*cur > '9')) return;
514 while ((*cur >= '0') && (*cur <= '9')) {
515 ret *= 10;
516 ret += *cur - '0';
517 cur++;
518 }
519 if ((*cur != 0) && (*cur != ' ') && (*cur != '\t')) return;
520 ctxt->returnValue = ret;
521 } else if (!strncmp(line, "Content-Type:", 13)) {
522 cur += 13;
523 while ((*cur == ' ') || (*cur == '\t')) cur++;
524 if (ctxt->contentType != NULL)
525 xmlFree(ctxt->contentType);
526 ctxt->contentType = xmlMemStrdup(cur);
527 } else if (!strncmp(line, "ContentType:", 12)) {
528 cur += 12;
529 if (ctxt->contentType != NULL) return;
530 while ((*cur == ' ') || (*cur == '\t')) cur++;
531 ctxt->contentType = xmlMemStrdup(cur);
532 } else if (!strncmp(line, "content-type:", 13)) {
533 cur += 13;
534 if (ctxt->contentType != NULL) return;
535 while ((*cur == ' ') || (*cur == '\t')) cur++;
536 ctxt->contentType = xmlMemStrdup(cur);
537 } else if (!strncmp(line, "contenttype:", 12)) {
538 cur += 12;
539 if (ctxt->contentType != NULL) return;
540 while ((*cur == ' ') || (*cur == '\t')) cur++;
541 ctxt->contentType = xmlMemStrdup(cur);
542 } else if (!strncmp(line, "Location:", 9)) {
543 cur += 9;
544 while ((*cur == ' ') || (*cur == '\t')) cur++;
545 if (ctxt->location != NULL)
546 xmlFree(ctxt->location);
547 ctxt->location = xmlMemStrdup(cur);
548 } else if (!strncmp(line, "location:", 9)) {
549 cur += 9;
550 if (ctxt->location != NULL) return;
551 while ((*cur == ' ') || (*cur == '\t')) cur++;
552 ctxt->location = xmlMemStrdup(cur);
553 }
554}
555
/**
 * xmlNanoHTTPConnectAttempt:
 * @ia:  an internet address structure
 * @port:  the port number
 *
 * Attempt a connection to the given IP:port endpoint. It forces
 * non-blocking semantic on the socket, and allows 60 seconds for
 * the host to answer.
 * Once the socket is reported writable, the pending socket error is
 * checked (where SO_ERROR is available) so that a refused or failed
 * connection is reported as a failure and not as a connected socket.
 *
 * Returns -1 in case of failure, the file descriptor number otherwise
 */

static int
xmlNanoHTTPConnectAttempt(struct in_addr ia, int port)
{
    int s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
    struct sockaddr_in sin;
    fd_set wfd;
    struct timeval tv;
    int status;

    if (s == -1) {
#ifdef DEBUG_HTTP
        perror("socket");
#endif
        return(-1);
    }

    /* switch the socket to non-blocking mode */
#ifdef _WINSOCKAPI_
    {
        u_long one = 1;

        status = ioctlsocket(s, FIONBIO, &one) == SOCKET_ERROR ? -1 : 0;
    }
#else /* _WINSOCKAPI_ */
#if defined(VMS)
    {
        int enable = 1;
        status = IOCTL(s, FIONBIO, &enable);
    }
#else /* VMS */
    if ((status = fcntl(s, F_GETFL, 0)) != -1) {
#ifdef O_NONBLOCK
        status |= O_NONBLOCK;
#else /* O_NONBLOCK */
#ifdef F_NDELAY
        status |= F_NDELAY;
#endif /* F_NDELAY */
#endif /* !O_NONBLOCK */
        status = fcntl(s, F_SETFL, status);
    }
    if (status < 0) {
#ifdef DEBUG_HTTP
        perror("nonblocking");
#endif
        close(s);
        return(-1);
    }
#endif /* !VMS */
#endif /* !_WINSOCKAPI_ */

    memset(&sin, 0, sizeof(sin));
    sin.sin_family = AF_INET;
    sin.sin_addr = ia;
    sin.sin_port = htons(port);

    if ((connect(s, (struct sockaddr *)&sin, sizeof(sin)) == -1) &&
        (errno != EINPROGRESS)) {
#ifdef DEBUG_HTTP
        perror("connect");
#endif
        close(s);
        return(-1);
    }

    tv.tv_sec = 60;		/* We use 60 second timeouts for now */
    tv.tv_usec = 0;

    FD_ZERO(&wfd);
    FD_SET(s, &wfd);

    switch (select(s + 1, NULL, &wfd, NULL, &tv)) {
        case 0:
            /* Time out */
            close(s);
            return(-1);
        case -1:
            /* select() itself failed */
#ifdef DEBUG_HTTP
            perror("select");
#endif
            close(s);
            return(-1);
        default:
            break;
    }

#ifdef SO_ERROR
    {
        /*
         * A non-blocking connect which failed also makes the socket
         * writable: the outcome is only known from the pending error.
         */
        int soerr = 0;
#ifdef _WINSOCKAPI_
        int solen = sizeof(soerr);
#else
        socklen_t solen = sizeof(soerr);
#endif

        if ((getsockopt(s, SOL_SOCKET, SO_ERROR, (char *) &soerr,
                        &solen) < 0) || (soerr != 0)) {
            close(s);
            return(-1);
        }
    }
#endif /* SO_ERROR */

    return(s);
}
655
/**
 * xmlNanoHTTPConnectHost:
 * @host:  the host name
 * @port:  the port number
 *
 * Attempt a connection to the given host:port endpoint. It tries
 * the multiple IP provided by the DNS if available, in order, and
 * stops at the first one which can be connected.
 * Only IPv4 answers from the resolver are used.
 *
 * Returns -1 in case of failure, the file descriptor number otherwise
 */

static int
xmlNanoHTTPConnectHost(const char *host, int port)
{
    struct hostent *h;
    int i;
    int s;

    if (host == NULL)
        return(-1);

    h = gethostbyname(host);
    if (h == NULL) {
#ifdef DEBUG_HTTP
        fprintf(stderr, "unable to resolve '%s'.\n", host);
#endif
        return(-1);
    }

    /* the addresses are copied into a struct in_addr below */
    if ((h->h_addrtype != AF_INET) ||
        (h->h_length != (int) sizeof(struct in_addr))) {
#ifdef DEBUG_HTTP
        fprintf(stderr, "unable to connect to '%s'.\n", host);
#endif
        return(-1);
    }

    for (i = 0; h->h_addr_list[i]; i++) {
        struct in_addr ia;

        memcpy(&ia, h->h_addr_list[i], sizeof(ia));
        s = xmlNanoHTTPConnectAttempt(ia, port);
        if (s != -1)
            return(s);
    }

#ifdef DEBUG_HTTP
    fprintf(stderr, "unable to connect to '%s'.\n", host);
#endif
    return(-1);
}
697
698
699/**
700 * xmlNanoHTTPOpen:
701 * @URL: The URL to load
702 * @contentType: if available the Content-Type information will be
703 * returned at that location
704 *
705 * This function try to open a connection to the indicated resource
706 * via HTTP GET.
707 *
708 * Returns NULL in case of failure, otherwise a request handler.
709 * The contentType, if provided must be freed by the caller
710 */
711
712void *
713xmlNanoHTTPOpen(const char *URL, char **contentType) {
714 xmlNanoHTTPCtxtPtr ctxt;
715 char buf[4096];
716 int ret;
717 char *p;
718 int head;
719 int nbRedirects = 0;
720 char *redirURL = NULL;
721
Daniel Veillarde41f2b72000-01-30 20:00:07 +0000722 xmlNanoHTTPInit();
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000723 if (contentType != NULL) *contentType = NULL;
724
725retry:
726 if (redirURL == NULL)
727 ctxt = xmlNanoHTTPNewCtxt(URL);
728 else {
729 ctxt = xmlNanoHTTPNewCtxt(redirURL);
730 xmlFree(redirURL);
731 redirURL = NULL;
732 }
733
734 if ((ctxt->protocol == NULL) || (strcmp(ctxt->protocol, "http"))) {
735 xmlNanoHTTPFreeCtxt(ctxt);
736 if (redirURL != NULL) xmlFree(redirURL);
737 return(NULL);
738 }
739 if (ctxt->hostname == NULL) {
740 xmlNanoHTTPFreeCtxt(ctxt);
741 return(NULL);
742 }
Daniel Veillarde41f2b72000-01-30 20:00:07 +0000743 if (proxy)
744 ret = xmlNanoHTTPConnectHost(proxy, proxyPort);
745 else
746 ret = xmlNanoHTTPConnectHost(ctxt->hostname, ctxt->port);
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000747 if (ret < 0) {
748 xmlNanoHTTPFreeCtxt(ctxt);
749 return(NULL);
750 }
751 ctxt->fd = ret;
Daniel Veillarde41f2b72000-01-30 20:00:07 +0000752 if (proxy) {
753#ifdef have_snprintf
754 if (ctxt->port != 80)
755 snprintf(buf, sizeof(buf),
756 "GET http://%s:%d%s HTTP/1.0\r\nHost: %s\r\n\r\n",
757 ctxt->hostname, ctxt->port, ctxt->path, ctxt->hostname);
758 else
759 snprintf(buf, sizeof(buf),"GET http://%s%s HTTP/1.0\r\nHost: %s\r\n\r\n",
760 ctxt->hostname, ctxt->path, ctxt->hostname);
Daniel Veillard335849b1999-09-23 23:08:42 +0000761#else
Daniel Veillarde41f2b72000-01-30 20:00:07 +0000762 if (ctxt->port != 80)
763 sprintf(buf,
764 "GET http://%s:%d%s HTTP/1.0\r\nHost: %s\r\n\r\n",
765 ctxt->hostname, ctxt->port, ctxt->path, ctxt->hostname);
766 else
767 sprintf(buf, "GET http://%s%s HTTP/1.0\r\nHost: %s\r\n\r\n",
768 ctxt->hostname, ctxt->path, ctxt->hostname);
Daniel Veillard335849b1999-09-23 23:08:42 +0000769#endif
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000770#ifdef DEBUG_HTTP
Daniel Veillarde41f2b72000-01-30 20:00:07 +0000771 if (ctxt->port != 80)
772 printf("-> Proxy GET http://%s:%d%s HTTP/1.0\n-> Host: %s\n\n",
773 ctxt->hostname, ctxt->port, ctxt->path, ctxt->hostname);
774 else
775 printf("-> Proxy GET http://%s%s HTTP/1.0\n-> Host: %s\n\n",
776 ctxt->hostname, ctxt->path, ctxt->hostname);
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000777#endif
Daniel Veillarde41f2b72000-01-30 20:00:07 +0000778 } else {
779#ifdef HAVE_SNPRINTF
780 snprintf(buf, sizeof(buf),"GET %s HTTP/1.0\r\nHost: %s\r\n\r\n",
781 ctxt->path, ctxt->hostname);
782#else
783 sprintf(buf, "GET %s HTTP/1.0\r\nHost: %s\r\n\r\n",
784 ctxt->path, ctxt->hostname);
785#endif
786#ifdef DEBUG_HTTP
787 printf("-> GET %s HTTP/1.0\n-> Host: %s\n\n",
788 ctxt->path, ctxt->hostname);
789#endif
790 }
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000791 ctxt->outptr = ctxt->out = xmlMemStrdup(buf);
792 ctxt->state = XML_NANO_HTTP_WRITE;
793 xmlNanoHTTPSend(ctxt);
794 ctxt->state = XML_NANO_HTTP_READ;
795 head = 1;
796
797 while ((p = xmlNanoHTTPReadLine(ctxt)) != NULL) {
798 if (head && (*p == 0)) {
799 head = 0;
800 ctxt->content = ctxt->inrptr;
Daniel Veillarde41f2b72000-01-30 20:00:07 +0000801 xmlFree(p);
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000802 break;
803 }
804 xmlNanoHTTPScanAnswer(ctxt, p);
805
806#ifdef DEBUG_HTTP
807 if (p != NULL) printf("<- %s\n", p);
808#endif
809 if (p != NULL) xmlFree(p);
810 }
811
812 if ((ctxt->location != NULL) && (ctxt->returnValue >= 300) &&
813 (ctxt->returnValue < 400)) {
814#ifdef DEBUG_HTTP
815 printf("\nRedirect to: %s\n", ctxt->location);
816#endif
817 while (xmlNanoHTTPRecv(ctxt)) ;
818 if (nbRedirects < XML_NANO_HTTP_MAX_REDIR) {
819 nbRedirects++;
820 redirURL = xmlMemStrdup(ctxt->location);
821 xmlNanoHTTPFreeCtxt(ctxt);
822 goto retry;
823 }
824 xmlNanoHTTPFreeCtxt(ctxt);
825#ifdef DEBUG_HTTP
826 printf("Too many redirrects, aborting ...\n");
827#endif
828 return(NULL);
829
830 }
831
832 if ((contentType != NULL) && (ctxt->contentType != NULL))
833 *contentType = xmlMemStrdup(ctxt->contentType);
834
835#ifdef DEBUG_HTTP
836 if (ctxt->contentType != NULL)
837 printf("\nCode %d, content-type '%s'\n\n",
838 ctxt->returnValue, ctxt->contentType);
839 else
840 printf("\nCode %d, no content-type\n\n",
841 ctxt->returnValue);
842#endif
843
844 return((void *) ctxt);
845}
846
847/**
848 * xmlNanoHTTPRead:
849 * @ctx: the HTTP context
850 * @dest: a buffer
851 * @len: the buffer length
852 *
853 * This function tries to read @len bytes from the existing HTTP connection
854 * and saves them in @dest. This is a blocking call.
855 *
856 * Returns the number of byte read. 0 is an indication of an end of connection.
857 * -1 indicates a parameter error.
858 */
859int
860xmlNanoHTTPRead(void *ctx, void *dest, int len) {
861 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
862
863 if (ctx == NULL) return(-1);
864 if (dest == NULL) return(-1);
865 if (len <= 0) return(0);
866
867 while (ctxt->inptr - ctxt->inrptr < len) {
868 if (xmlNanoHTTPRecv(ctxt) == 0) break;
869 }
870 if (ctxt->inptr - ctxt->inrptr < len)
871 len = ctxt->inptr - ctxt->inrptr;
872 memcpy(dest, ctxt->inrptr, len);
873 ctxt->inrptr += len;
874 return(len);
875}
876
877/**
878 * xmlNanoHTTPClose:
879 * @ctx: the HTTP context
880 *
881 * This function closes an HTTP context, it ends up the connection and
882 * free all data related to it.
883 */
884void
885xmlNanoHTTPClose(void *ctx) {
886 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
887
888 if (ctx == NULL) return;
889
890 xmlNanoHTTPFreeCtxt(ctxt);
891}
892
Daniel Veillard00fdf371999-10-08 09:40:39 +0000893#ifndef DEBUG_HTTP
894#define DEBUG_HTTP
895#endif
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000896/**
897 * xmlNanoHTTPMethod:
898 * @URL: The URL to load
899 * @method: the HTTP method to use
900 * @input: the input string if any
901 * @contentType: the Content-Type information IN and OUT
902 * @headers: the extra headers
903 *
904 * This function try to open a connection to the indicated resource
905 * via HTTP using the given @method, adding the given extra headers
906 * and the input buffer for the request content.
907 *
908 * Returns NULL in case of failure, otherwise a request handler.
909 * The contentType, if provided must be freed by the caller
910 */
911
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000912void *
913xmlNanoHTTPMethod(const char *URL, const char *method, const char *input,
914 char **contentType, const char *headers) {
915 xmlNanoHTTPCtxtPtr ctxt;
916 char buf[20000];
917 int ret;
918 char *p;
919 int head;
920 int nbRedirects = 0;
921 char *redirURL = NULL;
922
923 if (URL == NULL) return(NULL);
924 if (method == NULL) method = "GET";
925 if (contentType != NULL) *contentType = NULL;
926
927retry:
928 if (redirURL == NULL)
929 ctxt = xmlNanoHTTPNewCtxt(URL);
930 else {
931 ctxt = xmlNanoHTTPNewCtxt(redirURL);
932 xmlFree(redirURL);
933 redirURL = NULL;
934 }
935
936 if ((ctxt->protocol == NULL) || (strcmp(ctxt->protocol, "http"))) {
937 xmlNanoHTTPFreeCtxt(ctxt);
938 if (redirURL != NULL) xmlFree(redirURL);
939 return(NULL);
940 }
941 if (ctxt->hostname == NULL) {
942 xmlNanoHTTPFreeCtxt(ctxt);
943 return(NULL);
944 }
945 ret = xmlNanoHTTPConnectHost(ctxt->hostname, ctxt->port);
946 if (ret < 0) {
947 xmlNanoHTTPFreeCtxt(ctxt);
948 return(NULL);
949 }
950 ctxt->fd = ret;
951
952 if (input == NULL) {
953 if (headers == NULL) {
954 if ((contentType == NULL) || (*contentType == NULL)) {
Daniel Veillard335849b1999-09-23 23:08:42 +0000955#ifdef HAVE_SNPRINTF
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000956 snprintf(buf, sizeof(buf),
957 "%s %s HTTP/1.0\r\nHost: %s\r\n\r\n",
958 method, ctxt->path, ctxt->hostname);
Daniel Veillard335849b1999-09-23 23:08:42 +0000959#else
960 sprintf(buf,
961 "%s %s HTTP/1.0\r\nHost: %s\r\n\r\n",
962 method, ctxt->path, ctxt->hostname);
963#endif
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000964 } else {
Daniel Veillard335849b1999-09-23 23:08:42 +0000965#ifdef HAVE_SNPRINTF
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000966 snprintf(buf, sizeof(buf),
967 "%s %s HTTP/1.0\r\nHost: %s\r\nContent-Type: %s\r\n\r\n",
968 method, ctxt->path, ctxt->hostname, *contentType);
Daniel Veillard335849b1999-09-23 23:08:42 +0000969#else
970 sprintf(buf,
971 "%s %s HTTP/1.0\r\nHost: %s\r\nContent-Type: %s\r\n\r\n",
972 method, ctxt->path, ctxt->hostname, *contentType);
973#endif
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000974 }
975 } else {
976 if ((contentType == NULL) || (*contentType == NULL)) {
Daniel Veillard335849b1999-09-23 23:08:42 +0000977#ifdef HAVE_SNPRINTF
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000978 snprintf(buf, sizeof(buf),
979 "%s %s HTTP/1.0\r\nHost: %s\r\n%s\r\n",
980 method, ctxt->path, ctxt->hostname, headers);
Daniel Veillard335849b1999-09-23 23:08:42 +0000981#else
982 sprintf(buf,
983 "%s %s HTTP/1.0\r\nHost: %s\r\n%s\r\n",
984 method, ctxt->path, ctxt->hostname, headers);
985#endif
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000986 } else {
Daniel Veillard335849b1999-09-23 23:08:42 +0000987#ifdef HAVE_SNPRINTF
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000988 snprintf(buf, sizeof(buf),
989 "%s %s HTTP/1.0\r\nHost: %s\r\nContent-Type: %s\r\n%s\r\n",
990 method, ctxt->path, ctxt->hostname, *contentType,
991 headers);
Daniel Veillard335849b1999-09-23 23:08:42 +0000992#else
993 sprintf(buf,
994 "%s %s HTTP/1.0\r\nHost: %s\r\nContent-Type: %s\r\n%s\r\n",
995 method, ctxt->path, ctxt->hostname, *contentType,
996 headers);
997#endif
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000998 }
999 }
1000 } else {
1001 int len = strlen(input);
1002 if (headers == NULL) {
1003 if ((contentType == NULL) || (*contentType == NULL)) {
Daniel Veillard335849b1999-09-23 23:08:42 +00001004#ifdef HAVE_SNPRINTF
Daniel Veillard4ecf39f1999-09-22 12:14:03 +00001005 snprintf(buf, sizeof(buf),
1006 "%s %s HTTP/1.0\r\nHost: %s\r\nContent-Length: %d\r\n\r\n%s",
1007 method, ctxt->path, ctxt->hostname, len, input);
Daniel Veillard335849b1999-09-23 23:08:42 +00001008#else
1009 sprintf(buf,
1010 "%s %s HTTP/1.0\r\nHost: %s\r\nContent-Length: %d\r\n\r\n%s",
1011 method, ctxt->path, ctxt->hostname, len, input);
1012#endif
Daniel Veillard4ecf39f1999-09-22 12:14:03 +00001013 } else {
Daniel Veillard335849b1999-09-23 23:08:42 +00001014#ifdef HAVE_SNPRINTF
Daniel Veillard4ecf39f1999-09-22 12:14:03 +00001015 snprintf(buf, sizeof(buf),
1016"%s %s HTTP/1.0\r\nHost: %s\r\nContent-Type: %s\r\nContent-Length: %d\r\n\r\n%s",
1017 method, ctxt->path, ctxt->hostname, *contentType, len,
1018 input);
Daniel Veillard335849b1999-09-23 23:08:42 +00001019#else
1020 sprintf(buf,
1021"%s %s HTTP/1.0\r\nHost: %s\r\nContent-Type: %s\r\nContent-Length: %d\r\n\r\n%s",
1022 method, ctxt->path, ctxt->hostname, *contentType, len,
1023 input);
1024#endif
Daniel Veillard4ecf39f1999-09-22 12:14:03 +00001025 }
1026 } else {
1027 if ((contentType == NULL) || (*contentType == NULL)) {
Daniel Veillard335849b1999-09-23 23:08:42 +00001028#ifdef HAVE_SNPRINTF
Daniel Veillard4ecf39f1999-09-22 12:14:03 +00001029 snprintf(buf, sizeof(buf),
1030 "%s %s HTTP/1.0\r\nHost: %s\r\nContent-Length: %d\r\n%s\r\n%s",
1031 method, ctxt->path, ctxt->hostname, len,
1032 headers, input);
Daniel Veillard335849b1999-09-23 23:08:42 +00001033#else
1034 sprintf(buf,
1035 "%s %s HTTP/1.0\r\nHost: %s\r\nContent-Length: %d\r\n%s\r\n%s",
1036 method, ctxt->path, ctxt->hostname, len,
1037 headers, input);
1038#endif
Daniel Veillard4ecf39f1999-09-22 12:14:03 +00001039 } else {
Daniel Veillard335849b1999-09-23 23:08:42 +00001040#ifdef HAVE_SNPRINTF
Daniel Veillard4ecf39f1999-09-22 12:14:03 +00001041 snprintf(buf, sizeof(buf),
1042"%s %s HTTP/1.0\r\nHost: %s\r\nContent-Type: %s\r\nContent-Length: %d\r\n%s\r\n%s",
1043 method, ctxt->path, ctxt->hostname, *contentType,
1044 len, headers, input);
Daniel Veillard335849b1999-09-23 23:08:42 +00001045#else
1046 sprintf(buf,
1047"%s %s HTTP/1.0\r\nHost: %s\r\nContent-Type: %s\r\nContent-Length: %d\r\n%s\r\n%s",
1048 method, ctxt->path, ctxt->hostname, *contentType,
1049 len, headers, input);
1050#endif
Daniel Veillard4ecf39f1999-09-22 12:14:03 +00001051 }
1052 }
1053 }
1054#ifdef DEBUG_HTTP
1055 printf("-> %s", buf);
1056#endif
1057 ctxt->outptr = ctxt->out = xmlMemStrdup(buf);
1058 ctxt->state = XML_NANO_HTTP_WRITE;
1059 xmlNanoHTTPSend(ctxt);
1060 ctxt->state = XML_NANO_HTTP_READ;
1061 head = 1;
1062
1063 while ((p = xmlNanoHTTPReadLine(ctxt)) != NULL) {
1064 if (head && (*p == 0)) {
1065 head = 0;
1066 ctxt->content = ctxt->inrptr;
1067 if (p != NULL) xmlFree(p);
1068 break;
1069 }
1070 xmlNanoHTTPScanAnswer(ctxt, p);
1071
1072#ifdef DEBUG_HTTP
1073 if (p != NULL) printf("<- %s\n", p);
1074#endif
1075 if (p != NULL) xmlFree(p);
1076 }
1077
1078 if ((ctxt->location != NULL) && (ctxt->returnValue >= 300) &&
1079 (ctxt->returnValue < 400)) {
1080#ifdef DEBUG_HTTP
1081 printf("\nRedirect to: %s\n", ctxt->location);
1082#endif
1083 while (xmlNanoHTTPRecv(ctxt)) ;
1084 if (nbRedirects < XML_NANO_HTTP_MAX_REDIR) {
1085 nbRedirects++;
1086 redirURL = xmlMemStrdup(ctxt->location);
1087 xmlNanoHTTPFreeCtxt(ctxt);
1088 goto retry;
1089 }
1090 xmlNanoHTTPFreeCtxt(ctxt);
1091#ifdef DEBUG_HTTP
1092 printf("Too many redirrects, aborting ...\n");
1093#endif
1094 return(NULL);
1095
1096 }
1097
1098 if ((contentType != NULL) && (ctxt->contentType != NULL))
1099 *contentType = xmlMemStrdup(ctxt->contentType);
1100 else if (contentType != NULL)
1101 *contentType = NULL;
1102
1103#ifdef DEBUG_HTTP
1104 if (ctxt->contentType != NULL)
1105 printf("\nCode %d, content-type '%s'\n\n",
1106 ctxt->returnValue, ctxt->contentType);
1107 else
1108 printf("\nCode %d, no content-type\n\n",
1109 ctxt->returnValue);
1110#endif
1111
1112 return((void *) ctxt);
1113}
1114
/**
 * xmlNanoHTTPFetch:
 * @URL:  The URL to load
 * @filename:  the filename where the content should be saved, or "-"
 *             for the standard output
 * @contentType:  if available the Content-Type information will be
 *                returned at that location
 *
 * This function tries to fetch the indicated resource via HTTP GET
 * and save its content in the file. The file is created if needed and
 * its previous content is discarded.
 *
 * Returns -1 in case of failure (the resource cannot be opened, the
 *     file cannot be created or written), 0 in case of success.
 *     The contentType, if provided must be freed by the caller; it is
 *     released and reset to NULL when the file cannot be created or
 *     written.
 */
int
xmlNanoHTTPFetch(const char *URL, const char *filename, char **contentType) {
    void *ctxt;
    char buf[4096];
    int fd;
    int len;
    int toStdout;
    int ret = 0;

    if (filename == NULL) {
        if (contentType != NULL) *contentType = NULL;
        return(-1);
    }

    ctxt = xmlNanoHTTPOpen(URL, contentType);
    if (ctxt == NULL) return(-1);

    toStdout = !strcmp(filename, "-");
    if (toStdout)
        fd = 1;			/* the standard output, not descriptor 0 */
    else {
        fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 00644);
        if (fd < 0)
            ret = -1;
    }

    while ((ret == 0) &&
           ((len = xmlNanoHTTPRead(ctxt, buf, sizeof(buf))) > 0)) {
        int done = 0;

        /* a write may be partial: loop until the chunk is saved */
        while (done < len) {
            int nb = write(fd, buf + done, len - done);

            if (nb <= 0) {
                ret = -1;
                break;
            }
            done += nb;
        }
    }

    xmlNanoHTTPClose(ctxt);
    /* the standard output does not belong to this function */
    if ((fd >= 0) && (!toStdout))
        close(fd);
    if ((ret < 0) && (contentType != NULL) && (*contentType != NULL)) {
        xmlFree(*contentType);
        *contentType = NULL;
    }
    return(ret);
}
1160
/**
 * xmlNanoHTTPSave:
 * @ctxt:  the HTTP context
 * @filename:  the filename where the content should be saved, or "-"
 *             for the standard output
 *
 * This function saves the output of the HTTP transaction to a file.
 * The file is created if needed and its previous content is discarded.
 * It closes and frees the context at the end, on failure as well as
 * on success.
 *
 * Returns -1 in case of failure (no context, no filename, the file
 *     cannot be created or written), 0 in case of success.
 */
int
xmlNanoHTTPSave(void *ctxt, const char *filename) {
    char buf[4096];
    int fd = -1;
    int len;
    int toStdout = 0;
    int ret = 0;

    if (ctxt == NULL) return(-1);

    if (filename == NULL)
        ret = -1;
    else {
        toStdout = !strcmp(filename, "-");
        if (toStdout)
            fd = 1;		/* the standard output, not descriptor 0 */
        else {
            /* O_CREAT requires the permissions of the new file */
            fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 00644);
            if (fd < 0)
                ret = -1;
        }
    }

    while ((ret == 0) &&
           ((len = xmlNanoHTTPRead(ctxt, buf, sizeof(buf))) > 0)) {
        int done = 0;

        /* a write may be partial: loop until the chunk is saved */
        while (done < len) {
            int nb = write(fd, buf + done, len - done);

            if (nb <= 0) {
                ret = -1;
                break;
            }
            done += nb;
        }
    }

    xmlNanoHTTPClose(ctxt);
    /* the standard output does not belong to this function */
    if ((fd >= 0) && (!toStdout))
        close(fd);
    return(ret);
}
1196
1197/**
1198 * xmlNanoHTTPReturnCode:
1199 * @ctx: the HTTP context
1200 *
1201 * Returns the HTTP return code for the request.
1202 */
1203int
1204xmlNanoHTTPReturnCode(void *ctx) {
1205 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1206
1207 if (ctxt == NULL) return(-1);
1208
1209 return(ctxt->returnValue);
1210}
1211
#ifdef STANDALONE
/*
 * Standalone test program: fetch the URL given as first argument into
 * the file given as second argument ("-" when there is none), or print
 * a usage message when no URL is given.
 */
int main(int argc, char **argv) {
    char *contentType = NULL;

    if (argv[1] == NULL) {
        printf("%s: minimal HTTP GET implementation\n", argv[0]);
        printf("\tusage %s [ URL [ filename ] ]\n", argv[0]);
    } else {
        const char *target = (argv[2] != NULL) ? argv[2] : "-";

        xmlNanoHTTPFetch(argv[1], target, &contentType);
        if (contentType != NULL)
            xmlFree(contentType);
    }

    xmlNanoHTTPCleanup();
    xmlMemoryDump();
    return(0);
}
#endif /* STANDALONE */