blob: b8cb24ed67914c8ad9aa3e8c14fd80923de9cfa3 [file] [log] [blame]
Daniel Veillard4ecf39f1999-09-22 12:14:03 +00001/*
2 * nanohttp.c: minimalist HTTP GET implementation to fetch external subsets.
3 * focuses on size, streamability, reentrancy and portability
4 *
5 * This is clearly not a general purpose HTTP implementation
6 * If you look for one, check:
7 * http://www.w3.org/Library/
8 *
9 * See Copyright for the status of this software.
10 *
11 * Daniel.Veillard@w3.org
12 */
13
14/* TODO add compression support, Send the Accept- , and decompress on the
15 fly with ZLIB if found at compile-time */
16
Daniel Veillard3c558c31999-12-22 11:30:41 +000017#ifdef WIN32
Daniel Veillard0142b842000-01-14 14:45:24 +000018#define INCLUDE_WINSOCK
Daniel Veillard3c558c31999-12-22 11:30:41 +000019#include "win32config.h"
20#else
Daniel Veillard4ecf39f1999-09-22 12:14:03 +000021#include "config.h"
22#endif
23
Daniel Veillard3c558c31999-12-22 11:30:41 +000024
Daniel Veillard4ecf39f1999-09-22 12:14:03 +000025#include <stdio.h>
26#include <string.h>
27
28#ifdef HAVE_STDLIB_H
29#include <stdlib.h>
30#endif
31#ifdef HAVE_UNISTD_H
32#include <unistd.h>
33#endif
34#ifdef HAVE_SYS_SOCKET_H
35#include <sys/socket.h>
36#endif
37#ifdef HAVE_NETINET_IN_H
38#include <netinet/in.h>
39#endif
40#ifdef HAVE_ARPA_INET_H
41#include <arpa/inet.h>
42#endif
43#ifdef HAVE_NETDB_H
44#include <netdb.h>
45#endif
46#ifdef HAVE_FCNTL_H
47#include <fcntl.h>
48#endif
49#ifdef HAVE_ERRNO_H
50#include <errno.h>
51#endif
52#ifdef HAVE_SYS_TIME_H
53#include <sys/time.h>
54#endif
55#ifdef HAVE_SYS_SELECT_H
56#include <sys/select.h>
57#endif
Daniel Veillard5feb8492000-02-02 17:15:36 +000058#ifdef HAVE_STRINGS_H
59#include <strings.h>
60#endif
Daniel Veillard4ecf39f1999-09-22 12:14:03 +000061
62#include "xmlmemory.h"
Daniel Veillard00fdf371999-10-08 09:40:39 +000063#include "nanohttp.h"
Daniel Veillard4ecf39f1999-09-22 12:14:03 +000064
65#ifdef STANDALONE
66#define DEBUG_HTTP
67#endif
68
/* Upper bound on the number of redirections followed for one request. */
#define XML_NANO_HTTP_MAX_REDIR 10

/* Number of bytes requested from the socket by each read() call. */
#define XML_NANO_HTTP_CHUNK 4096

/*
 * Values stored in the `state` field of an HTTP context.  The send and
 * receive routines test them with a bitwise AND.
 */
#define XML_NANO_HTTP_CLOSED 0
#define XML_NANO_HTTP_WRITE 1
#define XML_NANO_HTTP_READ 2
#define XML_NANO_HTTP_NONE 4
77
/*
 * Per-request HTTP context: the parsed URL, the socket, the request
 * buffer and the receive buffer with its cursors.
 *
 * Receive buffer layout: `in` is the start of the allocation, `inrptr`
 * the next byte handed to the client and `inptr` the next free byte to
 * be filled from the network.
 */
typedef struct xmlNanoHTTPCtxt {
    char *protocol;	/* the protocol name */
    char *hostname;	/* the host name */
    int port;		/* the port */
    char *path;		/* the path within the URL */
    int fd;		/* the file descriptor for the socket */
    int state;		/* WRITE / READ / CLOSED */
    char *out;		/* buffer sent (zero terminated) */
    char *outptr;	/* index within the buffer sent */
    char *in;		/* the receiving buffer */
    char *content;	/* the start of the content */
    char *inptr;	/* the next byte to read from network */
    char *inrptr;	/* the next byte to give back to the client */
    int inlen;		/* len of the input buffer */
    int last;		/* return code for last operation */
    int returnValue;	/* the protocol return value */
    char *contentType;	/* the MIME type for the input */
    char *location;	/* the new URL in case of redirect */
} xmlNanoHTTPCtxt, *xmlNanoHTTPCtxtPtr;
97
/* Module-wide state shared by every request. */
static int initialized = 0;	/* non-zero once xmlNanoHTTPInit() has run */
static char *proxy = NULL;	/* the proxy name if any */
static int proxyPort;	/* the proxy port if any */
101
102/**
103 * xmlNanoHTTPInit:
104 *
105 * Initialize the HTTP protocol layer.
106 * Currently it just checks for proxy informations
107 */
108
109void
110xmlNanoHTTPInit(void) {
111 const char *env;
112
113 if (initialized)
114 return;
115
116 if (proxy == NULL) {
117 proxyPort = 80;
118 env = getenv("no_proxy");
119 if (env != NULL)
120 goto done;
121 env = getenv("http_proxy");
122 if (env != NULL) {
123 xmlNanoHTTPScanProxy(env);
124 goto done;
125 }
126 env = getenv("HTTP_PROXY");
127 if (env != NULL) {
128 xmlNanoHTTPScanProxy(env);
129 goto done;
130 }
131 }
132done:
133 initialized = 1;
134}
135
136/**
137 * xmlNanoHTTPClenup:
138 *
139 * Cleanup the HTTP protocol layer.
140 */
141
142void
143xmlNanoHTTPCleanup(void) {
144 if (proxy != NULL)
145 xmlFree(proxy);
146 initialized = 0;
147 return;
148}
149
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000150/**
151 * xmlNanoHTTPScanURL:
152 * @ctxt: an HTTP context
153 * @URL: The URL used to initialize the context
154 *
155 * (Re)Initialize an HTTP context by parsing the URL and finding
156 * the protocol host port and path it indicates.
157 */
158
159static void
160xmlNanoHTTPScanURL(xmlNanoHTTPCtxtPtr ctxt, const char *URL) {
161 const char *cur = URL;
162 char buf[4096];
163 int index = 0;
164 int port = 0;
165
166 if (ctxt->protocol != NULL) {
167 xmlFree(ctxt->protocol);
168 ctxt->protocol = NULL;
169 }
170 if (ctxt->hostname != NULL) {
171 xmlFree(ctxt->hostname);
172 ctxt->hostname = NULL;
173 }
174 if (ctxt->path != NULL) {
175 xmlFree(ctxt->path);
176 ctxt->path = NULL;
177 }
Daniel Veillarde41f2b72000-01-30 20:00:07 +0000178 if (URL == NULL) return;
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000179 buf[index] = 0;
180 while (*cur != 0) {
181 if ((cur[0] == ':') && (cur[1] == '/') && (cur[2] == '/')) {
182 buf[index] = 0;
183 ctxt->protocol = xmlMemStrdup(buf);
184 index = 0;
185 cur += 3;
186 break;
187 }
188 buf[index++] = *cur++;
189 }
190 if (*cur == 0) return;
191
192 buf[index] = 0;
193 while (1) {
194 if (cur[0] == ':') {
195 buf[index] = 0;
196 ctxt->hostname = xmlMemStrdup(buf);
197 index = 0;
198 cur += 1;
199 while ((*cur >= '0') && (*cur <= '9')) {
200 port *= 10;
201 port += *cur - '0';
202 cur++;
203 }
204 if (port != 0) ctxt->port = port;
205 while ((cur[0] != '/') && (*cur != 0))
206 cur++;
207 break;
208 }
209 if ((*cur == '/') || (*cur == 0)) {
210 buf[index] = 0;
211 ctxt->hostname = xmlMemStrdup(buf);
212 index = 0;
213 break;
214 }
215 buf[index++] = *cur++;
216 }
217 if (*cur == 0)
218 ctxt->path = xmlMemStrdup("/");
219 else {
Daniel Veillard726e8792000-01-30 20:04:29 +0000220 index = 0;
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000221 buf[index] = 0;
Daniel Veillard726e8792000-01-30 20:04:29 +0000222 while (*cur != 0)
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000223 buf[index++] = *cur++;
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000224 buf[index] = 0;
225 ctxt->path = xmlMemStrdup(buf);
226 }
227}
228
229/**
Daniel Veillarde41f2b72000-01-30 20:00:07 +0000230 * xmlNanoHTTPScanProxy:
231 * @URL: The proxy URL used to initialize the proxy context
232 *
233 * (Re)Initialize the HTTP Proxy context by parsing the URL and finding
234 * the protocol host port it indicates.
235 * Should be like http://myproxy/ or http://myproxy:3128/
236 * A NULL URL cleans up proxy informations.
237 */
238
239void
240xmlNanoHTTPScanProxy(const char *URL) {
241 const char *cur = URL;
242 char buf[4096];
243 int index = 0;
244 int port = 0;
245
246 if (proxy != NULL) {
247 xmlFree(proxy);
248 proxy = NULL;
249 }
250 if (proxyPort != 0) {
251 proxyPort = 0;
252 }
253#ifdef DEBUG_HTTP
254 if (URL == NULL)
255 printf("Removing HTTP proxy info\n");
256 else
257 printf("Using HTTP proxy %s\n", URL);
258#endif
259 if (URL == NULL) return;
260 buf[index] = 0;
261 while (*cur != 0) {
262 if ((cur[0] == ':') && (cur[1] == '/') && (cur[2] == '/')) {
263 buf[index] = 0;
264 index = 0;
265 cur += 3;
266 break;
267 }
268 buf[index++] = *cur++;
269 }
270 if (*cur == 0) return;
271
272 buf[index] = 0;
273 while (1) {
274 if (cur[0] == ':') {
275 buf[index] = 0;
276 proxy = xmlMemStrdup(buf);
277 index = 0;
278 cur += 1;
279 while ((*cur >= '0') && (*cur <= '9')) {
280 port *= 10;
281 port += *cur - '0';
282 cur++;
283 }
284 if (port != 0) proxyPort = port;
285 while ((cur[0] != '/') && (*cur != 0))
286 cur++;
287 break;
288 }
289 if ((*cur == '/') || (*cur == 0)) {
290 buf[index] = 0;
291 proxy = xmlMemStrdup(buf);
292 index = 0;
293 break;
294 }
295 buf[index++] = *cur++;
296 }
297}
298
299/**
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000300 * xmlNanoHTTPNewCtxt:
301 * @URL: The URL used to initialize the context
302 *
303 * Allocate and initialize a new HTTP context.
304 *
305 * Returns an HTTP context or NULL in case of error.
306 */
307
308static xmlNanoHTTPCtxtPtr
309xmlNanoHTTPNewCtxt(const char *URL) {
310 xmlNanoHTTPCtxtPtr ret;
311
312 ret = (xmlNanoHTTPCtxtPtr) xmlMalloc(sizeof(xmlNanoHTTPCtxt));
313 if (ret == NULL) return(NULL);
314
315 memset(ret, 0, sizeof(xmlNanoHTTPCtxt));
316 ret->port = 80;
317 ret->returnValue = 0;
318
319 xmlNanoHTTPScanURL(ret, URL);
320
321 return(ret);
322}
323
324/**
325 * xmlNanoHTTPFreeCtxt:
326 * @ctxt: an HTTP context
327 *
328 * Frees the context after closing the connection.
329 */
330
331static void
332xmlNanoHTTPFreeCtxt(xmlNanoHTTPCtxtPtr ctxt) {
333 if (ctxt == NULL) return;
334 if (ctxt->hostname != NULL) xmlFree(ctxt->hostname);
335 if (ctxt->protocol != NULL) xmlFree(ctxt->protocol);
336 if (ctxt->path != NULL) xmlFree(ctxt->path);
337 if (ctxt->out != NULL) xmlFree(ctxt->out);
338 if (ctxt->in != NULL) xmlFree(ctxt->in);
339 if (ctxt->contentType != NULL) xmlFree(ctxt->contentType);
340 if (ctxt->location != NULL) xmlFree(ctxt->location);
341 ctxt->state = XML_NANO_HTTP_NONE;
342 if (ctxt->fd >= 0) close(ctxt->fd);
343 ctxt->fd = -1;
344 xmlFree(ctxt);
345}
346
347/**
348 * xmlNanoHTTPSend:
349 * @ctxt: an HTTP context
350 *
351 * Send the input needed to initiate the processing on the server side
352 */
353
354static void
355xmlNanoHTTPSend(xmlNanoHTTPCtxtPtr ctxt) {
356 if (ctxt->state & XML_NANO_HTTP_WRITE)
357 ctxt->last = write(ctxt->fd, ctxt->outptr, strlen(ctxt->outptr));
358}
359
360/**
361 * xmlNanoHTTPRecv:
362 * @ctxt: an HTTP context
363 *
364 * Read information coming from the HTTP connection.
365 * This is a blocking call (but it blocks in select(), not read()).
366 *
367 * Returns the number of byte read or -1 in case of error.
368 */
369
370static int
371xmlNanoHTTPRecv(xmlNanoHTTPCtxtPtr ctxt) {
372 fd_set rfd;
373 struct timeval tv;
374
375
376 while (ctxt->state & XML_NANO_HTTP_READ) {
377 if (ctxt->in == NULL) {
378 ctxt->in = (char *) xmlMalloc(65000 * sizeof(char));
379 if (ctxt->in == NULL) {
380 ctxt->last = -1;
381 return(-1);
382 }
383 ctxt->inlen = 65000;
384 ctxt->inptr = ctxt->content = ctxt->inrptr = ctxt->in;
385 }
386 if (ctxt->inrptr > ctxt->in + XML_NANO_HTTP_CHUNK) {
387 int delta = ctxt->inrptr - ctxt->in;
388 int len = ctxt->inptr - ctxt->inrptr;
389
390 memmove(ctxt->in, ctxt->inrptr, len);
391 ctxt->inrptr -= delta;
392 ctxt->content -= delta;
393 ctxt->inptr -= delta;
394 }
395 if ((ctxt->in + ctxt->inlen) < (ctxt->inptr + XML_NANO_HTTP_CHUNK)) {
396 int d_inptr = ctxt->inptr - ctxt->in;
397 int d_content = ctxt->content - ctxt->in;
398 int d_inrptr = ctxt->inrptr - ctxt->in;
399
400 ctxt->inlen *= 2;
401 ctxt->in = (char *) xmlRealloc(ctxt->in, ctxt->inlen);
402 if (ctxt->in == NULL) {
403 ctxt->last = -1;
404 return(-1);
405 }
406 ctxt->inptr = ctxt->in + d_inptr;
407 ctxt->content = ctxt->in + d_content;
408 ctxt->inrptr = ctxt->in + d_inrptr;
409 }
410 ctxt->last = read(ctxt->fd, ctxt->inptr, XML_NANO_HTTP_CHUNK);
411 if (ctxt->last > 0) {
412 ctxt->inptr += ctxt->last;
413 return(ctxt->last);
414 }
415 if (ctxt->last == 0) {
416 return(0);
417 }
418#ifdef EWOULDBLOCK
419 if ((ctxt->last == -1) && (errno != EWOULDBLOCK)) {
420 return(0);
421 }
422#endif
423 tv.tv_sec=10;
424 tv.tv_usec=0;
425 FD_ZERO(&rfd);
426 FD_SET(ctxt->fd, &rfd);
427
428 if(select(ctxt->fd+1, &rfd, NULL, NULL, &tv)<1)
429 return(0);
430 }
431 return(0);
432}
433
434/**
435 * xmlNanoHTTPReadLine:
436 * @ctxt: an HTTP context
437 *
438 * Read one line in the HTTP server output, usually for extracting
439 * the HTTP protocol informations from the answer header.
440 *
441 * Returns a newly allocated string with a copy of the line, or NULL
442 * which indicate the end of the input.
443 */
444
445static char *
446xmlNanoHTTPReadLine(xmlNanoHTTPCtxtPtr ctxt) {
447 char buf[4096];
448 char *bp=buf;
449
450 while(bp - buf < 4095) {
451 if(ctxt->inrptr == ctxt->inptr) {
452 if (xmlNanoHTTPRecv(ctxt) == 0) {
453 if (bp == buf)
454 return(NULL);
455 else
456 *bp = 0;
457 return(xmlMemStrdup(buf));
458 }
459 }
460 *bp = *ctxt->inrptr++;
461 if(*bp == '\n') {
462 *bp = 0;
463 return(xmlMemStrdup(buf));
464 }
465 if(*bp != '\r')
466 bp++;
467 }
468 buf[4095] = 0;
469 return(xmlMemStrdup(buf));
470}
471
472
473/**
474 * xmlNanoHTTPScanAnswer:
475 * @ctxt: an HTTP context
476 * @line: an HTTP header line
477 *
478 * Try to extract useful informations from the server answer.
479 * We currently parse and process:
480 * - The HTTP revision/ return code
481 * - The Content-Type
482 * - The Location for redirrect processing.
483 *
484 * Returns -1 in case of failure, the file descriptor number otherwise
485 */
486
487static void
488xmlNanoHTTPScanAnswer(xmlNanoHTTPCtxtPtr ctxt, const char *line) {
489 const char *cur = line;
490
491 if (line == NULL) return;
492
493 if (!strncmp(line, "HTTP/", 5)) {
494 int version = 0;
495 int ret = 0;
496
497 cur += 5;
498 while ((*cur >= '0') && (*cur <= '9')) {
499 version *= 10;
500 version += *cur - '0';
501 cur++;
502 }
503 if (*cur == '.') {
504 cur++;
505 if ((*cur >= '0') && (*cur <= '9')) {
506 version *= 10;
507 version += *cur - '0';
508 cur++;
509 }
510 while ((*cur >= '0') && (*cur <= '9'))
511 cur++;
512 } else
513 version *= 10;
514 if ((*cur != ' ') && (*cur != '\t')) return;
515 while ((*cur == ' ') || (*cur == '\t')) cur++;
516 if ((*cur < '0') || (*cur > '9')) return;
517 while ((*cur >= '0') && (*cur <= '9')) {
518 ret *= 10;
519 ret += *cur - '0';
520 cur++;
521 }
522 if ((*cur != 0) && (*cur != ' ') && (*cur != '\t')) return;
523 ctxt->returnValue = ret;
524 } else if (!strncmp(line, "Content-Type:", 13)) {
525 cur += 13;
526 while ((*cur == ' ') || (*cur == '\t')) cur++;
527 if (ctxt->contentType != NULL)
528 xmlFree(ctxt->contentType);
529 ctxt->contentType = xmlMemStrdup(cur);
530 } else if (!strncmp(line, "ContentType:", 12)) {
531 cur += 12;
532 if (ctxt->contentType != NULL) return;
533 while ((*cur == ' ') || (*cur == '\t')) cur++;
534 ctxt->contentType = xmlMemStrdup(cur);
535 } else if (!strncmp(line, "content-type:", 13)) {
536 cur += 13;
537 if (ctxt->contentType != NULL) return;
538 while ((*cur == ' ') || (*cur == '\t')) cur++;
539 ctxt->contentType = xmlMemStrdup(cur);
540 } else if (!strncmp(line, "contenttype:", 12)) {
541 cur += 12;
542 if (ctxt->contentType != NULL) return;
543 while ((*cur == ' ') || (*cur == '\t')) cur++;
544 ctxt->contentType = xmlMemStrdup(cur);
545 } else if (!strncmp(line, "Location:", 9)) {
546 cur += 9;
547 while ((*cur == ' ') || (*cur == '\t')) cur++;
548 if (ctxt->location != NULL)
549 xmlFree(ctxt->location);
550 ctxt->location = xmlMemStrdup(cur);
551 } else if (!strncmp(line, "location:", 9)) {
552 cur += 9;
553 if (ctxt->location != NULL) return;
554 while ((*cur == ' ') || (*cur == '\t')) cur++;
555 ctxt->location = xmlMemStrdup(cur);
556 }
557}
558
/**
 * xmlNanoHTTPConnectAttempt:
 * @ia:  an internet address structure
 * @port:  the port number
 *
 * Attempt a connection to the given IP:port endpoint. It forces
 * non-blocking semantic on the socket, and allows 60 seconds for
 * the host to answer.
 *
 * Diagnostics are only printed when DEBUG_HTTP is defined.
 *
 * Returns -1 in case of failure, the file descriptor number otherwise
 */

static int
xmlNanoHTTPConnectAttempt(struct in_addr ia, int port)
{
    int s = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
    struct sockaddr_in sin;
    fd_set wfd;
    struct timeval tv;
    int status;

    if (s == -1) {
#ifdef DEBUG_HTTP
	perror("socket");
#endif
	return(-1);
    }

    /* switch the socket to non-blocking mode, per platform */
#ifdef _WINSOCKAPI_
    {
	u_long one = 1;

	status = ioctlsocket(s, FIONBIO, &one) == SOCKET_ERROR ? -1 : 0;
    }
#else /* _WINSOCKAPI_ */
#if defined(VMS)
    {
	int enable = 1;
	status = IOCTL(s, FIONBIO, &enable);
    }
#else /* VMS */
    if ((status = fcntl(s, F_GETFL, 0)) != -1) {
#ifdef O_NONBLOCK
	status |= O_NONBLOCK;
#else /* O_NONBLOCK */
#ifdef F_NDELAY
	status |= F_NDELAY;
#endif /* F_NDELAY */
#endif /* !O_NONBLOCK */
	status = fcntl(s, F_SETFL, status);
    }
#endif /* !VMS */
#endif /* !_WINSOCKAPI_ */

    /* checked on every platform, not only in the fcntl() variant */
    if (status < 0) {
#ifdef DEBUG_HTTP
	perror("nonblocking");
#endif
	close(s);
	return(-1);
    }

    /* zero the whole structure: padding members must not hold garbage */
    memset(&sin, 0, sizeof(sin));
    sin.sin_family = AF_INET;
    sin.sin_addr = ia;
    sin.sin_port = htons(port);

    if ((connect(s, (struct sockaddr *)&sin, sizeof(sin)) == -1) &&
	(errno != EINPROGRESS)) {
#ifdef DEBUG_HTTP
	perror("connect");
#endif
	close(s);
	return(-1);
    }

    tv.tv_sec = 60;		/* We use 60 second timeouts for now */
    tv.tv_usec = 0;

    FD_ZERO(&wfd);
    FD_SET(s, &wfd);

    /* the socket becomes writable once the connection attempt completes */
    switch (select(s + 1, NULL, &wfd, NULL, &tv)) {
	case 0:
	    /* Time out */
	    close(s);
	    return(-1);
	case -1:
	    /* select() itself failed */
#ifdef DEBUG_HTTP
	    perror("select");
#endif
	    close(s);
	    return(-1);
    }

    return(s);
}
658
/**
 * xmlNanoHTTPConnectHost:
 * @host:  the host name
 * @port:  the port number
 *
 * Attempt a connection to the given host:port endpoint. It tries
 * each IP address provided by the resolver in turn and stops at the
 * first one that accepts the connection.  Only IPv4 results are usable,
 * since the connection routine works on a struct in_addr.
 *
 * Returns -1 in case of failure, the file descriptor number otherwise
 */

static int
xmlNanoHTTPConnectHost(const char *host, int port)
{
    struct hostent *h;
    int i;
    int s;

    h = gethostbyname(host);
    if (h == NULL) {
#ifdef DEBUG_HTTP
	fprintf(stderr,"unable to resolve '%s'.\n", host);
#endif
	return(-1);
    }

    /* refuse anything that is not an IPv4 address of the expected size */
    if ((h->h_addrtype != AF_INET) ||
	(h->h_length != (int) sizeof(struct in_addr))) {
#ifdef DEBUG_HTTP
	fprintf(stderr, "unable to connect to '%s'.\n", host);
#endif
	return(-1);
    }

    for (i = 0; h->h_addr_list[i]; i++) {
	struct in_addr ia;

	memcpy(&ia, h->h_addr_list[i], sizeof(ia));
	s = xmlNanoHTTPConnectAttempt(ia, port);
	if (s != -1)
	    return(s);
    }

#ifdef DEBUG_HTTP
    fprintf(stderr, "unable to connect to '%s'.\n", host);
#endif
    return(-1);
}
700
701
702/**
703 * xmlNanoHTTPOpen:
704 * @URL: The URL to load
705 * @contentType: if available the Content-Type information will be
706 * returned at that location
707 *
708 * This function try to open a connection to the indicated resource
709 * via HTTP GET.
710 *
711 * Returns NULL in case of failure, otherwise a request handler.
712 * The contentType, if provided must be freed by the caller
713 */
714
715void *
716xmlNanoHTTPOpen(const char *URL, char **contentType) {
717 xmlNanoHTTPCtxtPtr ctxt;
718 char buf[4096];
719 int ret;
720 char *p;
721 int head;
722 int nbRedirects = 0;
723 char *redirURL = NULL;
724
Daniel Veillarde41f2b72000-01-30 20:00:07 +0000725 xmlNanoHTTPInit();
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000726 if (contentType != NULL) *contentType = NULL;
727
728retry:
729 if (redirURL == NULL)
730 ctxt = xmlNanoHTTPNewCtxt(URL);
731 else {
732 ctxt = xmlNanoHTTPNewCtxt(redirURL);
733 xmlFree(redirURL);
734 redirURL = NULL;
735 }
736
737 if ((ctxt->protocol == NULL) || (strcmp(ctxt->protocol, "http"))) {
738 xmlNanoHTTPFreeCtxt(ctxt);
739 if (redirURL != NULL) xmlFree(redirURL);
740 return(NULL);
741 }
742 if (ctxt->hostname == NULL) {
743 xmlNanoHTTPFreeCtxt(ctxt);
744 return(NULL);
745 }
Daniel Veillarde41f2b72000-01-30 20:00:07 +0000746 if (proxy)
747 ret = xmlNanoHTTPConnectHost(proxy, proxyPort);
748 else
749 ret = xmlNanoHTTPConnectHost(ctxt->hostname, ctxt->port);
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000750 if (ret < 0) {
751 xmlNanoHTTPFreeCtxt(ctxt);
752 return(NULL);
753 }
754 ctxt->fd = ret;
Daniel Veillarde41f2b72000-01-30 20:00:07 +0000755 if (proxy) {
756#ifdef have_snprintf
757 if (ctxt->port != 80)
758 snprintf(buf, sizeof(buf),
759 "GET http://%s:%d%s HTTP/1.0\r\nHost: %s\r\n\r\n",
760 ctxt->hostname, ctxt->port, ctxt->path, ctxt->hostname);
761 else
762 snprintf(buf, sizeof(buf),"GET http://%s%s HTTP/1.0\r\nHost: %s\r\n\r\n",
763 ctxt->hostname, ctxt->path, ctxt->hostname);
Daniel Veillard335849b1999-09-23 23:08:42 +0000764#else
Daniel Veillarde41f2b72000-01-30 20:00:07 +0000765 if (ctxt->port != 80)
766 sprintf(buf,
767 "GET http://%s:%d%s HTTP/1.0\r\nHost: %s\r\n\r\n",
768 ctxt->hostname, ctxt->port, ctxt->path, ctxt->hostname);
769 else
770 sprintf(buf, "GET http://%s%s HTTP/1.0\r\nHost: %s\r\n\r\n",
771 ctxt->hostname, ctxt->path, ctxt->hostname);
Daniel Veillard335849b1999-09-23 23:08:42 +0000772#endif
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000773#ifdef DEBUG_HTTP
Daniel Veillarde41f2b72000-01-30 20:00:07 +0000774 if (ctxt->port != 80)
775 printf("-> Proxy GET http://%s:%d%s HTTP/1.0\n-> Host: %s\n\n",
776 ctxt->hostname, ctxt->port, ctxt->path, ctxt->hostname);
777 else
778 printf("-> Proxy GET http://%s%s HTTP/1.0\n-> Host: %s\n\n",
779 ctxt->hostname, ctxt->path, ctxt->hostname);
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000780#endif
Daniel Veillarde41f2b72000-01-30 20:00:07 +0000781 } else {
782#ifdef HAVE_SNPRINTF
783 snprintf(buf, sizeof(buf),"GET %s HTTP/1.0\r\nHost: %s\r\n\r\n",
784 ctxt->path, ctxt->hostname);
785#else
786 sprintf(buf, "GET %s HTTP/1.0\r\nHost: %s\r\n\r\n",
787 ctxt->path, ctxt->hostname);
788#endif
789#ifdef DEBUG_HTTP
790 printf("-> GET %s HTTP/1.0\n-> Host: %s\n\n",
791 ctxt->path, ctxt->hostname);
792#endif
793 }
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000794 ctxt->outptr = ctxt->out = xmlMemStrdup(buf);
795 ctxt->state = XML_NANO_HTTP_WRITE;
796 xmlNanoHTTPSend(ctxt);
797 ctxt->state = XML_NANO_HTTP_READ;
798 head = 1;
799
800 while ((p = xmlNanoHTTPReadLine(ctxt)) != NULL) {
801 if (head && (*p == 0)) {
802 head = 0;
803 ctxt->content = ctxt->inrptr;
Daniel Veillarde41f2b72000-01-30 20:00:07 +0000804 xmlFree(p);
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000805 break;
806 }
807 xmlNanoHTTPScanAnswer(ctxt, p);
808
809#ifdef DEBUG_HTTP
810 if (p != NULL) printf("<- %s\n", p);
811#endif
812 if (p != NULL) xmlFree(p);
813 }
814
815 if ((ctxt->location != NULL) && (ctxt->returnValue >= 300) &&
816 (ctxt->returnValue < 400)) {
817#ifdef DEBUG_HTTP
818 printf("\nRedirect to: %s\n", ctxt->location);
819#endif
820 while (xmlNanoHTTPRecv(ctxt)) ;
821 if (nbRedirects < XML_NANO_HTTP_MAX_REDIR) {
822 nbRedirects++;
823 redirURL = xmlMemStrdup(ctxt->location);
824 xmlNanoHTTPFreeCtxt(ctxt);
825 goto retry;
826 }
827 xmlNanoHTTPFreeCtxt(ctxt);
828#ifdef DEBUG_HTTP
829 printf("Too many redirrects, aborting ...\n");
830#endif
831 return(NULL);
832
833 }
834
835 if ((contentType != NULL) && (ctxt->contentType != NULL))
836 *contentType = xmlMemStrdup(ctxt->contentType);
837
838#ifdef DEBUG_HTTP
839 if (ctxt->contentType != NULL)
840 printf("\nCode %d, content-type '%s'\n\n",
841 ctxt->returnValue, ctxt->contentType);
842 else
843 printf("\nCode %d, no content-type\n\n",
844 ctxt->returnValue);
845#endif
846
847 return((void *) ctxt);
848}
849
850/**
851 * xmlNanoHTTPRead:
852 * @ctx: the HTTP context
853 * @dest: a buffer
854 * @len: the buffer length
855 *
856 * This function tries to read @len bytes from the existing HTTP connection
857 * and saves them in @dest. This is a blocking call.
858 *
859 * Returns the number of byte read. 0 is an indication of an end of connection.
860 * -1 indicates a parameter error.
861 */
862int
863xmlNanoHTTPRead(void *ctx, void *dest, int len) {
864 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
865
866 if (ctx == NULL) return(-1);
867 if (dest == NULL) return(-1);
868 if (len <= 0) return(0);
869
870 while (ctxt->inptr - ctxt->inrptr < len) {
871 if (xmlNanoHTTPRecv(ctxt) == 0) break;
872 }
873 if (ctxt->inptr - ctxt->inrptr < len)
874 len = ctxt->inptr - ctxt->inrptr;
875 memcpy(dest, ctxt->inrptr, len);
876 ctxt->inrptr += len;
877 return(len);
878}
879
880/**
881 * xmlNanoHTTPClose:
882 * @ctx: the HTTP context
883 *
884 * This function closes an HTTP context, it ends up the connection and
885 * free all data related to it.
886 */
887void
888xmlNanoHTTPClose(void *ctx) {
889 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
890
891 if (ctx == NULL) return;
892
893 xmlNanoHTTPFreeCtxt(ctxt);
894}
895
Daniel Veillard00fdf371999-10-08 09:40:39 +0000896#ifndef DEBUG_HTTP
897#define DEBUG_HTTP
898#endif
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000899/**
900 * xmlNanoHTTPMethod:
901 * @URL: The URL to load
902 * @method: the HTTP method to use
903 * @input: the input string if any
904 * @contentType: the Content-Type information IN and OUT
905 * @headers: the extra headers
906 *
907 * This function try to open a connection to the indicated resource
908 * via HTTP using the given @method, adding the given extra headers
909 * and the input buffer for the request content.
910 *
911 * Returns NULL in case of failure, otherwise a request handler.
912 * The contentType, if provided must be freed by the caller
913 */
914
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000915void *
916xmlNanoHTTPMethod(const char *URL, const char *method, const char *input,
917 char **contentType, const char *headers) {
918 xmlNanoHTTPCtxtPtr ctxt;
919 char buf[20000];
920 int ret;
921 char *p;
922 int head;
923 int nbRedirects = 0;
924 char *redirURL = NULL;
925
926 if (URL == NULL) return(NULL);
927 if (method == NULL) method = "GET";
928 if (contentType != NULL) *contentType = NULL;
929
930retry:
931 if (redirURL == NULL)
932 ctxt = xmlNanoHTTPNewCtxt(URL);
933 else {
934 ctxt = xmlNanoHTTPNewCtxt(redirURL);
935 xmlFree(redirURL);
936 redirURL = NULL;
937 }
938
939 if ((ctxt->protocol == NULL) || (strcmp(ctxt->protocol, "http"))) {
940 xmlNanoHTTPFreeCtxt(ctxt);
941 if (redirURL != NULL) xmlFree(redirURL);
942 return(NULL);
943 }
944 if (ctxt->hostname == NULL) {
945 xmlNanoHTTPFreeCtxt(ctxt);
946 return(NULL);
947 }
948 ret = xmlNanoHTTPConnectHost(ctxt->hostname, ctxt->port);
949 if (ret < 0) {
950 xmlNanoHTTPFreeCtxt(ctxt);
951 return(NULL);
952 }
953 ctxt->fd = ret;
954
955 if (input == NULL) {
956 if (headers == NULL) {
957 if ((contentType == NULL) || (*contentType == NULL)) {
Daniel Veillard335849b1999-09-23 23:08:42 +0000958#ifdef HAVE_SNPRINTF
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000959 snprintf(buf, sizeof(buf),
960 "%s %s HTTP/1.0\r\nHost: %s\r\n\r\n",
961 method, ctxt->path, ctxt->hostname);
Daniel Veillard335849b1999-09-23 23:08:42 +0000962#else
963 sprintf(buf,
964 "%s %s HTTP/1.0\r\nHost: %s\r\n\r\n",
965 method, ctxt->path, ctxt->hostname);
966#endif
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000967 } else {
Daniel Veillard335849b1999-09-23 23:08:42 +0000968#ifdef HAVE_SNPRINTF
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000969 snprintf(buf, sizeof(buf),
970 "%s %s HTTP/1.0\r\nHost: %s\r\nContent-Type: %s\r\n\r\n",
971 method, ctxt->path, ctxt->hostname, *contentType);
Daniel Veillard335849b1999-09-23 23:08:42 +0000972#else
973 sprintf(buf,
974 "%s %s HTTP/1.0\r\nHost: %s\r\nContent-Type: %s\r\n\r\n",
975 method, ctxt->path, ctxt->hostname, *contentType);
976#endif
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000977 }
978 } else {
979 if ((contentType == NULL) || (*contentType == NULL)) {
Daniel Veillard335849b1999-09-23 23:08:42 +0000980#ifdef HAVE_SNPRINTF
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000981 snprintf(buf, sizeof(buf),
982 "%s %s HTTP/1.0\r\nHost: %s\r\n%s\r\n",
983 method, ctxt->path, ctxt->hostname, headers);
Daniel Veillard335849b1999-09-23 23:08:42 +0000984#else
985 sprintf(buf,
986 "%s %s HTTP/1.0\r\nHost: %s\r\n%s\r\n",
987 method, ctxt->path, ctxt->hostname, headers);
988#endif
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000989 } else {
Daniel Veillard335849b1999-09-23 23:08:42 +0000990#ifdef HAVE_SNPRINTF
Daniel Veillard4ecf39f1999-09-22 12:14:03 +0000991 snprintf(buf, sizeof(buf),
992 "%s %s HTTP/1.0\r\nHost: %s\r\nContent-Type: %s\r\n%s\r\n",
993 method, ctxt->path, ctxt->hostname, *contentType,
994 headers);
Daniel Veillard335849b1999-09-23 23:08:42 +0000995#else
996 sprintf(buf,
997 "%s %s HTTP/1.0\r\nHost: %s\r\nContent-Type: %s\r\n%s\r\n",
998 method, ctxt->path, ctxt->hostname, *contentType,
999 headers);
1000#endif
Daniel Veillard4ecf39f1999-09-22 12:14:03 +00001001 }
1002 }
1003 } else {
1004 int len = strlen(input);
1005 if (headers == NULL) {
1006 if ((contentType == NULL) || (*contentType == NULL)) {
Daniel Veillard335849b1999-09-23 23:08:42 +00001007#ifdef HAVE_SNPRINTF
Daniel Veillard4ecf39f1999-09-22 12:14:03 +00001008 snprintf(buf, sizeof(buf),
1009 "%s %s HTTP/1.0\r\nHost: %s\r\nContent-Length: %d\r\n\r\n%s",
1010 method, ctxt->path, ctxt->hostname, len, input);
Daniel Veillard335849b1999-09-23 23:08:42 +00001011#else
1012 sprintf(buf,
1013 "%s %s HTTP/1.0\r\nHost: %s\r\nContent-Length: %d\r\n\r\n%s",
1014 method, ctxt->path, ctxt->hostname, len, input);
1015#endif
Daniel Veillard4ecf39f1999-09-22 12:14:03 +00001016 } else {
Daniel Veillard335849b1999-09-23 23:08:42 +00001017#ifdef HAVE_SNPRINTF
Daniel Veillard4ecf39f1999-09-22 12:14:03 +00001018 snprintf(buf, sizeof(buf),
1019"%s %s HTTP/1.0\r\nHost: %s\r\nContent-Type: %s\r\nContent-Length: %d\r\n\r\n%s",
1020 method, ctxt->path, ctxt->hostname, *contentType, len,
1021 input);
Daniel Veillard335849b1999-09-23 23:08:42 +00001022#else
1023 sprintf(buf,
1024"%s %s HTTP/1.0\r\nHost: %s\r\nContent-Type: %s\r\nContent-Length: %d\r\n\r\n%s",
1025 method, ctxt->path, ctxt->hostname, *contentType, len,
1026 input);
1027#endif
Daniel Veillard4ecf39f1999-09-22 12:14:03 +00001028 }
1029 } else {
1030 if ((contentType == NULL) || (*contentType == NULL)) {
Daniel Veillard335849b1999-09-23 23:08:42 +00001031#ifdef HAVE_SNPRINTF
Daniel Veillard4ecf39f1999-09-22 12:14:03 +00001032 snprintf(buf, sizeof(buf),
1033 "%s %s HTTP/1.0\r\nHost: %s\r\nContent-Length: %d\r\n%s\r\n%s",
1034 method, ctxt->path, ctxt->hostname, len,
1035 headers, input);
Daniel Veillard335849b1999-09-23 23:08:42 +00001036#else
1037 sprintf(buf,
1038 "%s %s HTTP/1.0\r\nHost: %s\r\nContent-Length: %d\r\n%s\r\n%s",
1039 method, ctxt->path, ctxt->hostname, len,
1040 headers, input);
1041#endif
Daniel Veillard4ecf39f1999-09-22 12:14:03 +00001042 } else {
Daniel Veillard335849b1999-09-23 23:08:42 +00001043#ifdef HAVE_SNPRINTF
Daniel Veillard4ecf39f1999-09-22 12:14:03 +00001044 snprintf(buf, sizeof(buf),
1045"%s %s HTTP/1.0\r\nHost: %s\r\nContent-Type: %s\r\nContent-Length: %d\r\n%s\r\n%s",
1046 method, ctxt->path, ctxt->hostname, *contentType,
1047 len, headers, input);
Daniel Veillard335849b1999-09-23 23:08:42 +00001048#else
1049 sprintf(buf,
1050"%s %s HTTP/1.0\r\nHost: %s\r\nContent-Type: %s\r\nContent-Length: %d\r\n%s\r\n%s",
1051 method, ctxt->path, ctxt->hostname, *contentType,
1052 len, headers, input);
1053#endif
Daniel Veillard4ecf39f1999-09-22 12:14:03 +00001054 }
1055 }
1056 }
1057#ifdef DEBUG_HTTP
1058 printf("-> %s", buf);
1059#endif
1060 ctxt->outptr = ctxt->out = xmlMemStrdup(buf);
1061 ctxt->state = XML_NANO_HTTP_WRITE;
1062 xmlNanoHTTPSend(ctxt);
1063 ctxt->state = XML_NANO_HTTP_READ;
1064 head = 1;
1065
1066 while ((p = xmlNanoHTTPReadLine(ctxt)) != NULL) {
1067 if (head && (*p == 0)) {
1068 head = 0;
1069 ctxt->content = ctxt->inrptr;
1070 if (p != NULL) xmlFree(p);
1071 break;
1072 }
1073 xmlNanoHTTPScanAnswer(ctxt, p);
1074
1075#ifdef DEBUG_HTTP
1076 if (p != NULL) printf("<- %s\n", p);
1077#endif
1078 if (p != NULL) xmlFree(p);
1079 }
1080
1081 if ((ctxt->location != NULL) && (ctxt->returnValue >= 300) &&
1082 (ctxt->returnValue < 400)) {
1083#ifdef DEBUG_HTTP
1084 printf("\nRedirect to: %s\n", ctxt->location);
1085#endif
1086 while (xmlNanoHTTPRecv(ctxt)) ;
1087 if (nbRedirects < XML_NANO_HTTP_MAX_REDIR) {
1088 nbRedirects++;
1089 redirURL = xmlMemStrdup(ctxt->location);
1090 xmlNanoHTTPFreeCtxt(ctxt);
1091 goto retry;
1092 }
1093 xmlNanoHTTPFreeCtxt(ctxt);
1094#ifdef DEBUG_HTTP
1095 printf("Too many redirrects, aborting ...\n");
1096#endif
1097 return(NULL);
1098
1099 }
1100
1101 if ((contentType != NULL) && (ctxt->contentType != NULL))
1102 *contentType = xmlMemStrdup(ctxt->contentType);
1103 else if (contentType != NULL)
1104 *contentType = NULL;
1105
1106#ifdef DEBUG_HTTP
1107 if (ctxt->contentType != NULL)
1108 printf("\nCode %d, content-type '%s'\n\n",
1109 ctxt->returnValue, ctxt->contentType);
1110 else
1111 printf("\nCode %d, no content-type\n\n",
1112 ctxt->returnValue);
1113#endif
1114
1115 return((void *) ctxt);
1116}
1117
/**
 * xmlNanoHTTPFetch:
 * @URL: The URL to load
 * @filename:  the filename where the content should be saved, or "-"
 *             for the standard output
 * @contentType:  if available the Content-Type information will be
 *                returned at that location
 *
 * This function tries to fetch the indicated resource via HTTP GET
 * and save its content in the file.  An existing file is truncated
 * first; a new file is created with mode 0644.
 *
 * Returns -1 in case of failure, 0 in case of success. The contentType,
 *     if provided must be freed by the caller; it is released and reset
 *     to NULL when the output file cannot be opened or written.
 */
int
xmlNanoHTTPFetch(const char *URL, const char *filename, char **contentType) {
    void *ctxt;
    char buf[4096];
    int fd;
    int len;
    int toStdout;
    int ret = 0;

    if (filename == NULL) return(-1);

    ctxt = xmlNanoHTTPOpen(URL, contentType);
    if (ctxt == NULL) return(-1);

    toStdout = !strcmp(filename, "-");
    if (toStdout)
	fd = 1;		/* standard output; descriptor 0 is the input */
    else
	fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 00644);

    if (fd < 0) {
	ret = -1;
    } else {
	while ((ret == 0) &&
	       ((len = xmlNanoHTTPRead(ctxt, buf, sizeof(buf))) > 0)) {
	    int done = 0;

	    /* write() may accept fewer bytes than requested */
	    while (done < len) {
		int n = write(fd, buf + done, len - done);

		if (n <= 0) {
		    ret = -1;
		    break;
		}
		done += n;
	    }
	}
	/* the standard output belongs to the caller: leave it open */
	if (!toStdout)
	    close(fd);
    }

    xmlNanoHTTPClose(ctxt);
    if ((ret < 0) && (contentType != NULL) && (*contentType != NULL)) {
	xmlFree(*contentType);
	*contentType = NULL;
    }
    return(ret);
}
1163
/**
 * xmlNanoHTTPSave:
 * @ctxt:  the HTTP context
 * @filename:  the filename where the content should be saved, or "-"
 *             for the standard output
 *
 * This function saves the output of the HTTP transaction to a file.
 * An existing file is truncated first; a new file is created with
 * mode 0644.  It closes and frees the context at the end, on success
 * and on failure alike (except when @ctxt itself is NULL).
 *
 * Returns -1 in case of failure, 0 in case of success.
 */
int
xmlNanoHTTPSave(void *ctxt, const char *filename) {
    char buf[4096];
    int fd;
    int len;
    int toStdout;
    int ret = 0;

    if (ctxt == NULL) return(-1);
    if (filename == NULL) {
	xmlNanoHTTPClose(ctxt);
	return(-1);
    }

    toStdout = !strcmp(filename, "-");
    if (toStdout)
	fd = 1;		/* standard output; descriptor 0 is the input */
    else
	/* O_CREAT requires the mode argument */
	fd = open(filename, O_CREAT | O_WRONLY | O_TRUNC, 00644);

    if (fd < 0) {
	ret = -1;
    } else {
	while ((ret == 0) &&
	       ((len = xmlNanoHTTPRead(ctxt, buf, sizeof(buf))) > 0)) {
	    int done = 0;

	    /* write() may accept fewer bytes than requested */
	    while (done < len) {
		int n = write(fd, buf + done, len - done);

		if (n <= 0) {
		    ret = -1;
		    break;
		}
		done += n;
	    }
	}
	/* the standard output belongs to the caller: leave it open */
	if (!toStdout)
	    close(fd);
    }

    xmlNanoHTTPClose(ctxt);
    return(ret);
}
1199
1200/**
1201 * xmlNanoHTTPReturnCode:
1202 * @ctx: the HTTP context
1203 *
1204 * Returns the HTTP return code for the request.
1205 */
1206int
1207xmlNanoHTTPReturnCode(void *ctx) {
1208 xmlNanoHTTPCtxtPtr ctxt = (xmlNanoHTTPCtxtPtr) ctx;
1209
1210 if (ctxt == NULL) return(-1);
1211
1212 return(ctxt->returnValue);
1213}
1214
#ifdef STANDALONE
/*
 * Stand-alone driver: fetches argv[1] into the file named by argv[2],
 * or into "-" when no file name is given; prints a usage message when
 * no URL is given.
 */
int main(int argc, char **argv) {
    char *contentType = NULL;

    if (argv[1] == NULL) {
	printf("%s: minimal HTTP GET implementation\n", argv[0]);
	printf("\tusage %s [ URL [ filename ] ]\n", argv[0]);
    } else {
	const char *target = (argv[2] != NULL) ? argv[2] : "-";

	xmlNanoHTTPFetch(argv[1], target, &contentType);
	if (contentType != NULL)
	    xmlFree(contentType);
    }
    xmlNanoHTTPCleanup();
    xmlMemoryDump();
    return(0);
}
#endif /* STANDALONE */