blob: 3526a0a0eaca2b6936a7d40d299ca8a6cc554895 [file] [log] [blame]
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001/*
2 * xmlIO.c : implementation of the I/O interfaces used by the parser
3 *
4 * See Copyright for the status of this software.
5 *
6 * Daniel.Veillard@w3.org
7 */
8
Daniel Veillard7f7d1111999-09-22 09:46:25 +00009#ifdef WIN32
10#define HAVE_FCNTL_H
11#include <io.h>
12#else
Daniel Veillarde2d034d1999-07-27 19:52:06 +000013#include "config.h"
Daniel Veillard7f7d1111999-09-22 09:46:25 +000014#endif
Daniel Veillarde2d034d1999-07-27 19:52:06 +000015
Daniel Veillard7f7d1111999-09-22 09:46:25 +000016#include <stdio.h>
17#include <string.h>
18
19#ifdef HAVE_SYS_TYPES_H
Daniel Veillarde2d034d1999-07-27 19:52:06 +000020#include <sys/types.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000021#endif
22#ifdef HAVE_SYS_STAT_H
Daniel Veillarde2d034d1999-07-27 19:52:06 +000023#include <sys/stat.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000024#endif
25#ifdef HAVE_FCNTL_H
Daniel Veillarde2d034d1999-07-27 19:52:06 +000026#include <fcntl.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000027#endif
Daniel Veillarde2d034d1999-07-27 19:52:06 +000028#ifdef HAVE_UNISTD_H
29#include <unistd.h>
30#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +000031#ifdef HAVE_STDLIB_H
32#include <stdlib.h>
33#endif
Daniel Veillarde2d034d1999-07-27 19:52:06 +000034#ifdef HAVE_ZLIB_H
35#include <zlib.h>
36#endif
37
Daniel Veillard6454aec1999-09-02 22:04:43 +000038#include "xmlmemory.h"
Daniel Veillardb96e6431999-08-29 21:02:19 +000039#include "parser.h"
Daniel Veillarde2d034d1999-07-27 19:52:06 +000040#include "xmlIO.h"
Daniel Veillard7f7d1111999-09-22 09:46:25 +000041#include "nanohttp.h"
Daniel Veillarde2d034d1999-07-27 19:52:06 +000042
43/* #define DEBUG_INPUT */
44/* #define VERBOSE_FAILURE */
Daniel Veillardb96e6431999-08-29 21:02:19 +000045/* #define DEBUG_EXTERNAL_ENTITIES */
Daniel Veillarde2d034d1999-07-27 19:52:06 +000046
/*
 * MINLEN: read size that xmlParserInputBufferGrow() substitutes for any
 * request at or below it (a request of exactly 4 is left alone).
 * A much smaller value is selected when DEBUG_INPUT is defined.
 */
#ifndef DEBUG_INPUT
#define MINLEN 4000
#else
#define MINLEN 40
#endif
52
53/**
54 * xmlAllocParserInputBuffer:
55 * @enc: the charset encoding if known
56 *
57 * Create a buffered parser input for progressive parsing
58 *
59 * Returns the new parser input or NULL
60 */
61xmlParserInputBufferPtr
62xmlAllocParserInputBuffer(xmlCharEncoding enc) {
63 xmlParserInputBufferPtr ret;
64
Daniel Veillard6454aec1999-09-02 22:04:43 +000065 ret = (xmlParserInputBufferPtr) xmlMalloc(sizeof(xmlParserInputBuffer));
Daniel Veillarde2d034d1999-07-27 19:52:06 +000066 if (ret == NULL) {
67 fprintf(stderr, "xmlAllocParserInputBuffer : out of memory!\n");
68 return(NULL);
69 }
70 memset(ret, 0, (size_t) sizeof(xmlParserInputBuffer));
71 ret->buffer = xmlBufferCreate();
72 ret->encoder = xmlGetCharEncodingHandler(enc);
73 ret->fd = -1;
Daniel Veillard7f7d1111999-09-22 09:46:25 +000074 ret->netIO = NULL;
Daniel Veillarde2d034d1999-07-27 19:52:06 +000075
76 return(ret);
77}
78
79/**
80 * xmlFreeParserInputBuffer:
81 * @in: a buffered parser input
82 *
83 * Free up the memory used by a buffered parser input
84 */
85void
86xmlFreeParserInputBuffer(xmlParserInputBufferPtr in) {
87 if (in->buffer != NULL) {
88 xmlBufferFree(in->buffer);
89 in->buffer = NULL;
90 }
91#ifdef HAVE_ZLIB_H
92 if (in->gzfile != NULL)
93 gzclose(in->gzfile);
94#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +000095 if (in->netIO != NULL)
96 xmlNanoHTTPClose(in->netIO);
Daniel Veillarde2d034d1999-07-27 19:52:06 +000097 if (in->fd >= 0)
98 close(in->fd);
99 memset(in, 0xbe, (size_t) sizeof(xmlParserInputBuffer));
Daniel Veillard6454aec1999-09-02 22:04:43 +0000100 xmlFree(in);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000101}
102
103/**
104 * xmlParserInputBufferCreateFilename:
105 * @filename: a C string containing the filename
106 * @enc: the charset encoding if known
107 *
108 * Create a buffered parser input for the progressive parsing of a file
109 * If filename is "-' then we use stdin as the input.
110 * Automatic support for ZLIB/Compress compressed document is provided
111 * by default if found at compile-time.
112 *
113 * Returns the new parser input or NULL
114 */
115xmlParserInputBufferPtr
116xmlParserInputBufferCreateFilename(const char *filename, xmlCharEncoding enc) {
117 xmlParserInputBufferPtr ret;
118#ifdef HAVE_ZLIB_H
Daniel Veillard7f7d1111999-09-22 09:46:25 +0000119 gzFile input = 0;
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000120#else
121 int input = -1;
122#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +0000123 void *netIO = NULL;
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000124
125 if (filename == NULL) return(NULL);
126
Daniel Veillard7f7d1111999-09-22 09:46:25 +0000127 if (!strncmp(filename, "http://", 7)) {
128 netIO = xmlNanoHTTPOpen(filename, NULL);
129 if (netIO == NULL) {
130#ifdef VERBOSE_FAILURE
131 fprintf (stderr, "Cannot read URL %s\n", filename);
132 perror ("xmlNanoHTTPOpen failed");
133#endif
134 return(NULL);
135 }
136 } else if (!strcmp(filename, "-")) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000137#ifdef HAVE_ZLIB_H
138 input = gzdopen (fileno(stdin), "r");
139 if (input == NULL) {
140#ifdef VERBOSE_FAILURE
141 fprintf (stderr, "Cannot read from stdin\n");
142 perror ("gzdopen failed");
143#endif
144 return(NULL);
145 }
146#else
147#ifdef WIN32
148 input = -1;
149#else
150 input = fileno(stdin);
151#endif
152 if (input < 0) {
153#ifdef VERBOSE_FAILURE
154 fprintf (stderr, "Cannot read from stdin\n");
155 perror ("open failed");
156#endif
157 return(NULL);
158 }
159#endif
160 } else {
161#ifdef HAVE_ZLIB_H
162 input = gzopen (filename, "r");
163 if (input == NULL) {
164#ifdef VERBOSE_FAILURE
165 fprintf (stderr, "Cannot read file %s :\n", filename);
166 perror ("gzopen failed");
167#endif
168 return(NULL);
169 }
170#else
171#ifdef WIN32
172 input = _open (filename, O_RDONLY | _O_BINARY);
173#else
174 input = open (filename, O_RDONLY);
175#endif
176 if (input < 0) {
177#ifdef VERBOSE_FAILURE
178 fprintf (stderr, "Cannot read file %s :\n", filename);
179 perror ("open failed");
180#endif
181 return(NULL);
182 }
183#endif
184 }
185 /*
Daniel Veillardb96e6431999-08-29 21:02:19 +0000186 * TODO : get the 4 first bytes and decode the charset
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000187 * if enc == XML_CHAR_ENCODING_NONE
188 * plug some encoding conversion routines here. !!!
189 * enc = xmlDetectCharEncoding(buffer);
190 */
191
192 ret = xmlAllocParserInputBuffer(enc);
193 if (ret != NULL) {
194#ifdef HAVE_ZLIB_H
195 ret->gzfile = input;
196#else
197 ret->fd = input;
198#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +0000199 ret->netIO = netIO;
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000200 }
201 xmlParserInputBufferRead(ret, 4);
202
203 return(ret);
204}
205
206/**
207 * xmlParserInputBufferCreateFile:
208 * @file: a FILE*
209 * @enc: the charset encoding if known
210 *
211 * Create a buffered parser input for the progressive parsing of a FILE *
212 * buffered C I/O
213 *
214 * Returns the new parser input or NULL
215 */
216xmlParserInputBufferPtr
217xmlParserInputBufferCreateFile(FILE *file, xmlCharEncoding enc) {
218 xmlParserInputBufferPtr ret;
219
220 if (file == NULL) return(NULL);
221
222 ret = xmlAllocParserInputBuffer(enc);
223 if (ret != NULL)
224 ret->file = file;
225
226 return(ret);
227}
228
229/**
230 * xmlParserInputBufferCreateFd:
231 * @fd: a file descriptor number
232 * @enc: the charset encoding if known
233 *
234 * Create a buffered parser input for the progressive parsing for the input
235 * from a file descriptor
236 *
237 * Returns the new parser input or NULL
238 */
239xmlParserInputBufferPtr
240xmlParserInputBufferCreateFd(int fd, xmlCharEncoding enc) {
241 xmlParserInputBufferPtr ret;
242
243 if (fd < 0) return(NULL);
244
245 ret = xmlAllocParserInputBuffer(enc);
246 if (ret != NULL)
247 ret->fd = fd;
248
249 return(ret);
250}
251
252/**
Daniel Veillard7f858501999-11-17 17:32:38 +0000253 * xmlParserInputBufferPush:
254 * @in: a buffered parser input
255 * @buf: an char array
256 * @len: the size in bytes of the array.
257 *
258 * Push the content of the arry in the input buffer
259 * This routine handle the I18N transcoding to internal UTF-8
260 * This is used when operating the parser in progressive (push) mode.
261 *
262 * Returns the number of chars read and stored in the buffer, or -1
263 * in case of error.
264 */
265int
Daniel Veillarda819dac1999-11-24 18:04:22 +0000266xmlParserInputBufferPush(xmlParserInputBufferPtr in, int len, const char *buf) {
Daniel Veillard7f858501999-11-17 17:32:38 +0000267 char *buffer = NULL;
268 int nbchars = 0;
269
270 if (len < 0) return(0);
271 if (in->encoder != NULL) {
272 xmlChar *buf;
273
274 buf = (xmlChar *) xmlMalloc((len + 1) * 2 * sizeof(xmlChar));
275 if (buf == NULL) {
276 fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n");
277 xmlFree(buffer);
278 return(-1);
279 }
280 nbchars = in->encoder->input(buf, (len + 1) * 2 * sizeof(xmlChar),
281 BAD_CAST buffer, len);
282 /*
283 * TODO : we really need to have something atomic or the
284 * encoder must report the number of bytes read
285 */
286 buf[nbchars] = 0;
287 xmlBufferAdd(in->buffer, (xmlChar *) buf, nbchars);
288 xmlFree(buf);
289 } else {
290 nbchars = len;
291 buffer[nbchars] = 0;
292 xmlBufferAdd(in->buffer, (xmlChar *) buffer, nbchars);
293 }
294#ifdef DEBUG_INPUT
295 fprintf(stderr, "I/O: pushed %d chars, buffer %d/%d\n",
296 nbchars, in->buffer->use, in->buffer->size);
297#endif
298 return(nbchars);
299}
300
301/**
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000302 * xmlParserInputBufferGrow:
303 * @in: a buffered parser input
304 * @len: indicative value of the amount of chars to read
305 *
306 * Grow up the content of the input buffer, the old data are preserved
307 * This routine handle the I18N transcoding to internal UTF-8
Daniel Veillard7f858501999-11-17 17:32:38 +0000308 * This routine is used when operating the parser in normal (pull) mode
Daniel Veillardb96e6431999-08-29 21:02:19 +0000309 * TODO: one should be able to remove one extra copy
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000310 *
311 * Returns the number of chars read and stored in the buffer, or -1
312 * in case of error.
313 */
314int
315xmlParserInputBufferGrow(xmlParserInputBufferPtr in, int len) {
316 char *buffer = NULL;
317#ifdef HAVE_ZLIB_H
318 gzFile input = (gzFile) in->gzfile;
319#endif
320 int res = 0;
321 int nbchars = 0;
322 int buffree;
323
324 if ((len <= MINLEN) && (len != 4))
325 len = MINLEN;
326 buffree = in->buffer->size - in->buffer->use;
327 if (buffree <= 0) {
328 fprintf(stderr, "xmlParserInputBufferGrow : buffer full !\n");
329 return(0);
330 }
331 if (len > buffree)
332 len = buffree;
333
Daniel Veillard6454aec1999-09-02 22:04:43 +0000334 buffer = xmlMalloc((len + 1) * sizeof(char));
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000335 if (buffer == NULL) {
336 fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n");
337 return(-1);
338 }
Daniel Veillard7f7d1111999-09-22 09:46:25 +0000339 if (in->netIO != NULL) {
340 res = xmlNanoHTTPRead(in->netIO, &buffer[0], len);
341 } else if (in->file != NULL) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000342 res = fread(&buffer[0], 1, len, in->file);
343#ifdef HAVE_ZLIB_H
344 } else if (in->gzfile != NULL) {
345 res = gzread(input, &buffer[0], len);
346#endif
347 } else if (in->fd >= 0) {
348 res = read(in->fd, &buffer[0], len);
349 } else {
350 fprintf(stderr, "xmlParserInputBufferGrow : no input !\n");
Daniel Veillard6454aec1999-09-02 22:04:43 +0000351 xmlFree(buffer);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000352 return(-1);
353 }
354 if (res == 0) {
Daniel Veillard6454aec1999-09-02 22:04:43 +0000355 xmlFree(buffer);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000356 return(0);
357 }
358 if (res < 0) {
359 perror ("read error");
Daniel Veillard6454aec1999-09-02 22:04:43 +0000360 xmlFree(buffer);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000361 return(-1);
362 }
363 if (in->encoder != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000364 xmlChar *buf;
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000365
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000366 buf = (xmlChar *) xmlMalloc((res + 1) * 2 * sizeof(xmlChar));
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000367 if (buf == NULL) {
368 fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n");
Daniel Veillard6454aec1999-09-02 22:04:43 +0000369 xmlFree(buffer);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000370 return(-1);
371 }
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000372 nbchars = in->encoder->input(buf, (res + 1) * 2 * sizeof(xmlChar),
Daniel Veillardb96e6431999-08-29 21:02:19 +0000373 BAD_CAST buffer, res);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000374 buf[nbchars] = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000375 xmlBufferAdd(in->buffer, (xmlChar *) buf, nbchars);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000376 xmlFree(buf);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000377 } else {
378 nbchars = res;
379 buffer[nbchars] = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000380 xmlBufferAdd(in->buffer, (xmlChar *) buffer, nbchars);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000381 }
382#ifdef DEBUG_INPUT
383 fprintf(stderr, "I/O: read %d chars, buffer %d/%d\n",
384 nbchars, in->buffer->use, in->buffer->size);
385#endif
Daniel Veillard6454aec1999-09-02 22:04:43 +0000386 xmlFree(buffer);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000387 return(nbchars);
388}
389
390/**
391 * xmlParserInputBufferRead:
392 * @in: a buffered parser input
393 * @len: indicative value of the amount of chars to read
394 *
395 * Refresh the content of the input buffer, the old data are considered
396 * consumed
397 * This routine handle the I18N transcoding to internal UTF-8
398 *
399 * Returns the number of chars read and stored in the buffer, or -1
400 * in case of error.
401 */
402int
403xmlParserInputBufferRead(xmlParserInputBufferPtr in, int len) {
404 /* xmlBufferEmpty(in->buffer); */
405 return(xmlParserInputBufferGrow(in, len));
406}
407
/*
 * xmlParserGetDirectory:
 * @filename:  the path to a file
 *
 * Lookup the directory for that file: everything before the last path
 * separator ('\\' on WIN32, '/' otherwise) of @filename, which is first
 * truncated to 1023 bytes. A separator in first position yields that
 * single separator. Without any separator the current working directory
 * is used instead.
 *
 * Returns a new allocated string containing the directory, or NULL.
 */
char *
xmlParserGetDirectory(const char *filename) {
    char path[1024];
    char *last;
#ifdef WIN32
    char sep = '\\';
#else
    char sep = '/';
#endif

    if (filename == NULL)
        return(NULL);

    strncpy(path, filename, 1023);
    path[1023] = 0;

    last = strrchr(path, sep);
    if (last == NULL) {
        /* no directory part in the name: fall back to the cwd */
        if (getcwd(path, 1024) == NULL)
            return(NULL);
        path[1023] = 0;
        return(xmlMemStrdup(path));
    }

    if (last == path)
        path[1] = 0;    /* keep the leading separator itself */
    else
        *last = 0;      /* cut the name right before the separator */
    return(xmlMemStrdup(path));
}
447
Daniel Veillardb96e6431999-08-29 21:02:19 +0000448/****************************************************************
449 * *
450 * External entities loading *
451 * *
452 ****************************************************************/
453
454/*
455 * xmlDefaultExternalEntityLoader:
456 * @URL: the URL for the entity to load
457 * @ID: the System ID for the entity to load
458 * @context: the context in which the entity is called or NULL
459 *
460 * By default we don't load external entitites, yet.
461 * TODO: get a sample http implementation and scan for existing one
462 * at compile time.
463 *
464 * Returns a new allocated xmlParserInputPtr, or NULL.
465 */
466static
467xmlParserInputPtr
468xmlDefaultExternalEntityLoader(const char *URL, const char *ID,
469 xmlParserInputPtr context) {
470#ifdef DEBUG_EXTERNAL_ENTITIES
471 fprintf(stderr, "xmlDefaultExternalEntityLoader(%s, xxx)\n", URL);
472#endif
473 return(NULL);
474}
475
476static xmlExternalEntityLoader xmlCurrentExternalEntityLoader =
477 xmlDefaultExternalEntityLoader;
478
479/*
480 * xmlSetExternalEntityLoader:
481 * @f: the new entity resolver function
482 *
483 * Changes the defaultexternal entity resolver function for the application
484 */
485void
486xmlSetExternalEntityLoader(xmlExternalEntityLoader f) {
487 xmlCurrentExternalEntityLoader = f;
488}
489
490/*
491 * xmlGetExternalEntityLoader:
492 *
493 * Get the default external entity resolver function for the application
494 *
495 * Returns the xmlExternalEntityLoader function pointer
496 */
497xmlExternalEntityLoader
498xmlGetExternalEntityLoader(void) {
499 return(xmlCurrentExternalEntityLoader);
500}
501
502/*
503 * xmlLoadExternalEntity:
504 * @URL: the URL for the entity to load
505 * @ID: the System ID for the entity to load
506 * @context: the context in which the entity is called or NULL
507 *
508 * Load an external entity, note that the use of this function for
509 * unparsed entities may generate problems
510 * TODO: a more generic External entitiy API must be designed
511 *
512 * Returns the xmlParserInputPtr or NULL
513 */
514xmlParserInputPtr
515xmlLoadExternalEntity(const char *URL, const char *ID,
516 xmlParserInputPtr context) {
517 return(xmlCurrentExternalEntityLoader(URL, ID, context));
518}
519