blob: 668caaba011f88f36ebed1ae5de8bae09ddfa797 [file] [log] [blame]
/*
 * xmlIO.c : implementation of the I/O interfaces used by the parser
 *
 * See Copyright for the status of this software.
 *
 * Daniel.Veillard@w3.org
 */
8
Daniel Veillard7f7d1111999-09-22 09:46:25 +00009#ifdef WIN32
10#define HAVE_FCNTL_H
11#include <io.h>
12#else
Daniel Veillarde2d034d1999-07-27 19:52:06 +000013#include "config.h"
Daniel Veillard7f7d1111999-09-22 09:46:25 +000014#endif
Daniel Veillarde2d034d1999-07-27 19:52:06 +000015
Daniel Veillard7f7d1111999-09-22 09:46:25 +000016#include <stdio.h>
17#include <string.h>
18
19#ifdef HAVE_SYS_TYPES_H
Daniel Veillarde2d034d1999-07-27 19:52:06 +000020#include <sys/types.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000021#endif
22#ifdef HAVE_SYS_STAT_H
Daniel Veillarde2d034d1999-07-27 19:52:06 +000023#include <sys/stat.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000024#endif
25#ifdef HAVE_FCNTL_H
Daniel Veillarde2d034d1999-07-27 19:52:06 +000026#include <fcntl.h>
Daniel Veillard7f7d1111999-09-22 09:46:25 +000027#endif
Daniel Veillarde2d034d1999-07-27 19:52:06 +000028#ifdef HAVE_UNISTD_H
29#include <unistd.h>
30#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +000031#ifdef HAVE_STDLIB_H
32#include <stdlib.h>
33#endif
Daniel Veillarde2d034d1999-07-27 19:52:06 +000034#ifdef HAVE_ZLIB_H
35#include <zlib.h>
36#endif
37
Daniel Veillard6454aec1999-09-02 22:04:43 +000038#include "xmlmemory.h"
Daniel Veillardb96e6431999-08-29 21:02:19 +000039#include "parser.h"
Daniel Veillarde2d034d1999-07-27 19:52:06 +000040#include "xmlIO.h"
Daniel Veillard7f7d1111999-09-22 09:46:25 +000041#include "nanohttp.h"
Daniel Veillarde2d034d1999-07-27 19:52:06 +000042
/* Optional compile-time debugging switches, all disabled by default. */
/* #define DEBUG_INPUT */
/* #define VERBOSE_FAILURE */
/* #define DEBUG_EXTERNAL_ENTITIES */

/*
 * MINLEN: minimal amount of data xmlParserInputBufferGrow() asks for;
 * 40 when DEBUG_INPUT is defined, 4000 otherwise.
 */
#ifndef DEBUG_INPUT
#define MINLEN 4000
#else
#define MINLEN 40
#endif
52
53/**
54 * xmlAllocParserInputBuffer:
55 * @enc: the charset encoding if known
56 *
57 * Create a buffered parser input for progressive parsing
58 *
59 * Returns the new parser input or NULL
60 */
61xmlParserInputBufferPtr
62xmlAllocParserInputBuffer(xmlCharEncoding enc) {
63 xmlParserInputBufferPtr ret;
64
Daniel Veillard6454aec1999-09-02 22:04:43 +000065 ret = (xmlParserInputBufferPtr) xmlMalloc(sizeof(xmlParserInputBuffer));
Daniel Veillarde2d034d1999-07-27 19:52:06 +000066 if (ret == NULL) {
67 fprintf(stderr, "xmlAllocParserInputBuffer : out of memory!\n");
68 return(NULL);
69 }
70 memset(ret, 0, (size_t) sizeof(xmlParserInputBuffer));
71 ret->buffer = xmlBufferCreate();
72 ret->encoder = xmlGetCharEncodingHandler(enc);
73 ret->fd = -1;
Daniel Veillard7f7d1111999-09-22 09:46:25 +000074 ret->netIO = NULL;
Daniel Veillarde2d034d1999-07-27 19:52:06 +000075
76 return(ret);
77}
78
79/**
80 * xmlFreeParserInputBuffer:
81 * @in: a buffered parser input
82 *
83 * Free up the memory used by a buffered parser input
84 */
85void
86xmlFreeParserInputBuffer(xmlParserInputBufferPtr in) {
87 if (in->buffer != NULL) {
88 xmlBufferFree(in->buffer);
89 in->buffer = NULL;
90 }
91#ifdef HAVE_ZLIB_H
92 if (in->gzfile != NULL)
93 gzclose(in->gzfile);
94#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +000095 if (in->netIO != NULL)
96 xmlNanoHTTPClose(in->netIO);
Daniel Veillarde2d034d1999-07-27 19:52:06 +000097 if (in->fd >= 0)
98 close(in->fd);
99 memset(in, 0xbe, (size_t) sizeof(xmlParserInputBuffer));
Daniel Veillard6454aec1999-09-02 22:04:43 +0000100 xmlFree(in);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000101}
102
103/**
104 * xmlParserInputBufferCreateFilename:
105 * @filename: a C string containing the filename
106 * @enc: the charset encoding if known
107 *
108 * Create a buffered parser input for the progressive parsing of a file
109 * If filename is "-' then we use stdin as the input.
110 * Automatic support for ZLIB/Compress compressed document is provided
111 * by default if found at compile-time.
112 *
113 * Returns the new parser input or NULL
114 */
115xmlParserInputBufferPtr
116xmlParserInputBufferCreateFilename(const char *filename, xmlCharEncoding enc) {
117 xmlParserInputBufferPtr ret;
118#ifdef HAVE_ZLIB_H
Daniel Veillard7f7d1111999-09-22 09:46:25 +0000119 gzFile input = 0;
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000120#else
121 int input = -1;
122#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +0000123 void *netIO = NULL;
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000124
125 if (filename == NULL) return(NULL);
126
Daniel Veillard7f7d1111999-09-22 09:46:25 +0000127 if (!strncmp(filename, "http://", 7)) {
128 netIO = xmlNanoHTTPOpen(filename, NULL);
129 if (netIO == NULL) {
130#ifdef VERBOSE_FAILURE
131 fprintf (stderr, "Cannot read URL %s\n", filename);
132 perror ("xmlNanoHTTPOpen failed");
133#endif
134 return(NULL);
135 }
136 } else if (!strcmp(filename, "-")) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000137#ifdef HAVE_ZLIB_H
138 input = gzdopen (fileno(stdin), "r");
139 if (input == NULL) {
140#ifdef VERBOSE_FAILURE
141 fprintf (stderr, "Cannot read from stdin\n");
142 perror ("gzdopen failed");
143#endif
144 return(NULL);
145 }
146#else
147#ifdef WIN32
148 input = -1;
149#else
150 input = fileno(stdin);
151#endif
152 if (input < 0) {
153#ifdef VERBOSE_FAILURE
154 fprintf (stderr, "Cannot read from stdin\n");
155 perror ("open failed");
156#endif
157 return(NULL);
158 }
159#endif
160 } else {
161#ifdef HAVE_ZLIB_H
162 input = gzopen (filename, "r");
163 if (input == NULL) {
164#ifdef VERBOSE_FAILURE
165 fprintf (stderr, "Cannot read file %s :\n", filename);
166 perror ("gzopen failed");
167#endif
168 return(NULL);
169 }
170#else
171#ifdef WIN32
172 input = _open (filename, O_RDONLY | _O_BINARY);
173#else
174 input = open (filename, O_RDONLY);
175#endif
176 if (input < 0) {
177#ifdef VERBOSE_FAILURE
178 fprintf (stderr, "Cannot read file %s :\n", filename);
179 perror ("open failed");
180#endif
181 return(NULL);
182 }
183#endif
184 }
185 /*
Daniel Veillardb96e6431999-08-29 21:02:19 +0000186 * TODO : get the 4 first bytes and decode the charset
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000187 * if enc == XML_CHAR_ENCODING_NONE
188 * plug some encoding conversion routines here. !!!
189 * enc = xmlDetectCharEncoding(buffer);
190 */
191
192 ret = xmlAllocParserInputBuffer(enc);
193 if (ret != NULL) {
194#ifdef HAVE_ZLIB_H
195 ret->gzfile = input;
196#else
197 ret->fd = input;
198#endif
Daniel Veillard7f7d1111999-09-22 09:46:25 +0000199 ret->netIO = netIO;
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000200 }
201 xmlParserInputBufferRead(ret, 4);
202
203 return(ret);
204}
205
206/**
207 * xmlParserInputBufferCreateFile:
208 * @file: a FILE*
209 * @enc: the charset encoding if known
210 *
211 * Create a buffered parser input for the progressive parsing of a FILE *
212 * buffered C I/O
213 *
214 * Returns the new parser input or NULL
215 */
216xmlParserInputBufferPtr
217xmlParserInputBufferCreateFile(FILE *file, xmlCharEncoding enc) {
218 xmlParserInputBufferPtr ret;
219
220 if (file == NULL) return(NULL);
221
222 ret = xmlAllocParserInputBuffer(enc);
223 if (ret != NULL)
224 ret->file = file;
225
226 return(ret);
227}
228
229/**
230 * xmlParserInputBufferCreateFd:
231 * @fd: a file descriptor number
232 * @enc: the charset encoding if known
233 *
234 * Create a buffered parser input for the progressive parsing for the input
235 * from a file descriptor
236 *
237 * Returns the new parser input or NULL
238 */
239xmlParserInputBufferPtr
240xmlParserInputBufferCreateFd(int fd, xmlCharEncoding enc) {
241 xmlParserInputBufferPtr ret;
242
243 if (fd < 0) return(NULL);
244
245 ret = xmlAllocParserInputBuffer(enc);
246 if (ret != NULL)
247 ret->fd = fd;
248
249 return(ret);
250}
251
252/**
253 * xmlParserInputBufferGrow:
254 * @in: a buffered parser input
255 * @len: indicative value of the amount of chars to read
256 *
257 * Grow up the content of the input buffer, the old data are preserved
258 * This routine handle the I18N transcoding to internal UTF-8
Daniel Veillardb96e6431999-08-29 21:02:19 +0000259 * TODO: one should be able to remove one extra copy
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000260 *
261 * Returns the number of chars read and stored in the buffer, or -1
262 * in case of error.
263 */
264int
265xmlParserInputBufferGrow(xmlParserInputBufferPtr in, int len) {
266 char *buffer = NULL;
267#ifdef HAVE_ZLIB_H
268 gzFile input = (gzFile) in->gzfile;
269#endif
270 int res = 0;
271 int nbchars = 0;
272 int buffree;
273
274 if ((len <= MINLEN) && (len != 4))
275 len = MINLEN;
276 buffree = in->buffer->size - in->buffer->use;
277 if (buffree <= 0) {
278 fprintf(stderr, "xmlParserInputBufferGrow : buffer full !\n");
279 return(0);
280 }
281 if (len > buffree)
282 len = buffree;
283
Daniel Veillard6454aec1999-09-02 22:04:43 +0000284 buffer = xmlMalloc((len + 1) * sizeof(char));
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000285 if (buffer == NULL) {
286 fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n");
287 return(-1);
288 }
Daniel Veillard7f7d1111999-09-22 09:46:25 +0000289 if (in->netIO != NULL) {
290 res = xmlNanoHTTPRead(in->netIO, &buffer[0], len);
291 } else if (in->file != NULL) {
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000292 res = fread(&buffer[0], 1, len, in->file);
293#ifdef HAVE_ZLIB_H
294 } else if (in->gzfile != NULL) {
295 res = gzread(input, &buffer[0], len);
296#endif
297 } else if (in->fd >= 0) {
298 res = read(in->fd, &buffer[0], len);
299 } else {
300 fprintf(stderr, "xmlParserInputBufferGrow : no input !\n");
Daniel Veillard6454aec1999-09-02 22:04:43 +0000301 xmlFree(buffer);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000302 return(-1);
303 }
304 if (res == 0) {
Daniel Veillard6454aec1999-09-02 22:04:43 +0000305 xmlFree(buffer);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000306 return(0);
307 }
308 if (res < 0) {
309 perror ("read error");
Daniel Veillard6454aec1999-09-02 22:04:43 +0000310 xmlFree(buffer);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000311 return(-1);
312 }
313 if (in->encoder != NULL) {
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000314 xmlChar *buf;
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000315
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000316 buf = (xmlChar *) xmlMalloc((res + 1) * 2 * sizeof(xmlChar));
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000317 if (buf == NULL) {
318 fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n");
Daniel Veillard6454aec1999-09-02 22:04:43 +0000319 xmlFree(buffer);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000320 return(-1);
321 }
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000322 nbchars = in->encoder->input(buf, (res + 1) * 2 * sizeof(xmlChar),
Daniel Veillardb96e6431999-08-29 21:02:19 +0000323 BAD_CAST buffer, res);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000324 buf[nbchars] = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000325 xmlBufferAdd(in->buffer, (xmlChar *) buf, nbchars);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000326 xmlFree(buf);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000327 } else {
328 nbchars = res;
329 buffer[nbchars] = 0;
Daniel Veillarddd6b3671999-09-23 22:19:22 +0000330 xmlBufferAdd(in->buffer, (xmlChar *) buffer, nbchars);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000331 }
332#ifdef DEBUG_INPUT
333 fprintf(stderr, "I/O: read %d chars, buffer %d/%d\n",
334 nbchars, in->buffer->use, in->buffer->size);
335#endif
Daniel Veillard6454aec1999-09-02 22:04:43 +0000336 xmlFree(buffer);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000337 return(nbchars);
338}
339
340/**
341 * xmlParserInputBufferRead:
342 * @in: a buffered parser input
343 * @len: indicative value of the amount of chars to read
344 *
345 * Refresh the content of the input buffer, the old data are considered
346 * consumed
347 * This routine handle the I18N transcoding to internal UTF-8
348 *
349 * Returns the number of chars read and stored in the buffer, or -1
350 * in case of error.
351 */
352int
353xmlParserInputBufferRead(xmlParserInputBufferPtr in, int len) {
354 /* xmlBufferEmpty(in->buffer); */
355 return(xmlParserInputBufferGrow(in, len));
356}
357
/**
 * xmlParserGetDirectory:
 * @filename:  the path to a file
 *
 * Lookup the directory for that file. The path (truncated to 1023
 * chars) is cut at its last separator, '\\' on WIN32 and '/' elsewhere;
 * a separator in first position yields the one-char root. When the path
 * holds no separator the current working directory is used instead.
 *
 * Returns a new allocated string containing the directory, or NULL.
 */
char *
xmlParserGetDirectory(const char *filename) {
    char dir[1024];
    char *last;
#ifdef WIN32
    const char sep = '\\';
#else
    const char sep = '/';
#endif

    if (filename == NULL)
        return(NULL);

    strncpy(dir, filename, 1023);
    dir[1023] = 0;

    last = strrchr(dir, sep);
    if (last != NULL) {
        /* keep the leading separator when it is the only one */
        if (last == dir)
            dir[1] = 0;
        else
            *last = 0;
        return(xmlMemStrdup(dir));
    }

    if (getcwd(dir, 1024) == NULL)
        return(NULL);
    dir[1023] = 0;
    return(xmlMemStrdup(dir));
}
397
Daniel Veillardb96e6431999-08-29 21:02:19 +0000398/****************************************************************
399 * *
400 * External entities loading *
401 * *
402 ****************************************************************/
403
404/*
405 * xmlDefaultExternalEntityLoader:
406 * @URL: the URL for the entity to load
407 * @ID: the System ID for the entity to load
408 * @context: the context in which the entity is called or NULL
409 *
410 * By default we don't load external entitites, yet.
411 * TODO: get a sample http implementation and scan for existing one
412 * at compile time.
413 *
414 * Returns a new allocated xmlParserInputPtr, or NULL.
415 */
416static
417xmlParserInputPtr
418xmlDefaultExternalEntityLoader(const char *URL, const char *ID,
419 xmlParserInputPtr context) {
420#ifdef DEBUG_EXTERNAL_ENTITIES
421 fprintf(stderr, "xmlDefaultExternalEntityLoader(%s, xxx)\n", URL);
422#endif
423 return(NULL);
424}
425
426static xmlExternalEntityLoader xmlCurrentExternalEntityLoader =
427 xmlDefaultExternalEntityLoader;
428
429/*
430 * xmlSetExternalEntityLoader:
431 * @f: the new entity resolver function
432 *
433 * Changes the defaultexternal entity resolver function for the application
434 */
435void
436xmlSetExternalEntityLoader(xmlExternalEntityLoader f) {
437 xmlCurrentExternalEntityLoader = f;
438}
439
440/*
441 * xmlGetExternalEntityLoader:
442 *
443 * Get the default external entity resolver function for the application
444 *
445 * Returns the xmlExternalEntityLoader function pointer
446 */
447xmlExternalEntityLoader
448xmlGetExternalEntityLoader(void) {
449 return(xmlCurrentExternalEntityLoader);
450}
451
452/*
453 * xmlLoadExternalEntity:
454 * @URL: the URL for the entity to load
455 * @ID: the System ID for the entity to load
456 * @context: the context in which the entity is called or NULL
457 *
458 * Load an external entity, note that the use of this function for
459 * unparsed entities may generate problems
460 * TODO: a more generic External entitiy API must be designed
461 *
462 * Returns the xmlParserInputPtr or NULL
463 */
464xmlParserInputPtr
465xmlLoadExternalEntity(const char *URL, const char *ID,
466 xmlParserInputPtr context) {
467 return(xmlCurrentExternalEntityLoader(URL, ID, context));
468}
469