blob: 8e065389bed2f9d4523cc3e7189cee27473b938d [file] [log] [blame]
Daniel Veillarde2d034d1999-07-27 19:52:06 +00001/*
2 * xmlIO.c : implementation of the I/O interfaces used by the parser
3 *
4 * See Copyright for the status of this software.
5 *
6 * Daniel.Veillard@w3.org
7 */
8
9#include "config.h"
10
11#include <sys/types.h>
12#include <sys/stat.h>
13#include <fcntl.h>
Daniel Veillarde2d034d1999-07-27 19:52:06 +000014#ifdef HAVE_UNISTD_H
15#include <unistd.h>
16#endif
17#ifdef HAVE_ZLIB_H
18#include <zlib.h>
19#endif
Daniel Veillardb05deb71999-08-10 19:04:08 +000020#include <string.h>
Daniel Veillarde2d034d1999-07-27 19:52:06 +000021
Daniel Veillard6454aec1999-09-02 22:04:43 +000022#include "xmlmemory.h"
Daniel Veillardb96e6431999-08-29 21:02:19 +000023#include "parser.h"
Daniel Veillarde2d034d1999-07-27 19:52:06 +000024#include "xmlIO.h"
25
26/* #define DEBUG_INPUT */
27/* #define VERBOSE_FAILURE */
Daniel Veillardb96e6431999-08-29 21:02:19 +000028/* #define DEBUG_EXTERNAL_ENTITIES */
Daniel Veillarde2d034d1999-07-27 19:52:06 +000029
/*
 * MINLEN is the smallest read size xmlParserInputBufferGrow() will ask for
 * (requests of exactly 4 bytes excepted). A tiny value is used when
 * DEBUG_INPUT is defined so that buffer refills happen often.
 */
#if defined(DEBUG_INPUT)
#  define MINLEN 40
#else
#  define MINLEN 4000
#endif
35
36/**
37 * xmlAllocParserInputBuffer:
38 * @enc: the charset encoding if known
39 *
40 * Create a buffered parser input for progressive parsing
41 *
42 * Returns the new parser input or NULL
43 */
44xmlParserInputBufferPtr
45xmlAllocParserInputBuffer(xmlCharEncoding enc) {
46 xmlParserInputBufferPtr ret;
47
Daniel Veillard6454aec1999-09-02 22:04:43 +000048 ret = (xmlParserInputBufferPtr) xmlMalloc(sizeof(xmlParserInputBuffer));
Daniel Veillarde2d034d1999-07-27 19:52:06 +000049 if (ret == NULL) {
50 fprintf(stderr, "xmlAllocParserInputBuffer : out of memory!\n");
51 return(NULL);
52 }
53 memset(ret, 0, (size_t) sizeof(xmlParserInputBuffer));
54 ret->buffer = xmlBufferCreate();
55 ret->encoder = xmlGetCharEncodingHandler(enc);
56 ret->fd = -1;
57
58 return(ret);
59}
60
61/**
62 * xmlFreeParserInputBuffer:
63 * @in: a buffered parser input
64 *
65 * Free up the memory used by a buffered parser input
66 */
67void
68xmlFreeParserInputBuffer(xmlParserInputBufferPtr in) {
69 if (in->buffer != NULL) {
70 xmlBufferFree(in->buffer);
71 in->buffer = NULL;
72 }
73#ifdef HAVE_ZLIB_H
74 if (in->gzfile != NULL)
75 gzclose(in->gzfile);
76#endif
77 if (in->fd >= 0)
78 close(in->fd);
79 memset(in, 0xbe, (size_t) sizeof(xmlParserInputBuffer));
Daniel Veillard6454aec1999-09-02 22:04:43 +000080 xmlFree(in);
Daniel Veillarde2d034d1999-07-27 19:52:06 +000081}
82
83/**
84 * xmlParserInputBufferCreateFilename:
85 * @filename: a C string containing the filename
86 * @enc: the charset encoding if known
87 *
88 * Create a buffered parser input for the progressive parsing of a file
89 * If filename is "-' then we use stdin as the input.
90 * Automatic support for ZLIB/Compress compressed document is provided
91 * by default if found at compile-time.
92 *
93 * Returns the new parser input or NULL
94 */
95xmlParserInputBufferPtr
96xmlParserInputBufferCreateFilename(const char *filename, xmlCharEncoding enc) {
97 xmlParserInputBufferPtr ret;
98#ifdef HAVE_ZLIB_H
99 gzFile input;
100#else
101 int input = -1;
102#endif
103
104 if (filename == NULL) return(NULL);
105
106 if (!strcmp(filename, "-")) {
107#ifdef HAVE_ZLIB_H
108 input = gzdopen (fileno(stdin), "r");
109 if (input == NULL) {
110#ifdef VERBOSE_FAILURE
111 fprintf (stderr, "Cannot read from stdin\n");
112 perror ("gzdopen failed");
113#endif
114 return(NULL);
115 }
116#else
117#ifdef WIN32
118 input = -1;
119#else
120 input = fileno(stdin);
121#endif
122 if (input < 0) {
123#ifdef VERBOSE_FAILURE
124 fprintf (stderr, "Cannot read from stdin\n");
125 perror ("open failed");
126#endif
127 return(NULL);
128 }
129#endif
130 } else {
131#ifdef HAVE_ZLIB_H
132 input = gzopen (filename, "r");
133 if (input == NULL) {
134#ifdef VERBOSE_FAILURE
135 fprintf (stderr, "Cannot read file %s :\n", filename);
136 perror ("gzopen failed");
137#endif
138 return(NULL);
139 }
140#else
141#ifdef WIN32
142 input = _open (filename, O_RDONLY | _O_BINARY);
143#else
144 input = open (filename, O_RDONLY);
145#endif
146 if (input < 0) {
147#ifdef VERBOSE_FAILURE
148 fprintf (stderr, "Cannot read file %s :\n", filename);
149 perror ("open failed");
150#endif
151 return(NULL);
152 }
153#endif
154 }
155 /*
Daniel Veillardb96e6431999-08-29 21:02:19 +0000156 * TODO : get the 4 first bytes and decode the charset
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000157 * if enc == XML_CHAR_ENCODING_NONE
158 * plug some encoding conversion routines here. !!!
159 * enc = xmlDetectCharEncoding(buffer);
160 */
161
162 ret = xmlAllocParserInputBuffer(enc);
163 if (ret != NULL) {
164#ifdef HAVE_ZLIB_H
165 ret->gzfile = input;
166#else
167 ret->fd = input;
168#endif
169 }
170 xmlParserInputBufferRead(ret, 4);
171
172 return(ret);
173}
174
175/**
176 * xmlParserInputBufferCreateFile:
177 * @file: a FILE*
178 * @enc: the charset encoding if known
179 *
180 * Create a buffered parser input for the progressive parsing of a FILE *
181 * buffered C I/O
182 *
183 * Returns the new parser input or NULL
184 */
185xmlParserInputBufferPtr
186xmlParserInputBufferCreateFile(FILE *file, xmlCharEncoding enc) {
187 xmlParserInputBufferPtr ret;
188
189 if (file == NULL) return(NULL);
190
191 ret = xmlAllocParserInputBuffer(enc);
192 if (ret != NULL)
193 ret->file = file;
194
195 return(ret);
196}
197
198/**
199 * xmlParserInputBufferCreateFd:
200 * @fd: a file descriptor number
201 * @enc: the charset encoding if known
202 *
203 * Create a buffered parser input for the progressive parsing for the input
204 * from a file descriptor
205 *
206 * Returns the new parser input or NULL
207 */
208xmlParserInputBufferPtr
209xmlParserInputBufferCreateFd(int fd, xmlCharEncoding enc) {
210 xmlParserInputBufferPtr ret;
211
212 if (fd < 0) return(NULL);
213
214 ret = xmlAllocParserInputBuffer(enc);
215 if (ret != NULL)
216 ret->fd = fd;
217
218 return(ret);
219}
220
221/**
222 * xmlParserInputBufferGrow:
223 * @in: a buffered parser input
224 * @len: indicative value of the amount of chars to read
225 *
226 * Grow up the content of the input buffer, the old data are preserved
227 * This routine handle the I18N transcoding to internal UTF-8
Daniel Veillardb96e6431999-08-29 21:02:19 +0000228 * TODO: one should be able to remove one extra copy
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000229 *
230 * Returns the number of chars read and stored in the buffer, or -1
231 * in case of error.
232 */
233int
234xmlParserInputBufferGrow(xmlParserInputBufferPtr in, int len) {
235 char *buffer = NULL;
236#ifdef HAVE_ZLIB_H
237 gzFile input = (gzFile) in->gzfile;
238#endif
239 int res = 0;
240 int nbchars = 0;
241 int buffree;
242
243 if ((len <= MINLEN) && (len != 4))
244 len = MINLEN;
245 buffree = in->buffer->size - in->buffer->use;
246 if (buffree <= 0) {
247 fprintf(stderr, "xmlParserInputBufferGrow : buffer full !\n");
248 return(0);
249 }
250 if (len > buffree)
251 len = buffree;
252
Daniel Veillard6454aec1999-09-02 22:04:43 +0000253 buffer = xmlMalloc((len + 1) * sizeof(char));
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000254 if (buffer == NULL) {
255 fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n");
256 return(-1);
257 }
258 if (in->file != NULL) {
259 res = fread(&buffer[0], 1, len, in->file);
260#ifdef HAVE_ZLIB_H
261 } else if (in->gzfile != NULL) {
262 res = gzread(input, &buffer[0], len);
263#endif
264 } else if (in->fd >= 0) {
265 res = read(in->fd, &buffer[0], len);
266 } else {
267 fprintf(stderr, "xmlParserInputBufferGrow : no input !\n");
Daniel Veillard6454aec1999-09-02 22:04:43 +0000268 xmlFree(buffer);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000269 return(-1);
270 }
271 if (res == 0) {
Daniel Veillard6454aec1999-09-02 22:04:43 +0000272 xmlFree(buffer);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000273 return(0);
274 }
275 if (res < 0) {
276 perror ("read error");
Daniel Veillard6454aec1999-09-02 22:04:43 +0000277 xmlFree(buffer);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000278 return(-1);
279 }
280 if (in->encoder != NULL) {
281 CHAR *buf;
282
Daniel Veillard6454aec1999-09-02 22:04:43 +0000283 buf = (CHAR *) xmlMalloc((res + 1) * 2 * sizeof(CHAR));
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000284 if (buf == NULL) {
285 fprintf(stderr, "xmlParserInputBufferGrow : out of memory !\n");
Daniel Veillard6454aec1999-09-02 22:04:43 +0000286 xmlFree(buffer);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000287 return(-1);
288 }
289 nbchars = in->encoder->input(buf, (res + 1) * 2 * sizeof(CHAR),
Daniel Veillardb96e6431999-08-29 21:02:19 +0000290 BAD_CAST buffer, res);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000291 buf[nbchars] = 0;
292 xmlBufferAdd(in->buffer, (CHAR *) buf, nbchars);
Daniel Veillard6454aec1999-09-02 22:04:43 +0000293 xmlFree(buf);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000294 } else {
295 nbchars = res;
296 buffer[nbchars] = 0;
297 xmlBufferAdd(in->buffer, (CHAR *) buffer, nbchars);
298 }
299#ifdef DEBUG_INPUT
300 fprintf(stderr, "I/O: read %d chars, buffer %d/%d\n",
301 nbchars, in->buffer->use, in->buffer->size);
302#endif
Daniel Veillard6454aec1999-09-02 22:04:43 +0000303 xmlFree(buffer);
Daniel Veillarde2d034d1999-07-27 19:52:06 +0000304 return(nbchars);
305}
306
307/**
308 * xmlParserInputBufferRead:
309 * @in: a buffered parser input
310 * @len: indicative value of the amount of chars to read
311 *
312 * Refresh the content of the input buffer, the old data are considered
313 * consumed
314 * This routine handle the I18N transcoding to internal UTF-8
315 *
316 * Returns the number of chars read and stored in the buffer, or -1
317 * in case of error.
318 */
319int
320xmlParserInputBufferRead(xmlParserInputBufferPtr in, int len) {
321 /* xmlBufferEmpty(in->buffer); */
322 return(xmlParserInputBufferGrow(in, len));
323}
324
/*
 * xmlParserGetDirectory:
 * @filename:  the path to a file
 *
 * Look up the directory holding that file: everything before the last
 * path separator ('\\' on WIN32, '/' elsewhere). A separator in first
 * position yields that single separator. When the path (truncated to
 * 1023 characters) has no separator, the current working directory is
 * used instead.
 *
 * Returns a newly allocated string containing the directory, or NULL
 *         when @filename is NULL or the directory cannot be obtained.
 */
char *
xmlParserGetDirectory(const char *filename) {
    char path[1024];
    char *last;
#ifdef WIN32
    const char separator = '\\';
#else
    const char separator = '/';
#endif

    if (filename == NULL)
        return(NULL);

    /* work on a bounded, always terminated private copy */
    strncpy(path, filename, sizeof(path) - 1);
    path[sizeof(path) - 1] = 0;

    last = strrchr(path, separator);
    if (last != NULL) {
        if (last == path)
            path[1] = 0;        /* keep the lone leading separator */
        else
            *last = 0;          /* cut the file name off */
        return(xmlMemStrdup(path));
    }

    /* no directory part at all: fall back to the working directory */
    if (getcwd(path, sizeof(path)) == NULL)
        return(NULL);
    path[sizeof(path) - 1] = 0;
    return(xmlMemStrdup(path));
}
364
Daniel Veillardb96e6431999-08-29 21:02:19 +0000365/****************************************************************
366 * *
367 * External entities loading *
368 * *
369 ****************************************************************/
370
371/*
372 * xmlDefaultExternalEntityLoader:
373 * @URL: the URL for the entity to load
374 * @ID: the System ID for the entity to load
375 * @context: the context in which the entity is called or NULL
376 *
377 * By default we don't load external entitites, yet.
378 * TODO: get a sample http implementation and scan for existing one
379 * at compile time.
380 *
381 * Returns a new allocated xmlParserInputPtr, or NULL.
382 */
383static
384xmlParserInputPtr
385xmlDefaultExternalEntityLoader(const char *URL, const char *ID,
386 xmlParserInputPtr context) {
387#ifdef DEBUG_EXTERNAL_ENTITIES
388 fprintf(stderr, "xmlDefaultExternalEntityLoader(%s, xxx)\n", URL);
389#endif
390 return(NULL);
391}
392
393static xmlExternalEntityLoader xmlCurrentExternalEntityLoader =
394 xmlDefaultExternalEntityLoader;
395
396/*
397 * xmlSetExternalEntityLoader:
398 * @f: the new entity resolver function
399 *
400 * Changes the defaultexternal entity resolver function for the application
401 */
402void
403xmlSetExternalEntityLoader(xmlExternalEntityLoader f) {
404 xmlCurrentExternalEntityLoader = f;
405}
406
407/*
408 * xmlGetExternalEntityLoader:
409 *
410 * Get the default external entity resolver function for the application
411 *
412 * Returns the xmlExternalEntityLoader function pointer
413 */
414xmlExternalEntityLoader
415xmlGetExternalEntityLoader(void) {
416 return(xmlCurrentExternalEntityLoader);
417}
418
419/*
420 * xmlLoadExternalEntity:
421 * @URL: the URL for the entity to load
422 * @ID: the System ID for the entity to load
423 * @context: the context in which the entity is called or NULL
424 *
425 * Load an external entity, note that the use of this function for
426 * unparsed entities may generate problems
427 * TODO: a more generic External entitiy API must be designed
428 *
429 * Returns the xmlParserInputPtr or NULL
430 */
431xmlParserInputPtr
432xmlLoadExternalEntity(const char *URL, const char *ID,
433 xmlParserInputPtr context) {
434 return(xmlCurrentExternalEntityLoader(URL, ID, context));
435}
436