PageRenderTime 482ms CodeModel.GetById 156ms app.highlight 134ms RepoModel.GetById 188ms app.codeStats 0ms

/src/ftk_xml_parser.c

http://ftk.googlecode.com/
C | 563 lines | 462 code | 71 blank | 30 comment | 108 complexity | 1d8d14ec6199398a417758016a78ff18 MD5 | raw file
  1/*
  2 * File:    ftk_xml_parser.c
  3 * Author:  Li XianJing <xianjimli@hotmail.com>
  4 * Brief:   xml parser
  5 *
  6 * Copyright (c) Li XianJing
  7 *
  8 * Licensed under the Academic Free License version 2.1
  9 *
 10 * This program is free software; you can redistribute it and/or modify
 11 * it under the terms of the GNU General Public License as published by
 12 * the Free Software Foundation; either version 2 of the License, or
 13 * (at your option) any later version.
 14 *
 15 * This program is distributed in the hope that it will be useful,
 16 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 17 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 18 * GNU General Public License for more details.
 19 *
 20 * You should have received a copy of the GNU General Public License
 21 * along with this program; if not, write to the Free Software
 22 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
 23 */
 24
 25/*
 26 * History:
 27 * ================================================================
 28 * 2009-05-30 Li XianJing <xianjimli@hotmail.com> created.
 29 *
 30 */
 31
  #include <ctype.h>
  #include <string.h>

  #include "ftk_allocator.h"
  #include "ftk_xml_parser.h"
 34
 35struct _FtkXmlParser
 36{
 37	const char* read_ptr;
 38
 39	int   attrs_nr;
 40	char* attrs[2*MAX_ATTR_NR+1];
 41
 42	char* buffer;
 43	int buffer_used;
 44	int buffer_total;
 45
 46	FtkXmlBuilder* builder;
 47};
 48
 49static const char* strtrim(char* str);
 50static void ftk_xml_parser_parse_entity(FtkXmlParser* thiz);
 51static void ftk_xml_parser_parse_start_tag(FtkXmlParser* thiz);
 52static void ftk_xml_parser_parse_end_tag(FtkXmlParser* thiz);
 53static void ftk_xml_parser_parse_comment(FtkXmlParser* thiz);
 54static void ftk_xml_parser_parse_pi(FtkXmlParser* thiz);
 55static void ftk_xml_parser_parse_text(FtkXmlParser* thiz);
 56static void ftk_xml_parser_reset_buffer(FtkXmlParser* thiz);
 57
 58FtkXmlParser* ftk_xml_parser_create(void)
 59{
 60	return (FtkXmlParser*)FTK_ZALLOC(sizeof(FtkXmlParser));
 61}
 62
 63void ftk_xml_parser_set_builder(FtkXmlParser* thiz, FtkXmlBuilder* builder)
 64{
 65	thiz->builder = builder;
 66
 67	return;
 68}
 69
 70void ftk_xml_parser_parse(FtkXmlParser* thiz, const char* xml, int length)
 71{
 72	int i = 0;
 73	enum _State
 74	{
 75		STAT_NONE,
 76		STAT_AFTER_LT,
 77		STAT_START_TAG,
 78		STAT_END_TAG,
 79		STAT_TEXT,
 80		STAT_PRE_COMMENT1,
 81		STAT_PRE_COMMENT2,
 82		STAT_COMMENT,
 83		STAT_PROCESS_INSTRUCTION,
 84	}state = STAT_NONE;
 85
 86	thiz->read_ptr = xml;
 87
 88	for(; *thiz->read_ptr != '\0' && (thiz->read_ptr - xml) < length; thiz->read_ptr++, i++)
 89	{
 90		char c = thiz->read_ptr[0];
 91
 92		switch(state)
 93		{
 94			case STAT_NONE:
 95			{
 96
 97				if(c == '<')
 98				{
 99					ftk_xml_parser_reset_buffer(thiz);
100					state = STAT_AFTER_LT;
101				}
102				else if(!isspace(c))
103				{
104					state = STAT_TEXT;
105				}
106				break;
107			}
108			case STAT_AFTER_LT:
109			{
110				if(c == '?')
111				{
112					state = STAT_PROCESS_INSTRUCTION;
113				}
114				else if(c == '/')
115				{
116					state = STAT_END_TAG;
117				}
118				else if(c == '!')
119				{
120					state = STAT_PRE_COMMENT1;
121				}
122				else if(isalpha(c) || c == '_')
123				{
124					state = STAT_START_TAG;
125				}
126				else
127				{
128					ftk_xml_builder_on_error(thiz->builder, 0, 0, "unexpected char");
129				}
130				break;
131			}
132			case STAT_START_TAG:
133			{
134				ftk_xml_parser_parse_start_tag(thiz);
135				state = STAT_NONE;
136				break;
137			}
138			case STAT_END_TAG:
139			{
140				ftk_xml_parser_parse_end_tag(thiz);
141				state = STAT_NONE;
142				break;
143			}
144			case STAT_PROCESS_INSTRUCTION:
145			{
146				ftk_xml_parser_parse_pi(thiz);
147				state = STAT_NONE;
148				break;
149			}
150			case STAT_TEXT:
151			{
152				ftk_xml_parser_parse_text(thiz);
153				state = STAT_NONE;
154				break;
155			}
156			case STAT_PRE_COMMENT1:
157			{
158				if(c == '-')
159				{
160					state = STAT_PRE_COMMENT2;
161				}
162				else
163				{
164					ftk_xml_builder_on_error(thiz->builder, 0, 0, "expected \'-\'");
165				}
166				break;
167			}
168			case STAT_PRE_COMMENT2:
169			{
170				if(c == '-')
171				{
172					state = STAT_COMMENT;
173				}
174				else
175				{
176					ftk_xml_builder_on_error(thiz->builder, 0, 0, "expected \'-\'");
177				}
178			}
179			case STAT_COMMENT:
180			{
181				ftk_xml_parser_parse_comment(thiz);	
182				state = STAT_NONE;
183				break;
184			}
185			default:break;
186		}
187
188		if(*thiz->read_ptr == '\0')
189		{
190			break;
191		}
192	}
193
194	return;
195}
196
197static void ftk_xml_parser_reset_buffer(FtkXmlParser* thiz)
198{
199	thiz->buffer_used = 0;
200	thiz->attrs_nr = 0;
201	thiz->attrs[0] = NULL;
202
203	return;
204}
205
206static int ftk_xml_parser_strdup(FtkXmlParser* thiz, const char* start, int length)
207{
208	int offset = -1;
209
210	if((thiz->buffer_used + length) >= thiz->buffer_total)
211	{
212		int length = thiz->buffer_total+(thiz->buffer_total>>1) + 128;
213		char* buffer = (char*)FTK_REALLOC(thiz->buffer, length);
214		if(buffer != NULL)
215		{
216			thiz->buffer = buffer;
217			thiz->buffer_total = length;
218		}
219	}
220
221	if((thiz->buffer_used + length) >= thiz->buffer_total)
222	{
223		return offset;
224	}
225
226	offset = thiz->buffer_used;
227	ftk_strncpy(thiz->buffer + offset, start, length);
228	thiz->buffer[offset + length] = '\0';
229	strtrim(thiz->buffer+offset);
230	thiz->buffer_used += length + 1;
231
232	return offset;
233}
234
235static void ftk_xml_parser_parse_attrs(FtkXmlParser* thiz, char end_char)
236{
237	int i = 0;
238	enum _State
239	{
240		STAT_PRE_KEY,
241		STAT_KEY,
242		STAT_PRE_VALUE,
243		STAT_VALUE,
244		STAT_END,
245	}state = STAT_PRE_KEY;
246
247	char value_end = '\"';
248	const char* start = thiz->read_ptr;
249
250	thiz->attrs_nr = 0;
251	for(; *thiz->read_ptr != '\0' && thiz->attrs_nr < MAX_ATTR_NR; thiz->read_ptr++)
252	{
253		char c = *thiz->read_ptr;
254	
255		switch(state)
256		{
257			case STAT_PRE_KEY:
258			{
259				if(c == end_char || c == '>')
260				{
261					state = STAT_END;
262				}
263				else if(!isspace(c))
264				{
265					state = STAT_KEY;
266					start = thiz->read_ptr;
267				}
268			}
269			case STAT_KEY:
270			{
271				if(c == '=')
272				{
273					thiz->attrs[thiz->attrs_nr++] = (char*)ftk_xml_parser_strdup(thiz, start, thiz->read_ptr - start);
274					state = STAT_PRE_VALUE;
275				}
276
277				break;
278			}
279			case STAT_PRE_VALUE:
280			{
281				if(c == '\"' || c == '\'')
282				{
283					state = STAT_VALUE;
284					value_end = c;
285					start = thiz->read_ptr + 1;
286				}
287				break;
288			}
289			case STAT_VALUE:
290			{
291				if(c == value_end)
292				{
293					thiz->attrs[thiz->attrs_nr++] = (char*)ftk_xml_parser_strdup(thiz, start, thiz->read_ptr - start);
294					state = STAT_PRE_KEY;
295				}
296			}
297			default:break;
298		}
299
300		if(state == STAT_END)
301		{
302			break;
303		}
304	}
305	
306	for(i = 0; i < thiz->attrs_nr; i++)
307	{
308		thiz->attrs[i] = thiz->buffer + (int)(thiz->attrs[i]);
309	}
310	thiz->attrs[thiz->attrs_nr] = NULL;
311
312	return;
313}
314
315static void ftk_xml_parser_parse_start_tag(FtkXmlParser* thiz)
316{
317	enum _State
318	{
319		STAT_NAME,
320		STAT_ATTR,
321		STAT_END,
322	}state = STAT_NAME;
323
324	char* tag_name = NULL;
325	const char* start = thiz->read_ptr - 1;
326
327	for(; *thiz->read_ptr != '\0'; thiz->read_ptr++)
328	{
329		char c = *thiz->read_ptr;
330	
331		switch(state)
332		{
333			case STAT_NAME:
334			{
335				if(isspace(c) || c == '>' || c == '/')
336				{
337					tag_name = (char*)ftk_xml_parser_strdup(thiz, start, thiz->read_ptr - start);
338					state = (c != '>' && c != '/') ? STAT_ATTR : STAT_END;
339				}
340				break;
341			}
342			case STAT_ATTR:
343			{
344				ftk_xml_parser_parse_attrs(thiz, '/');
345				state = STAT_END;
346
347				break;
348			}
349			default:break;
350		}
351
352		if(state == STAT_END)
353		{
354			break;
355		}
356	}
357	
358	tag_name = thiz->buffer + (int)tag_name;
359	ftk_xml_builder_on_start_element(thiz->builder, tag_name, (const char**)thiz->attrs);
360	
361	if(thiz->read_ptr[0] == '/')
362	{
363		ftk_xml_builder_on_end_element(thiz->builder, tag_name);
364	}
365
366	for(; *thiz->read_ptr != '>' && *thiz->read_ptr != '\0'; thiz->read_ptr++);
367
368	return;
369}
370
371static void ftk_xml_parser_parse_end_tag(FtkXmlParser* thiz)
372{
373	char* tag_name = NULL;
374	const char* start = thiz->read_ptr;
375	for(; *thiz->read_ptr != '\0'; thiz->read_ptr++)
376	{
377		if(*thiz->read_ptr == '>')
378		{
379			tag_name = thiz->buffer + ftk_xml_parser_strdup(thiz, start, thiz->read_ptr-start);
380			ftk_xml_builder_on_end_element(thiz->builder, tag_name);
381
382			break;
383		}
384	}
385	
386	return;
387}
388
389static void ftk_xml_parser_parse_comment(FtkXmlParser* thiz)
390{
391	enum _State
392	{
393		STAT_COMMENT,
394		STAT_MINUS1,
395		STAT_MINUS2,
396	}state = STAT_COMMENT;
397
398	const char* start = ++thiz->read_ptr;
399	for(; *thiz->read_ptr != '\0'; thiz->read_ptr++)
400	{
401		char c = *thiz->read_ptr;
402
403		switch(state)
404		{
405			case STAT_COMMENT:
406			{
407				if(c == '-')
408				{
409					state = STAT_MINUS1;
410				}
411				break;
412			}
413			case STAT_MINUS1:
414			{
415				if(c == '-')
416				{
417					state = STAT_MINUS2;
418				}
419				else
420				{
421					state = STAT_COMMENT;
422				}
423				break;
424			}
425			case STAT_MINUS2:
426			{
427				if(c == '>')
428				{
429					ftk_xml_builder_on_comment(thiz->builder, start, thiz->read_ptr-start-2);
430					return;
431				}
432			}
433			default:break;
434		}
435	}
436
437	return;
438}
439
440static void ftk_xml_parser_parse_pi(FtkXmlParser* thiz)
441{
442	enum _State
443	{
444		STAT_NAME,
445		STAT_ATTR,
446		STAT_END
447	}state = STAT_NAME;
448
449	char* tag_name = NULL;
450	const char* start = thiz->read_ptr;
451
452	for(; *thiz->read_ptr != '\0'; thiz->read_ptr++)
453	{
454		char c = *thiz->read_ptr;
455	
456		switch(state)
457		{
458			case STAT_NAME:
459			{
460				if(isspace(c) || c == '>')
461				{
462					tag_name = (char*)ftk_xml_parser_strdup(thiz, start, thiz->read_ptr - start);
463					state = c != '>' ? STAT_ATTR : STAT_END;
464				}
465
466				break;
467			}
468			case STAT_ATTR:
469			{
470				ftk_xml_parser_parse_attrs(thiz, '?');
471				state = STAT_END;
472				break;
473			}
474			default:break;
475		}
476
477		if(state == STAT_END)
478		{
479			break;
480		}
481	}
482	
483	tag_name = thiz->buffer + (int)tag_name;
484	ftk_xml_builder_on_pi_element(thiz->builder, tag_name, (const char**)thiz->attrs);	
485
486	for(; *thiz->read_ptr != '>' && *thiz->read_ptr != '\0'; thiz->read_ptr++);
487
488	return;
489}
490
491static void ftk_xml_parser_parse_text(FtkXmlParser* thiz)
492{
493	const char* start = thiz->read_ptr - 1;
494	for(; *thiz->read_ptr != '\0'; thiz->read_ptr++)
495	{
496		char c = *thiz->read_ptr;
497
498		if(c == '<')
499		{
500			if(thiz->read_ptr > start)
501			{
502				ftk_xml_builder_on_text(thiz->builder, start, thiz->read_ptr-start);
503			}
504			thiz->read_ptr--;
505			return;
506		}
507		else if(c == '&')
508		{
509			ftk_xml_parser_parse_entity(thiz);
510		}
511	}
512
513	return;
514}
515
516static void ftk_xml_parser_parse_entity(FtkXmlParser* thiz)
517{
518	/*TODO*/
519
520	return;
521}
522
523void ftk_xml_parser_destroy(FtkXmlParser* thiz)
524{
525	if(thiz != NULL)
526	{
527		FTK_FREE(thiz->buffer);
528		FTK_FREE(thiz);
529	}
530
531	return;
532}
533
534static const char* strtrim(char* str)
535{
536	char* p = NULL;
537
538	p = str + strlen(str) - 1;
539
540	while(p != str && isspace(*p)) 
541	{
542		*p = '\0';
543		p--;
544	}
545
546	p = str;
547	while(*p != '\0' && isspace(*p)) p++;
548
549	if(p != str)
550	{
551		char* s = p;
552		char* d = str;
553		while(*s != '\0')
554		{
555			*d = *s;
556			d++;
557			s++;
558		}
559		*d = '\0';
560	}
561
562	return str;
563}