/* $Id$ */ /* Copyright (c) 2010-2021 Pierre Pronchery */ /* This file is part of DeforaOS System libParser */ /* This program is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, version 3 of the License. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program. If not, see . */ #include #include #include #include #include #include #include "System/Parser/XML.h" #ifdef DEBUG # define DEBUG_CALLBACK() fprintf(stderr, "DEBUG: %s()\n", __func__) #else # define DEBUG_CALLBACK() #endif /* XML */ /* private */ /* types */ typedef enum _XMLContext { XML_CONTEXT_DATA, XML_CONTEXT_TAG, XML_CONTEXT_TAG_ATTRIBUTES, XML_CONTEXT_TAG_ATTRIBUTES_VALUE } XMLContext; #define XML_CONTEXT_TAG_FIRST XML_CONTEXT_TAG #define XML_CONTEXT_TAG_LAST XML_CONTEXT_TAG_ATTRIBUTES_VALUE struct _XML { XMLPrefs prefs; XMLDocument * document; /* parsing */ Parser * parser; XMLContext context; char * inject; }; typedef enum _XMLCode { XML_CODE_DATA, XML_CODE_ENTITY, XML_CODE_TAG_ATTRIBUTE, XML_CODE_TAG_ATTRIBUTE_VALUE, XML_CODE_TAG_CLOSE, XML_CODE_TAG_ENTER, XML_CODE_TAG_LEAVE, XML_CODE_TAG_NAME, XML_CODE_TAG_SPECIAL } XMLCode; /* prototypes */ static int _xml_inject(XML * xml, char const * string); /* attribute */ static XMLAttribute * _xml_attribute_new(char const * name, char const * value); static void _xml_attribute_delete(XMLAttribute * attribute); static int _xml_attribute_set_value(XMLAttribute * attribute, char const * value); /* callbacks */ static int _xml_callback_data(Parser * parser, Token * token, int c, void * data); static int _xml_callback_entity(Parser * parser, Token * token, int c, void * data); static int _xml_callback_tag_attribute(Parser * parser, Token * token, int c, void * data); static int _xml_callback_tag_attribute_value(Parser * parser, Token * token, int c, void * data); static int _xml_callback_tag_close(Parser * parser, Token * token, int c, void * data); static int _xml_callback_tag_enter(Parser * parser, Token * token, int c, void * data); static int _xml_callback_tag_leave(Parser * parser, Token * token, int c, void * data); static int _xml_callback_tag_name(Parser * parser, Token * token, int c, void * data); static int _xml_callback_tag_special(Parser * parser, Token * token, int c, void * data); static int _xml_callback_tag_whitespace(Parser * parser, Token * token, int c, void * data); /* document */ static XMLDocument * _xml_document_new(XMLNode * node); static void _xml_document_delete(XMLDocument * document); /* filters */ static int _xml_filter_comment(int * c, void * data); static int _xml_filter_inject(int * c, void * data); static int _xml_filter_whitespace(int * c, void * data); /* node */ static XMLNode * _xml_node_new(XMLNodeType type, XMLNodeTag * parent); static XMLNode * _xml_node_new_data(XMLNodeTag * parent, char const * buffer, size_t size); static XMLNode * _xml_node_new_entity(XMLNodeTag * parent, char const * name); static XMLNode * _xml_node_new_tag(XMLNodeTag * parent, char const * name); static void _xml_node_delete(XMLNode * node); static char const * _xml_node_tag_get_name(XMLNodeTag * node); static int _xml_node_tag_add_attribute(XMLNodeTag * node, XMLAttribute * attribute); static int _xml_node_tag_add_child(XMLNodeTag * node, XMLNode * child); /* public */ /* functions */ /* xml_new */ static XML * _new_do(XMLPrefs * prefs, char const * pathname, char const * string, size_t length); XML * xml_new(XMLPrefs * prefs, char const * pathname) { return _new_do(prefs, pathname, NULL, 0); } static XML * _new_do(XMLPrefs * prefs, char const * pathname, char const * string, size_t length) { XML * xml; if((xml = object_new(sizeof(*xml))) == NULL) return NULL; if(prefs != NULL) memcpy(&xml->prefs, prefs, sizeof(xml->prefs)); else memset(&xml->prefs, 0, sizeof(xml->prefs)); xml->document = NULL; if(pathname != NULL) xml->parser = parser_new(pathname); else xml->parser = parser_new_string(string, length); xml->context = XML_CONTEXT_DATA; xml->inject = NULL; if(xml->parser == NULL) { xml_delete(xml); return NULL; } parser_add_filter(xml->parser, _xml_filter_inject, xml); if((xml->prefs.filters & XML_FILTER_WHITESPACE) == XML_FILTER_WHITESPACE) parser_add_filter(xml->parser, _xml_filter_whitespace, xml); /* FIXME filter out comments only optionally */ parser_add_filter(xml->parser, _xml_filter_comment, xml); parser_add_callback(xml->parser, _xml_callback_tag_whitespace, xml); parser_add_callback(xml->parser, _xml_callback_tag_special, xml); parser_add_callback(xml->parser, _xml_callback_tag_name, xml); parser_add_callback(xml->parser, _xml_callback_tag_attribute, xml); parser_add_callback(xml->parser, _xml_callback_tag_attribute_value, xml); parser_add_callback(xml->parser, _xml_callback_tag_close, xml); parser_add_callback(xml->parser, _xml_callback_tag_enter, xml); parser_add_callback(xml->parser, _xml_callback_tag_leave, xml); parser_add_callback(xml->parser, _xml_callback_entity, xml); parser_add_callback(xml->parser, _xml_callback_data, xml); return xml; } /* xml_new_string */ XML * xml_new_string(XMLPrefs * prefs, char const * string, size_t length) { return _new_do(prefs, NULL, string, length); } /* xml_delete */ void xml_delete(XML * xml) { if(xml->document != NULL) _xml_document_delete(xml->document); if(xml->parser != NULL) parser_delete(xml->parser); object_delete(xml); } /* accessors */ /* xml_get_document */ static int _document_data(Token * token, XMLNodeTag * current); static int _document_entity(Token * token, XMLNodeTag * current); static int _document_tag_attribute(Token * token, XMLNodeTag * node, XMLAttribute ** attribute); static int _document_tag_attribute_value(Token * token, XMLAttribute * attribute); static int _document_tag_close(Token * token, XMLNodeTag ** current); static int _document_tag_open(XML * xml, Token * token, XMLNodeTag ** current); XMLDocument * xml_get_document(XML * xml) { Token * token = NULL; XMLCode code; XMLNodeTag * current = NULL; XMLAttribute * attribute = NULL; enum TagState { TS_UNKNOWN = 0, TS_OPEN, TS_CLOSE, TS_SHORT } closed = TS_UNKNOWN; if(xml->document != NULL) return xml->document; if((xml->document = _xml_document_new(NULL)) == NULL) return NULL; for(; parser_get_token(xml->parser, &token) == 0 && token != NULL; token_delete(token)) { #ifdef DEBUG fprintf(stderr, "DEBUG: %s() code=%u string \"%s\" closed=%d\n", __func__, token_get_code(token), token_get_string(token), closed); #endif switch((code = token_get_code(token))) { case XML_CODE_DATA: _document_data(token, current); break; case XML_CODE_ENTITY: _document_entity(token, current); break; case XML_CODE_TAG_ATTRIBUTE: _document_tag_attribute(token, current, &attribute); break; case XML_CODE_TAG_ATTRIBUTE_VALUE: _document_tag_attribute_value(token, attribute); break; case XML_CODE_TAG_CLOSE: closed = (closed == TS_OPEN) ? TS_SHORT : TS_CLOSE; break; case XML_CODE_TAG_ENTER: break; case XML_CODE_TAG_LEAVE: if(closed == TS_SHORT) current = current->parent; closed = TS_UNKNOWN; break; case XML_CODE_TAG_NAME: if(closed == TS_CLOSE) { _document_tag_close(token, ¤t); break; } closed = TS_OPEN; _document_tag_open(xml, token, ¤t); break; case XML_CODE_TAG_SPECIAL: break; } } return xml->document; } static int _document_data(Token * token, XMLNodeTag * current) { XMLNode * node; String const * string; size_t size = 0; if(current == NULL) return -1; if((string = token_get_string(token)) != NULL) size = string_get_length(string); node = _xml_node_new_data(current, string, size); return _xml_node_tag_add_child(current, node); } static int _document_entity(Token * token, XMLNodeTag * current) { XMLNode * node; String const * string; if(current == NULL) return -1; if((string = token_get_string(token)) == NULL) return -1; if((node = _xml_node_new_entity(current, string)) == NULL) return -1; return _xml_node_tag_add_child(current, node); } static int _document_tag_attribute(Token * token, XMLNodeTag * current, XMLAttribute ** attribute) { if(current == NULL) return -1; if((*attribute = _xml_attribute_new(token_get_string(token), NULL)) == NULL) return -1; return _xml_node_tag_add_attribute(current, *attribute); } static int _document_tag_attribute_value(Token * token, XMLAttribute * attribute) { if(attribute == NULL) return -1; return _xml_attribute_set_value(attribute, token_get_string(token)); } static int _document_tag_close(Token * token, XMLNodeTag ** current) { char const * parent; #ifdef DEBUG fprintf(stderr, "DEBUG: %s(\"%s\")\n", __func__, token_get_string(token)); #endif if(*current == NULL) return -1; /* XXX the document is malformed */ parent = _xml_node_tag_get_name(*current); if(strcmp(parent, token_get_string(token)) != 0) return -1; /* XXX the document is malformed */ *current = (*current)->parent; return 0; } static int _document_tag_open(XML * xml, Token * token, XMLNodeTag ** current) { XMLNode * node; if((node = _xml_node_new_tag(*current, token_get_string(token))) == NULL) return -1; if(*current == NULL) xml->document->root = node; else _xml_node_tag_add_child(*current, node); *current = &node->tag; return 0; } /* xml_get_filename */ char const * xml_get_filename(XML * xml) { return parser_get_filename(xml->parser); } /* node */ /* xml_node_get_attribute_value_by_name */ char const * xml_node_get_attribute_value_by_name(XMLNode * node, char const * name) { size_t i; if(node->type != XML_NODE_TYPE_TAG) return NULL; for(i = 0; i < node->tag.attributes_cnt; i++) if(strcmp(node->tag.attributes[i]->name, name) == 0) return node->tag.attributes[i]->value; return NULL; } /* private */ /* functions */ /* xml_inject */ static int _xml_inject(XML * xml, char const * string) { if(string == NULL || string[0] == '\0') return 0; /* don't bother */ if(xml->inject == NULL) { if((xml->inject = string_new(string)) == NULL) return -1; } else if(string_append(&xml->inject, string) != 0) return -1; #ifdef DEBUG fprintf(stderr, "DEBUG: %s(%p, \"%s\") => \"%s\"\n", __func__, (void *)xml, string, xml->inject); #endif return 0; } /* attribute */ /* xml_attribute_new */ static XMLAttribute * _xml_attribute_new(char const * name, char const * value) { XMLAttribute * attribute; if(name == NULL) { error_set_code(1, "%s", strerror(EINVAL)); return NULL; } if(value == NULL) value = name; if((attribute = object_new(sizeof(*attribute))) == NULL) return NULL; attribute->name = string_new(name); attribute->value = string_new(value); if(attribute->name == NULL || attribute->value == NULL) { _xml_attribute_delete(attribute); return NULL; } return attribute; } /* xml_attribute_delete */ static void _xml_attribute_delete(XMLAttribute * attribute) { string_delete(attribute->name); string_delete(attribute->value); object_delete(attribute); } /* xml_attribute_set_value */ static int _xml_attribute_set_value(XMLAttribute * attribute, char const * value) { char * v; if((v = string_new(value)) == NULL) return -1; string_delete(attribute->value); attribute->value = v; return 0; } /* callbacks */ /* xml_callback_data */ static int _xml_callback_data(Parser * parser, Token * token, int c, void * data) { XML * xml = data; char * string = NULL; size_t len = 0; char * p; if(xml->context != XML_CONTEXT_DATA) return -1; while(c != EOF && c != '<') { if((p = realloc(string, len + 2)) == NULL) return -1; /* XXX report error */ string = p; string[len++] = c; c = parser_scan_filter(parser); if(c == '&') break; } if(len == 0) return -1; DEBUG_CALLBACK(); token_set_code(token, XML_CODE_DATA); string[len] = '\0'; token_set_string(token, string); free(string); return 0; } /* xml_callback_entity */ static int _xml_callback_entity(Parser * parser, Token * token, int c, void * data) { XML * xml = data; char * string = NULL; size_t len = 0; char * p; if(xml->context != XML_CONTEXT_DATA || c != '&') return -1; for(c = parser_scan_filter(parser); c != EOF && c != '<'; len++) { if((p = realloc(string, len + 2)) == NULL) return -1; /* XXX report error */ string = p; string[len] = c; c = parser_scan_filter(parser); if(string[len] == ';') break; } if(len == 0) return -1; DEBUG_CALLBACK(); token_set_code(token, XML_CODE_ENTITY); string[len] = '\0'; token_set_string(token, string); free(string); return 0; } /* xml_callback_tag_attribute */ static int _xml_callback_tag_attribute(Parser * parser, Token * token, int c, void * data) { XML * xml = data; int q = '\0'; char * string = NULL; size_t len = 0; char * p; if(xml->context != XML_CONTEXT_TAG_ATTRIBUTES || (!isalnum(c) && c != '"')) return -1; if(c == '"') { q = c; c = parser_scan_filter(parser); } while(c != EOF && (isalnum(c) || c == ':' || c == '-' || (q != '\0' && c != q))) { if((p = realloc(string, len + 2)) == NULL) return -1; /* XXX report error */ string = p; string[len++] = c; c = parser_scan_filter(parser); } if(len == 0) return -1; if(q != '\0') parser_scan_filter(parser); DEBUG_CALLBACK(); token_set_code(token, XML_CODE_TAG_ATTRIBUTE); string[len] = '\0'; token_set_string(token, string); free(string); if(c == '=') xml->context = XML_CONTEXT_TAG_ATTRIBUTES_VALUE; return 0; } /* xml_callback_tag_attribute_value */ static int _xml_callback_tag_attribute_value(Parser * parser, Token * token, int c, void * data) { XML * xml = data; int q = '\0'; char * string = NULL; size_t len = 0; char * p; if(xml->context != XML_CONTEXT_TAG_ATTRIBUTES_VALUE) return -1; if(c != '=') return -1; DEBUG_CALLBACK(); if((c = parser_scan_filter(parser)) == '\'' || c == '"') { q = c; c = parser_scan_filter(parser); } while(c != EOF && ((q == '\0' && isalnum(c)) || (q != '\0' && c != q))) { if((p = realloc(string, len + 2)) == NULL) return -1; /* XXX report error */ string = p; string[len++] = c; c = parser_scan_filter(parser); } if(q != '\0') parser_scan_filter(parser); token_set_code(token, XML_CODE_TAG_ATTRIBUTE_VALUE); if(len == 0) token_set_string(token, ""); else { string[len] = '\0'; token_set_string(token, string); free(string); } xml->context = XML_CONTEXT_TAG_ATTRIBUTES; return 0; } /* xml_callback_tag_close */ static int _xml_callback_tag_close(Parser * parser, Token * token, int c, void * data) { XML * xml = data; if(c != '/') return -1; if(xml->context < XML_CONTEXT_TAG_FIRST || xml->context > XML_CONTEXT_TAG_LAST) return -1; DEBUG_CALLBACK(); parser_scan_filter(parser); token_set_code(token, XML_CODE_TAG_CLOSE); token_set_string(token, "/"); return 0; } /* xml_callback_tag_enter */ static int _xml_callback_tag_enter(Parser * parser, Token * token, int c, void * data) { XML * xml = data; if(xml->context != XML_CONTEXT_DATA || c != '<') return -1; DEBUG_CALLBACK(); parser_scan_filter(parser); xml->context = XML_CONTEXT_TAG; token_set_code(token, XML_CODE_TAG_ENTER); token_set_string(token, "<"); return 0; } /* xml_callback_tag_leave */ static int _xml_callback_tag_leave(Parser * parser, Token * token, int c, void * data) { XML * xml = data; if(c != '>') return -1; if(xml->context < XML_CONTEXT_TAG_FIRST || xml->context > XML_CONTEXT_TAG_LAST) return -1; DEBUG_CALLBACK(); parser_scan_filter(parser); xml->context = XML_CONTEXT_DATA; token_set_code(token, XML_CODE_TAG_LEAVE); token_set_string(token, ">"); return 0; } /* xml_callback_tag_name */ static int _xml_callback_tag_name(Parser * parser, Token * token, int c, void * data) { XML * xml = data; char * string = NULL; size_t len = 0; char * p; if(xml->context != XML_CONTEXT_TAG || !isalnum(c)) return -1; DEBUG_CALLBACK(); do { if((p = realloc(string, len + 2)) == NULL) return -1; /* XXX report error */ string = p; string[len++] = c; } while((c = parser_scan_filter(parser)) != EOF && c != '<' && c != '!' && c != '?' && c != '/' && c != '=' && c != '>' && !isspace(c)); token_set_code(token, XML_CODE_TAG_NAME); string[len] = '\0'; token_set_string(token, string); free(string); xml->context = XML_CONTEXT_TAG_ATTRIBUTES; return 0; } /* xml_callback_tag_special */ /* FIXME decompose this function in at least two different ones */ static int _xml_callback_tag_special(Parser * parser, Token * token, int c, void * data) { XML * xml = data; char buf[2] = { '\0', '\0' }; if((xml->context != XML_CONTEXT_TAG && xml->context != XML_CONTEXT_TAG_ATTRIBUTES) || (c != '?' && c != '!')) return -1; DEBUG_CALLBACK(); buf[0] = c; parser_scan_filter(parser); token_set_code(token, XML_CODE_TAG_SPECIAL); token_set_string(token, buf); return 0; } /* xml_callback_tag_whitespace */ static int _xml_callback_tag_whitespace(Parser * parser, Token * token, int c, void * data) { XML * xml = data; (void) token; if(!isspace(c)) return -1; if(xml->context < XML_CONTEXT_TAG_FIRST || xml->context > XML_CONTEXT_TAG_LAST) return -1; DEBUG_CALLBACK(); while(isspace(c)) c = parser_scan_filter(parser); if(xml->context == XML_CONTEXT_TAG_ATTRIBUTES_VALUE) xml->context = XML_CONTEXT_TAG_ATTRIBUTES; return -1; } /* document */ /* xml_document_new */ static XMLDocument * _xml_document_new(XMLNode * node) { XMLDocument * document; if(node != NULL && node->type != XML_NODE_TYPE_TAG) { error_set_code(1, "%s", strerror(EINVAL)); return NULL; } if((document = object_new(sizeof(*document))) == NULL) return NULL; document->root = node; return document; } /* xml_document_delete */ static void _xml_document_delete(XMLDocument * document) { if(document->root != NULL) _xml_node_delete(document->root); object_delete(document); } /* filters */ /* xml_filter_comment */ static int _xml_filter_comment(int * c, void * data) { XML * xml = data; char start[5] = "