From c7be629d44d4e2be6c8116796714e0042a977885 Mon Sep 17 00:00:00 2001
From: David Sommerseth
Date: Tue, 28 Apr 2009 11:12:58 +0200
Subject: Added generic XML -> Python parser

The xmlpythonizer module will convert any XML data (xmlNode or xmlDoc)
and format it as a Python object. The formatting is defined in its own
XML file.

Basic format:

    <dmidecode_fieldmap version="1">
       <Mapping name="{name of mapping table}">
          <Map keytype="{key type}" key="{key}"
               valuetype="{value type}" value="{value}"/>
       </Mapping>
    </dmidecode_fieldmap>

The keytype and key attributes define the key in the Python Dict. The
root element will always be a Python Dict structure. The valid key types
are:

  * constant
    Uses the value in {key} as the key value

  * string, integer, float
    Uses a string value from the data XML to be converted to Python.
    The value set in the key attribute defines an XPath value which
    points to the data to be used as a Python dict key. Since Python
    only supports C strings in the C interface for Python dict keys,
    integer and float will be treated as strings.

The valuetype and value attributes are similar to the keys, but with
some more features. Valid valuetypes are:

  * constant
    The value given in the value attribute will be used as the value in
    the Python result.

  * string, integer, float
    The value given in the value attribute defines the XPath to the data
    XML, of where to retrieve the value for the given key. The valuetype
    defines if the data should be understood as a string, integer or
    float in the Python result.

  * list:string, list:integer, list:float
    This does the same as the string, integer or float type, with a
    tweak. The data will be put into a list. If the XPath given returns
    multiple nodes, all of them will be added to this list.

  * dict
    The dict valuetype is more special. It should not contain any value
    attribute. On the other hand, it should contain a sub-level of tags.
    In this way, you can build up a multi-dimensional Python dict.
Example:

** pythonmap.xml **

** exampledata.xml **
String value #1 More test data Value1 in list Value2 in list Value3 in list

** C code snippet **
void xmlpythonizer() {
    xmlDoc *xmlmap = NULL;
    xmlDoc *xmldata = NULL;
    ptzMAP *mapping = NULL;
    PyObject *pythondata = NULL;

    // Read XML files
    xmlmap = xmlReadFile("pythonmap.xml", NULL, 0);
    xmldata = xmlReadFile("exampledata.xml", NULL, 0);

    // Parse the mapping XML
    mapping = dmiMAP_ParseMappingXML(xmlmap, "example_map");

    // Parse the xmldata into a Python object
    pythondata = pythonizeXMLdoc(mapping, xmldata);

    // ..... the program continues to do something useful
}

The result stored inside the pythondata object should now be something
similar to:

{'DemoCase': 'XML Pythonizing',
 'String1': 'String value #1',
 'AttribString1': 1234,
 'TestData': {'Value1': 'More test data',
              'ValueList': ['Value1 in list','Value2 in list','Value3 in list']}
}
---
 src/xmlpythonizer.c | 522 ++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 522 insertions(+)
 create mode 100644 src/xmlpythonizer.c

(limited to 'src/xmlpythonizer.c')

diff --git a/src/xmlpythonizer.c b/src/xmlpythonizer.c
new file mode 100644
index 0000000..92e0d7f
--- /dev/null
+++ b/src/xmlpythonizer.c
@@ -0,0 +1,522 @@
+/* Converts XML docs and nodes to Python dicts and lists by
+ * using an XML file which describes the Python dict layout
+ *
+ * Copyright 2009 David Sommerseth
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * For the avoidance of doubt the "preferred form" of this code is one which + * is in an open unpatent encumbered format. Where cryptographic key signing + * forms part of the process of creating an executable the information + * including keys needed to generate an equivalently functional executable + * are deemed to be part of the source code. + */ + +#include + +#include +#include +#include +#include + +#include +#include + +#include "dmixml.h" +#include "xmlpythonizer.h" + +ptzMAP *ptzmap_Add(const ptzMAP *chain, + ptzTYPES ktyp, const char *key, + ptzTYPES vtyp, const char *value, + ptzMAP *child) +{ + ptzMAP *ret = NULL; + + assert( (ktyp == ptzCONST) || (ktyp == ptzSTR) || (ktyp == ptzINT) || (ktyp == ptzFLOAT) ); + assert( key != NULL ); + // Make sure that value and child are not used together + assert( ((value == NULL) && child != NULL) || ((value != NULL) && (child == NULL)) ); + + ret = (ptzMAP *) malloc(sizeof(ptzMAP)+2); + assert( ret != NULL ); + memset(ret, 0, sizeof(ptzMAP)+2); + + ret->type_key = ktyp; + ret->key = strdup(key); + + ret->type_value = vtyp; + if( value != NULL ) { + ret->value = strdup(value); + ret->child = NULL; + } else if( child != NULL ) { + ret->value = NULL; + ret->child = child; + } + + if( chain != NULL ) { + ret->next = (ptzMAP *) chain; + } + return ret; +}; + +#define ptzmap_Free(ptr) { ptzmap_Free_func(ptr); ptr = NULL; } +void ptzmap_Free_func(ptzMAP *ptr) +{ + if( ptr == NULL ) { + return; + } + + free(ptr->key); + ptr->key = NULL; + + if( ptr->value != NULL ) { + free(ptr->value); + ptr->value = NULL; + } + + if( ptr->child != NULL ) { + ptzmap_Free(ptr->child); + } + if( ptr->next != NULL ) { + ptzmap_Free(ptr->next); + } + free(ptr); +} + + +#if 1 +// DEBUG FUNCTIONS +static const char 
*ptzTYPESstr[] = { "ptzCONST", "ptzSTR", "ptzINT", "ptzFLOAT", + "ptzLIST_STR", "ptzLIST_INT", "ptzLIST_FLOAT", + "ptzDICT", NULL }; + +void indent(int lvl) +{ + int i = 0; + if( lvl == 0 ) { + return; + } + + for( i = 0; i < (lvl * 3); i++ ) { + printf(" "); + } +} + +#define ptzmap_Dump(ptr) { ptzmap_Dump_func(ptr, 0); } +void ptzmap_Dump_func(const ptzMAP *ptr, int level) +{ + if( ptr == NULL ) { + return; + } + + indent(level); printf("key type: (%i) %-13.13s - key: %s\n", + ptr->type_key, ptzTYPESstr[ptr->type_key], ptr->key); + indent(level); printf("value type: (%i) %-13.13s - value: %s\n", + ptr->type_value, ptzTYPESstr[ptr->type_value], ptr->value); + if( ptr->child != NULL ) { + indent(level); printf(" ** CHILD\n"); + ptzmap_Dump_func(ptr->child, level + 1); + indent(level); printf(" ** ---------\n"); + } + if( ptr->next != NULL ) { + printf("\n"); + ptzmap_Dump_func(ptr->next, level); + } +} +#endif // END OF DEBUG FUNCTIONS + +// +// Parser for the XML -> Python mapping XML file +// +// This mappipng XML file describes how the Python result +// should look like and where it should pick the data from +// when later on parsing the dmidecode XML data. 
+// + +// Valid key and value types for the mapping file +inline ptzTYPES _convert_maptype(const char *str) { + if( strcmp(str, "string") == 0 ) { + return ptzSTR; + } else if( strcmp(str, "constant") == 0 ) { + return ptzCONST; + } else if( strcmp(str, "integer") == 0 ) { + return ptzINT; + } else if( strcmp(str, "float") == 0 ) { + return ptzFLOAT; + } else if( strcmp(str, "list:string") == 0 ) { + return ptzLIST_STR; + } else if( strcmp(str, "list:integer") == 0 ) { + return ptzLIST_INT; + } else if( strcmp(str, "list:float") == 0 ) { + return ptzLIST_FLOAT; + } else if( strcmp(str, "dict") == 0 ) { + return ptzDICT; + } else { + fprintf(stderr, "Unknown field type: %s - defaulting to 'string'\n", str); + return ptzSTR; + } +} + +// Internal parser +ptzMAP *_do_dmimap_parsing(xmlNode *node) { + ptzMAP *retmap = NULL; + xmlNode *ptr_n = NULL, *map_n = NULL;; + + // Go to the next XML_ELEMENT_NODE + for( map_n = node; map_n != NULL; map_n = map_n->next ) { + if( map_n->type == XML_ELEMENT_NODE ) { + break; + } + } + if( map_n == NULL ) { + return NULL; + } + + // Go to the first node + if( xmlStrcmp(node->name, (xmlChar *) "Map") != 0 ) { + map_n = dmixml_FindNode(node, "Map"); + if( map_n == NULL ) { + return NULL; + } + } + + // Loop through it's children + for( ptr_n = map_n ; ptr_n != NULL; ptr_n = ptr_n->next ) { + ptzTYPES type_key, type_value; + char *key = NULL, *value = NULL; + + if( ptr_n->type != XML_ELEMENT_NODE ) { + continue; + } + + // Get the attributes defining key, keytype, value and valuetype + key = dmixml_GetAttrValue(ptr_n, "key"); + type_key = _convert_maptype(dmixml_GetAttrValue(ptr_n, "keytype")); + + value = dmixml_GetAttrValue(ptr_n, "value"); + type_value = _convert_maptype(dmixml_GetAttrValue(ptr_n, "valuetype")); + + if( type_value == ptzDICT ) { + // When value type is ptzDICT, traverse the children nodes + // - should contain another Map set instead of a value attribute + if( ptr_n->children == NULL ) { + continue; + } + // 
Recursion + retmap = ptzmap_Add(retmap, type_key, key, type_value, NULL, + _do_dmimap_parsing(ptr_n->children->next)); + } else { + // Append the value as a normal value when the + // value type is not a Python Dict + retmap = ptzmap_Add(retmap, type_key, key, type_value, value, NULL); + } + value = NULL; + key = NULL; + } + return retmap; +} + +// Main parser function for the mapping XML +ptzMAP *dmiMAP_ParseMappingXML(xmlDoc *xmlmap, const char *mapname) { + ptzMAP *map = NULL; + xmlNode *node = NULL; + + // Find the root tag and locate our mapping + node = xmlDocGetRootElement(xmlmap); + assert( node != NULL ); + + // Verify that the root node got the right name + if( (node == NULL) + || (xmlStrcmp(node->name, (xmlChar *) "dmidecode_fieldmap") != 0 )) { + fprintf(stderr, "Invalid XML-Python mapping file\n"); + return NULL; + } + + // Verify that it's of a version we support + if( strcmp(dmixml_GetAttrValue(node, "version"), "1") != 0 ) { + fprintf(stderr, "Unsupported XML-Python mapping file format\n"); + return NULL; + } + + // Find the section matching our request (mapname) + for( node = node->children->next; node != NULL; node = node->next ) { + if( xmlStrcmp(node->name, (xmlChar *) "Mapping") == 0) { + char *name = dmixml_GetAttrValue(node, "name"); + if( (name != NULL) && (strcmp(name, mapname) == 0) ) { + break; + } + } + } + + if( node == NULL ) { + fprintf(stderr, "No mapping for '%s' was found " + "in the XML-Python mapping file\n", mapname); + return NULL; + } + + // Start creating an internal map structure based on the mapping XML. + map = _do_dmimap_parsing(node); + + return map; +} + + +// +// Parser routines for converting XML data into Python structures +// + +inline PyObject *StringToPyObj(ptzTYPES type, const char *str) { + PyObject *value; + + switch( type ) { + case ptzINT: + case ptzLIST_INT: + value = PyInt_FromLong((str != NULL ? atoi(str) : 0)); + break; + + case ptzFLOAT: + case ptzLIST_FLOAT: + value = PyFloat_FromDouble((str != NULL ? 
atof(str) : 0)); + break; + + case ptzSTR: + case ptzLIST_STR: + value = PyString_FromString(str); + break; + + default: + fprintf(stderr, "Invalid type '%i' for value '%s'\n", type, str); + value = Py_None; + } + return value; +} + + +// Retrieve a value from the XML doc (XPath Context) based on a XPath query +xmlXPathObject *_get_xpath_values(xmlXPathContext *xpctx, const char *xpath) { + xmlChar *xp_xpr = NULL; + xmlXPathObject *xp_obj = NULL; + + if( xpath == NULL ) { + return NULL; + } + + xp_xpr = xmlCharStrdup(xpath); + xp_obj = xmlXPathEvalExpression(xp_xpr, xpctx); + assert( xp_obj != NULL ); + free(xp_xpr); + + if( (xp_obj->nodesetval == NULL) || (xp_obj->nodesetval->nodeNr == 0) ) { + xmlXPathFreeObject(xp_obj); + return NULL; + } + + return xp_obj; +} + +// Internal XML parser routine, which traverses the given mapping table, +// returning a Python structure accordingly to the map. +PyObject *_do_pythonizeXML(ptzMAP *in_map, xmlXPathContext *xpctx, int lvl) { + ptzMAP *map_p = NULL; + PyObject *retdata = NULL; + int i = 0; + + retdata = PyDict_New(); + for( map_p = in_map; map_p != NULL; map_p = map_p->next ) { + xmlXPathObject *xpobj = NULL; + char *key = NULL; + PyObject *value = NULL; + + // Get key value + switch( map_p->type_key ) { + case ptzCONST: + key = map_p->key; + break; + + case ptzSTR: + case ptzINT: + case ptzFLOAT: + xpobj = _get_xpath_values(xpctx, map_p->key); + if( xpobj != NULL ) { + key = dmixml_GetContent(xpobj->nodesetval->nodeTab[0]); + xmlXPathFreeObject(xpobj); + } + break; + default: + fprintf(stderr, "Unknown key type: %i\n", map_p->type_key); + return Py_None; + break; + } + + // Get 'value' value + switch( map_p->type_value ) { + case ptzCONST: + value = PyString_FromString(map_p->value); + break; + + case ptzSTR: + case ptzINT: + case ptzFLOAT: + xpobj = _get_xpath_values(xpctx, map_p->value); + if( xpobj != NULL ) { + value = StringToPyObj(map_p->type_value, + dmixml_GetContent(xpobj->nodesetval->nodeTab[0])); + 
xmlXPathFreeObject(xpobj); + } + break; + + case ptzLIST_STR: + case ptzLIST_INT: + case ptzLIST_FLOAT: + xpobj = _get_xpath_values(xpctx, map_p->value); + value = PyList_New(0); + if( xpobj != NULL ) { + for( i = 0; i < xpobj->nodesetval->nodeNr; i++ ) { + char *valstr = dmixml_GetContent(xpobj->nodesetval->nodeTab[i]); + PyList_Append(value, StringToPyObj(map_p->type_value, valstr)); + } + xmlXPathFreeObject(xpobj); + } + break; + + case ptzDICT: + // Traverse the children to get the value of this element + value = _do_pythonizeXML(map_p->child, xpctx, lvl+1); + Py_DECREF(value); + break; + + default: + fprintf(stderr, "Unknown value type: %i\n", map_p->type_value); + free(key); key = NULL; + return Py_None; + break; + } + + PyDict_SetItemString(retdata, key, value); + Py_DECREF(value); + } + Py_INCREF(retdata); + return retdata; +} + +// Convert a xmlDoc to a Python object, based on the given map +PyObject *pythonizeXMLdoc(ptzMAP *map, xmlDoc *xmldoc) +{ + xmlXPathContext *xp_ctx = NULL; + PyObject *retdata = NULL; + + // Prepare a XPath context for XPath queries + xp_ctx = xmlXPathNewContext(xmldoc); + assert( xp_ctx != NULL ); + + // Parse the XML and create Python data + retdata = _do_pythonizeXML(map, xp_ctx, 0); + + // Clean up and return data + xmlXPathFreeContext(xp_ctx); + + return retdata; +} + +// Convert a xmlNode to a Python object, based on the given map +PyObject *pythonizeXMLnode(ptzMAP *map, xmlNode *nodes) +{ + xmlDoc *xmldoc = NULL; + PyObject *retdata = NULL; + + // Create our own internal XML doc and + // copy over the input nodes to our internal doc. + // This is needed as the XPath parser in libxml2 + // only works with xmlDoc. 
+ xmldoc = xmlNewDoc((xmlChar *) "1.0"); + assert( xmldoc != NULL ); + xmlDocSetRootElement(xmldoc, xmlCopyNode(nodes, 1)); + + // Parse the internal xmlDoc + retdata = pythonizeXMLdoc(map, xmldoc); + + // Clean up and return data + xmlFreeDoc(xmldoc); + return retdata; +} + + + +#if 0 +// Simple independent main function - only for debugging +int main() { + xmlDoc *doc = NULL; + ptzMAP *map = NULL; + + doc = xmlReadFile("pythonmap.xml", NULL, 0); + assert( doc != NULL ); + + map = dmiMAP_ParseMappingXML(doc, "BIOSLanguage"); + ptzmap_Dump(map); + + ptzmap_Free(map); + xmlFreeDoc(doc); + + return 0; +} +#endif + +#if 0 +// Simple test module for Python - only for debugging +PyObject* demo_xmlpy() +{ + xmlDoc *doc = NULL, *mapping_xml = NULL; + ptzMAP *mapping = NULL; + PyObject *ret = NULL; + + // Read the XML-Python mapping setup + mapping_xml = xmlReadFile("pythonmap.xml", NULL, 0); + assert( mapping_xml != NULL ); + + mapping = dmiMAP_ParseMappingXML(mapping_xml, "BIOSLanguage"); + assert( mapping != NULL ); + + // Read XML data from file + doc = xmlReadFile("test.xml", NULL, 0); + assert( doc != NULL ); + + // Create a PyObject out of the XML indata + ret = pythonizeXMLdoc(mapping, doc); + + // Clean up and return the data + ptzmap_Free(mapping); + xmlFreeDoc(doc); + xmlFreeDoc(mapping_xml); + + return ret; +} + + +static PyMethodDef DemoMethods[] = { + {"xmlpy", demo_xmlpy, METH_NOARGS, ""}, + {NULL, NULL, 0, NULL} +}; + +PyMODINIT_FUNC initxmlpythonizer(void) { + PyObject *module = + Py_InitModule3((char *)"xmlpythonizer", DemoMethods, + "XML to Python Proof-of-Concept Python Module"); + + PyObject *version = PyString_FromString("2.10"); + Py_INCREF(version); + PyModule_AddObject(module, "version", version); +} +#endif // Python test module + -- cgit