From 283a5648a84e77ac848ceeb79708926af2b24fd3 Mon Sep 17 00:00:00 2001
From: Eryn Wells
Date: Mon, 19 Oct 2015 07:55:06 -0700
Subject: [PATCH] Breaking up xml parsing into xml namespace and classes

---
 lib/core/include/core/xml/Document.hh |  47 +++++
 lib/core/include/core/xml/Node.hh     |  41 ++++
 lib/core/src/xml/Document.cc          | 275 ++++++++++++++++++++++++++
 lib/core/src/xml/Node.cc              |  76 +++++++
 4 files changed, 439 insertions(+)
 create mode 100644 lib/core/include/core/xml/Document.hh
 create mode 100644 lib/core/include/core/xml/Node.hh
 create mode 100644 lib/core/src/xml/Document.cc
 create mode 100644 lib/core/src/xml/Node.cc

diff --git a/lib/core/include/core/xml/Document.hh b/lib/core/include/core/xml/Document.hh
new file mode 100644
index 0000000..bbccfa8
--- /dev/null
+++ b/lib/core/include/core/xml/Document.hh
@@ -0,0 +1,47 @@
+/* Document.hh
+ * vim: set tw=80:
+ * Eryn Wells
+ */
+/**
+ * An XML document and related structures.
+ */
+
+#pragma once
+
+#include /* NOTE(review): header name missing in SOURCE */
+#include /* NOTE(review): header name missing in SOURCE */
+#include /* NOTE(review): header name missing in SOURCE */
+
+#include "core/File.hh"
+#include "core/String.hh"
+#include "core/xml/Node.hh"
+
+namespace erw {
+namespace xml {
+
+struct Document;
+
+
+/** An XML document. */
+struct Document
+{
+    /**
+     * Constructor. Parse an XML document out of the given file. Doing so takes
+     * ownership of the file.
+     */
+    Document(InFile&& file);
+
+    /** Constructor. Parse an XML document from the given string. */
+    Document(const String& string);
+
+    ~Document();
+
+    const Node& root() const noexcept;
+
+protected:
+    /** The root of the XML tree. The document owns its root. */
+    Node mRoot;
+};
+
+} /* namespace xml */
+} /* namespace erw */
diff --git a/lib/core/include/core/xml/Node.hh b/lib/core/include/core/xml/Node.hh
new file mode 100644
index 0000000..3ea27f7
--- /dev/null
+++ b/lib/core/include/core/xml/Node.hh
@@ -0,0 +1,41 @@
+/* Node.hh
+ * vim: set tw=80:
+ * Eryn Wells
+ */
+/**
+ * An XML node.
+ */
+
+#pragma once
+
+#include <map>
+#include <vector>
+
+#include "core/String.hh"
+
+namespace erw {
+namespace xml {
+
+/** A node in an XML tree. */
+struct Node
+{
+    typedef std::vector<Node> List;
+    typedef std::map<String, String> AttributeMap;
+
+    Node();
+    Node(const String& name, const List& children);
+    Node(const Node& other);
+    ~Node();
+
+    String name() const noexcept;
+    List children() const noexcept;
+
+protected:
+    /** The name of the node. */
+    String mName;
+    /** Children of this node. The node owns its children. */
+    List mChildren;
+};
+
+} /* namespace xml */
+} /* namespace erw */
diff --git a/lib/core/src/xml/Document.cc b/lib/core/src/xml/Document.cc
new file mode 100644
index 0000000..53829e9
--- /dev/null
+++ b/lib/core/src/xml/Document.cc
@@ -0,0 +1,275 @@
+/* Document.cc
+ * vim: set tw=80:
+ * Eryn Wells
+ */
+/**
+ * Implementation of an XML parser.
+ */
+
+#include /* NOTE(review): header name missing in SOURCE */
+#include /* NOTE(review): header name missing in SOURCE */
+
+#include /* NOTE(review): header name missing in SOURCE */
+#include /* NOTE(review): header name missing in SOURCE */
+
+#include "XMLParser.hh"
+
+
+
+namespace {
+
+/*
+ * initLibrary -- call xmlInitParser() at most once, via std::call_once.
+ */
+void
+initLibrary()
+{
+    static std::once_flag once;
+    std::call_once(once, []() {
+        xmlInitParser();
+    });
+}
+
+/* NOTE(review): declared XMLNode, yet the return statement builds a UCPtr. */
+/*
+ * parseFile -- feed `file` to a libxml2 push parser in fixed-size chunks.
+ */
+XMLNode
+parseFile(File&& file)
+{
+    const size_t kInitialBufferSize = 16;
+    const size_t kBufferSize = 1024;
+
+    static_assert(kInitialBufferSize < kBufferSize,
+                  "XML parser initial buffer size must be smaller than the "
+                  "total buffer size");
+
+    initLibrary();
+
+    char buffer[kBufferSize];
+    ssize_t bytesRead = 0;
+
+    bytesRead = file.read(buffer, kInitialBufferSize);
+    xmlParserCtxtPtr context = xmlCreatePushParserCtxt(nullptr, nullptr,
+                                                       buffer, bytesRead,
+                                                       file.path().c_str());
+    if (!context) {
+        // TODO: Throw an appropriate error...
+        throw 42;
+    }
+
+    /*
+     * Read chunks until we're done. Once all data has been read, indicate that
+     * the parser should terminate by calling xmlParseChunk() with a last
+     * argument of 1 rather than 0.
+     */
+    while ((bytesRead = file.read(buffer, kBufferSize)) > 0) {
+        xmlParseChunk(context, buffer, bytesRead, 0);
+    }
+    xmlParseChunk(context, buffer, 0, 1);
+
+    bool succeeded = bool(context->wellFormed);
+    xmlDocPtr document = context->myDoc;
+
+    xmlFreeParserCtxt(context);
+
+    return succeeded ? XMLDocument::UCPtr(new XML2Document(document)) : nullptr;
+}
+
+}
+
+
+namespace erw {
+
+struct XML2Node
+    : public XMLNode
+{
+    XML2Node(xmlNodePtr node);
+
+    virtual ~XML2Node();
+
+    virtual String name() const noexcept override;
+    virtual String content() const noexcept override;
+    virtual XMLNode::AttributeMap attributes() const noexcept override;
+
+private:
+    xmlNodePtr mNode;
+
+    /** Make a list of children of the node, keeping element nodes only. */
+    XMLNode::List childrenOfXML2Node(xmlNodePtr node) const noexcept;
+};
+
+
+/** An XML parser that uses libxml2. */
+struct XML2Document
+    : public XMLDocument
+{
+    /**
+     * Initialize the xml2 library, if needed. This function may be called more
+     * than once with no adverse effects.
+     */
+    static void initLibrary();
+
+    /**
+     * Parse a file into a libxml2 document object. Note: because of move
+     * semantics related to the file UPtr, the file will be closed after this
+     * method completes.
+     *
+     * @param [in] file The file to parse.
+     * @return A libxml2 document, or nullptr if the parse fails.
+     */
+    static XMLDocument::UCPtr parseFile(File::UPtr file);
+
+    static XMLDocument::UCPtr parseString(const String& string);
+
+    XML2Document(xmlDocPtr document);
+
+    virtual ~XML2Document();
+
+    virtual XMLNode::WCPtr root() const noexcept override;
+
+private:
+    xmlDocPtr mDocument;
+};
+
+#pragma mark - erw::XMLDocument
+
+/* static */ XMLDocument::UCPtr
+XMLDocument::parseFile(File::UPtr file)
+{
+    return XML2Document::parseFile(std::move(file));
+}
+
+
+/* static */ XMLDocument::UCPtr
+XMLDocument::parseString(const String& string)
+{
+    return XML2Document::parseString(string);
+}
+
+/* NOTE(review): lacks a class qualifier -- presumably Document::Document. */
+XMLDocument(InFile&& file)
+    : mRoot()
+{ }
+
+
+XMLDocument::~XMLDocument()
+{ }
+
+#pragma mark - erw::XML2Document
+
+
+
+/* static */ XMLDocument::UCPtr
+XML2Document::parseString(const String& string)
+{
+    initLibrary();
+    xmlDocPtr document = xmlReadMemory(string.c_str(), string.size(), "memory.xml", nullptr, 0);
+    return document ? XMLDocument::UCPtr(new XML2Document(document)) : nullptr;
+}
+
+
+XML2Document::XML2Document(xmlDocPtr document)
+    : XMLDocument(XMLNode::Ptr(new XML2Node(xmlDocGetRootElement(document)))),
+      mDocument(document)
+{ }
+
+
+XML2Document::~XML2Document()
+{
+    if (mDocument) {
+        xmlFreeDoc(mDocument);
+        mDocument = nullptr;
+    }
+}
+
+
+XMLNode::WCPtr
+XML2Document::root()
+    const noexcept
+{
+    return mRoot;
+}
+
+#pragma mark - XMLNode
+
+XMLNode::~XMLNode()
+{ }
+
+
+XMLNode::XMLNode(XMLNode::List&& children)
+    : mChildren(children)
+{ }
+
+
+XMLNode::WCList
+XMLNode::children()
+    const noexcept
+{
+    WCList weakChildren;
+    for (Ptr child : mChildren) {
+        weakChildren.push_back(WCPtr(child));
+    }
+    return weakChildren;
+}
+
+#pragma mark - XML2Node
+
+XML2Node::XML2Node(xmlNodePtr node)
+    : XMLNode(childrenOfXML2Node(node)),
+      mNode(node)
+{ }
+
+
+XML2Node::~XML2Node()
+{ }
+
+
+String
+XML2Node::name()
+    const noexcept
+{
+    return (const char *)mNode->name;
+}
+
+
+String
+XML2Node::content()
+    const noexcept
+{
+    xmlChar *content = xmlNodeGetContent(mNode);
+    String contentString((const char *)content);
+    xmlFree(content);
+    return contentString;
+}
+
+
+XMLNode::AttributeMap
+XML2Node::attributes()
+    const noexcept
+{
+    AttributeMap attrs;
+    for (xmlAttrPtr attr = mNode->properties; attr && attr->name && attr->children; attr = attr->next) {
+        xmlChar *value = xmlNodeListGetString(mNode->doc, attr->children, 1);
+        attrs[(const char *)attr->name] = (const char *)value;
+        xmlFree(value);
+    }
+    return attrs;
+}
+
+
+XMLNode::List
+XML2Node::childrenOfXML2Node(xmlNodePtr node)
+    const noexcept
+{
+    XMLNode::List children;
+    for (xmlNodePtr c = node->children; c != nullptr; c = c->next) {
+        if (c->type != XML_ELEMENT_NODE) {
+            continue;
+        }
+        children.push_back(XMLNode::Ptr(new XML2Node(c)));
+    }
+    return children;
+}
+
+} /* namespace erw */
diff --git a/lib/core/src/xml/Node.cc b/lib/core/src/xml/Node.cc
new file mode 100644
index 0000000..7517de0
--- /dev/null
+++ b/lib/core/src/xml/Node.cc
@@ -0,0 +1,76 @@
+/* Node.cc
+ * vim: set tw=80:
+ * Eryn Wells
+ */
+/**
+ * Implementation of a node in an XML tree.
+ */
+
+#include "core/xml/Node.hh"
+
+#include /* NOTE(review): header name missing in SOURCE */
+#include /* NOTE(review): header name missing in SOURCE */
+
+
+namespace erw {
+namespace xml {
+
+/*
+ * Node::Node -- default constructor: empty name, no children.
+ */
+Node::Node()
+    : mName(),
+      mChildren()
+{ }
+
+
+/*
+ * Node::Node -- construct from a name and a list of children (both copied).
+ */
+Node::Node(const String& name,
+           const List& children)
+    : mName(name),
+      mChildren(children)
+{ }
+
+
+/*
+ * Node::Node -- copy constructor; copies the other node's name and children.
+ */
+Node::Node(const Node& other)
+    : mName(other.mName),
+      mChildren(other.mChildren)
+{ }
+
+
+/*
+ * Node::~Node -- destructor; the body is empty.
+ */
+Node::~Node()
+{ }
+
+#pragma mark Properties
+
+/*
+ * Node::name -- the node's name, returned by value.
+ */
+String
+Node::name()
+    const noexcept
+{
+    return mName;
+}
+
+
+/*
+ * Node::children -- the node's children, returned by value (a copy).
+ */
+Node::List
+Node::children()
+    const noexcept
+{
+    return mChildren;
+}
+
+} /* namespace xml */
+} /* namespace erw */