diff options
Diffstat (limited to 'misc/pylib/robofab/xmlTreeBuilder.pyx')
-rw-r--r-- | misc/pylib/robofab/xmlTreeBuilder.pyx | 116 |
1 files changed, 116 insertions, 0 deletions
# Reconstructed from the diff that adds misc/pylib/robofab/xmlTreeBuilder.pyx
# (new file, blob be621e14f); the "+" diff markers have been removed.
"""Parse an XML file into a simple tree of tuples, lists and strings.

Every element becomes a ``(name, attrs, children)`` tuple, where
``children`` is a list holding child element tuples and character data
strings in document order.
"""

import os
try:
    from xml.parsers.expat import ParserCreate
except ImportError:
    _haveExpat = 0
    from xml.parsers.xmlproc.xmlproc import XMLProcessor
else:
    _haveExpat = 1

# Types treated as "a path to open" rather than "an already open file".
try:
    _stringTypes = (str, unicode)
except NameError:
    # Python 3: ``unicode`` is gone and ``str`` is the only text type.
    _stringTypes = (str,)


class XMLParser:

    """Collect parser callbacks into a ``(name, attrs, children)`` tree.

    ``self.root`` is the list that receives the top-level element(s).
    ``self.current`` is a linked stack frame: ``(children, name, attrs,
    previous)`` for an open element, or ``(self.root, None)`` at top level.
    """

    def __init__(self):
        self.root = []
        self.current = (self.root, None)

    def getRoot(self):
        """Return the single top-level element collected so far."""
        assert len(self.root) == 1
        return self.root[0]

    def startElementHandler(self, name, attrs):
        """Open a new element and make it the current frame."""
        children = []
        self.current = (children, name, attrs, self.current)

    def endElementHandler(self, name):
        """Close the current element and append it to its parent's children."""
        # The name stored when the element was opened is used; the ``name``
        # argument is overwritten by the unpacking below.
        children, name, attrs, previous = self.current
        previous[0].append((name, attrs, children))
        self.current = previous

    def characterDataHandler(self, data):
        """Append character data to the current element's children.

        Data is concatenated onto the previous child only when that child
        has exactly the same type as ``data``; after an element tuple (or a
        string of another type) a new entry is started.
        """
        nodes = self.current[0]
        if nodes and type(nodes[-1]) == type(data):
            nodes[-1] = nodes[-1] + data
        else:
            nodes.append(data)

    def _expatParseFile(self, pathOrFile):
        """Parse ``pathOrFile`` (a path string or an open file) with expat.

        Returns the root element tuple. A file opened here is always closed,
        even when parsing fails; a file object passed in is left open.
        """
        parser = ParserCreate()
        # ``returns_unicode`` is a Python 2-only expat attribute (the original
        # author's note says the reason for turning it off is not remembered).
        # It is absent on Python 3, so only touch it where it exists.
        if hasattr(parser, "returns_unicode"):
            parser.returns_unicode = 0
        parser.StartElementHandler = self.startElementHandler
        parser.EndElementHandler = self.endElementHandler
        parser.CharacterDataHandler = self.characterDataHandler
        if isinstance(pathOrFile, _stringTypes):
            # Binary mode: hand expat the raw bytes so it can apply the
            # document's own encoding declaration.
            f = open(pathOrFile, "rb")
            didOpen = 1
        else:
            didOpen = 0
            f = pathOrFile
        try:
            parser.ParseFile(f)
        finally:
            if didOpen:
                f.close()
        return self.getRoot()

    def _xmlprocDataHandler(self, data, begin, end):
        """Forward the ``data[begin:end]`` slice to ``characterDataHandler``."""
        self.characterDataHandler(data[begin:end])

    def _xmlprocParseFile(self, pathOrFile):
        """Parse ``pathOrFile`` (a path string or an open file) with xmlproc.

        Fallback used when expat cannot be imported. Returns the root element
        tuple. A file opened here is always closed, even when parsing fails.
        """
        proc = XMLProcessor()
        proc.app.handle_start_tag = self.startElementHandler
        proc.app.handle_end_tag = self.endElementHandler
        proc.app.handle_data = self._xmlprocDataHandler
        if isinstance(pathOrFile, _stringTypes):
            f = open(pathOrFile)
            didOpen = 1
        else:
            didOpen = 0
            f = pathOrFile
        try:
            proc.parseStart()
            proc.read_from(f)
            proc.flush()
            proc.parseEnd()
            proc.deref()
        finally:
            if didOpen:
                f.close()
        return self.getRoot()

    # Public entry point: bound to whichever backend was importable.
    if _haveExpat:
        parseFile = _expatParseFile
    else:
        parseFile = _xmlprocParseFile


def stripCharacterData(nodes, recursive=True):
    """Strip whitespace from character data in ``nodes``, in place.

    Each string in ``nodes`` is replaced by its ``strip()``-ed value, and
    strings that become empty are removed. Element tuples are kept; when
    ``recursive`` is true their children (``node[2]``) are processed the
    same way. The list is modified in place and nothing is returned.
    """
    kept = []
    for node in nodes:
        if isinstance(node, tuple):
            if recursive:
                stripCharacterData(node[2], recursive)
            kept.append(node)
        else:
            text = node.strip()
            if text:
                kept.append(text)
    # Slice assignment keeps the caller's list object, matching the in-place
    # contract, without the repeated ``del`` shifting of a per-item removal.
    nodes[:] = kept


def buildTree(pathOrFile, stripData=1):
    """Parse ``pathOrFile`` and return its root ``(name, attrs, children)``.

    ``pathOrFile`` is a path string or an open file object. When
    ``stripData`` is true, character data throughout the tree is stripped
    and whitespace-only strings are dropped.
    """
    parser = XMLParser()
    tree = parser.parseFile(pathOrFile)
    if stripData:
        stripCharacterData(tree[2])
    return tree


if __name__ == "__main__":
    from pprint import pprint
    import sys
    # Any extra command-line argument after the file name enables stripping.
    strip = bool(sys.argv[2:])
    tree = buildTree(sys.argv[1], strip)
    pprint(tree)