summaryrefslogtreecommitdiff
path: root/misc/pylib/robofab/xmlTreeBuilder.pyx
blob: be621e14fa7d4a81a48c6d5ea54cb82eeebc6e9c (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
import os
try:
	from xml.parsers.expat import ParserCreate
except ImportError:
	_haveExpat = 0
	from xml.parsers.xmlproc.xmlproc import XMLProcessor
else:
	_haveExpat = 1


class XMLParser:

	"""Build a simple nested tree from an XML document.

	Every element is stored as a ``(name, attrs, children)`` tuple, where
	``children`` is a list holding child element tuples and character data
	strings in the order the parser reported them.
	"""

	def __init__(self):
		# 'root' collects the top-level nodes. 'current' describes the
		# innermost open element: its first item is the list that receives
		# new child nodes, its last item is the enclosing 'current' tuple.
		self.root = []
		self.current = (self.root, None)

	def getRoot(self):
		"""Return the one top-level node; asserts that exactly one exists."""
		assert len(self.root) == 1
		return self.root[0]

	def startElementHandler(self, name, attrs):
		"""Open a new element and make it the current one."""
		children = []
		self.current = (children, name, attrs, self.current)

	def endElementHandler(self, name):
		"""Close the current element and append it to its parent's children."""
		# The name and attrs recorded by startElementHandler are used; the
		# 'name' argument is overwritten by the unpacking below.
		children, name, attrs, previous = self.current
		previous[0].append((name, attrs, children))
		self.current = previous

	def characterDataHandler(self, data):
		"""Add character data to the current element's children."""
		nodes = self.current[0]
		# Data may arrive in several pieces: glue a piece onto the previous
		# node when that node is character data of the same type.
		if nodes and type(nodes[-1]) == type(data):
			nodes[-1] = nodes[-1] + data
		else:
			nodes.append(data)

	def _openSource(self, pathOrFile, mode):
		"""Return ``(fileObject, didOpen)`` for a path or an open file.

		A string is treated as a path and opened with *mode* (didOpen is 1,
		the caller must close the file). Anything else is returned as is
		(didOpen is 0, the caller must leave it open).
		"""
		try:
			stringTypes = (str, unicode)
		except NameError:
			# No separate 'unicode' builtin (Python 3).
			stringTypes = (str,)
		if isinstance(pathOrFile, stringTypes):
			return open(pathOrFile, mode), 1
		return pathOrFile, 0

	def _expatParseFile(self, pathOrFile):
		"""Parse a path or file object with expat and return the root node."""
		parser = ParserCreate()
		# Only Python 2 parsers have 'returns_unicode'; skip the assignment
		# where the attribute does not exist.
		if hasattr(parser, "returns_unicode"):
			parser.returns_unicode = 0  # XXX, Don't remember why. It sucks, though.
		parser.StartElementHandler = self.startElementHandler
		parser.EndElementHandler = self.endElementHandler
		parser.CharacterDataHandler = self.characterDataHandler
		# Binary mode: expat decodes the document itself.
		f, didOpen = self._openSource(pathOrFile, "rb")
		try:
			parser.ParseFile(f)
		finally:
			# Close our own file even when parsing raises.
			if didOpen:
				f.close()
		return self.getRoot()

	def _xmlprocDataHandler(self, data, begin, end):
		"""Forward the ``data[begin:end]`` slice to characterDataHandler."""
		self.characterDataHandler(data[begin:end])

	def _xmlprocParseFile(self, pathOrFile):
		"""Parse a path or file object with xmlproc and return the root node."""
		proc = XMLProcessor()
		proc.app.handle_start_tag = self.startElementHandler
		proc.app.handle_end_tag = self.endElementHandler
		proc.app.handle_data = self._xmlprocDataHandler
		f, didOpen = self._openSource(pathOrFile, "r")
		try:
			proc.parseStart()
			proc.read_from(f)
			proc.flush()
			proc.parseEnd()
			proc.deref()
		finally:
			# Close our own file even when parsing raises.
			if didOpen:
				f.close()
		return self.getRoot()

	# Pick the backend once, based on which parser could be imported.
	if _haveExpat:
		parseFile = _expatParseFile
	else:
		parseFile = _xmlprocParseFile


def stripCharacterData(nodes, recursive=True):
	"""Strip whitespace from the character data in 'nodes', in place.

	'nodes' is a list whose items are either tuples (elements, with their
	child list at index 2) or character data strings. Each string is
	replaced by its stripped value; strings that become empty are removed.
	When 'recursive' is true, the child list of every element gets the
	same treatment. The list object itself is kept, so existing references
	to it see the result. Returns None.
	"""
	kept = []
	for node in nodes:
		if isinstance(node, tuple):
			if recursive:
				stripCharacterData(node[2], recursive)
			kept.append(node)
		else:
			node = node.strip()
			if node:
				kept.append(node)
	# One slice assignment instead of repeated 'del nodes[i]', each of
	# which has to shift the remainder of the list.
	nodes[:] = kept


def buildTree(pathOrFile, stripData=1):
	"""Parse 'pathOrFile' with a fresh XMLParser and return the root node.

	When 'stripData' is true, stripCharacterData() is first applied to
	the root node's child list (the item at index 2 of the root).
	"""
	root = XMLParser().parseFile(pathOrFile)
	if not stripData:
		return root
	stripCharacterData(root[2])
	return root


if __name__ == "__main__":
	# Script use: pretty-print the tree built from the file named by the
	# first command line argument. Character data is stripped only when at
	# least one further argument is given.
	import sys
	from pprint import pprint
	strip = len(sys.argv) > 2
	pprint(buildTree(sys.argv[1], strip))