1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
|
import os
try:
    from xml.parsers.expat import ParserCreate
except ImportError:
    # expat is unavailable: fall back to the xmlproc processor.
    _haveExpat = 0
    from xml.parsers.xmlproc.xmlproc import XMLProcessor
else:
    _haveExpat = 1


class XMLParser:
    """Parse an XML document into a tree of plain tuples, lists and strings.

    Every element becomes a ``(name, attrs, children)`` tuple, where
    ``children`` is a list holding nested element tuples and character data
    strings in document order. Adjacent character data chunks of the same
    type are merged into a single string.

    Use ``parseFile(pathOrFile)`` to parse; it is bound to the expat based
    implementation when expat could be imported and to the xmlproc based
    implementation otherwise.
    """

    def __init__(self):
        # Elements that have no parent end up here; getRoot() expects
        # exactly one once parsing is finished.
        self.root = []
        # Innermost open element as a linked frame:
        # (children, name, attrs, enclosingFrame). The outermost frame is
        # the two-item (self.root, None).
        self.current = (self.root, None)

    def getRoot(self):
        """Return the single top-level element tuple of the parsed document."""
        assert len(self.root) == 1
        return self.root[0]

    def startElementHandler(self, name, attrs):
        """Open a new element frame nested inside the current one."""
        children = []
        self.current = (children, name, attrs, self.current)

    def endElementHandler(self, name):
        """Close the current element and attach it to its parent's children."""
        # The name stored when the element was opened is the one recorded in
        # the tree; the argument is deliberately shadowed by it.
        children, name, attrs, previous = self.current
        previous[0].append((name, attrs, children))
        self.current = previous

    def characterDataHandler(self, data):
        """Append character data to the current element's children.

        Data is concatenated onto the previous child when that child is a
        string of exactly the same type, so text delivered in several chunks
        ends up as one string.
        """
        nodes = self.current[0]
        if nodes and type(nodes[-1]) is type(data):
            nodes[-1] = nodes[-1] + data
        else:
            nodes.append(data)

    def _openInput(self, pathOrFile):
        """Return ``(fileObject, didOpen)`` for a path or an open file.

        Anything exposing ``read`` is used as is and stays owned by the
        caller. Everything else is treated as a path and opened in binary
        mode, because the parsers consume raw bytes.
        """
        if hasattr(pathOrFile, "read"):
            return pathOrFile, False
        return open(pathOrFile, "rb"), True

    def _expatParseFile(self, pathOrFile):
        """Parse ``pathOrFile`` with expat and return the root element tuple.

        ``pathOrFile`` is either a file system path or a file object whose
        ``read`` returns the raw document. A file opened here is closed
        again even when parsing fails; a file object passed in is left open.
        """
        parser = ParserCreate()
        # Keep the historical byte-string results where the parser still
        # offers the switch (the original note says the reason was not
        # recorded). Parsers without the attribute reject the assignment.
        if hasattr(parser, "returns_unicode"):
            parser.returns_unicode = 0
        parser.StartElementHandler = self.startElementHandler
        parser.EndElementHandler = self.endElementHandler
        parser.CharacterDataHandler = self.characterDataHandler
        f, didOpen = self._openInput(pathOrFile)
        try:
            parser.ParseFile(f)
        finally:
            if didOpen:
                f.close()
        return self.getRoot()

    def _xmlprocDataHandler(self, data, begin, end):
        """Adapt xmlproc's (buffer, begin, end) data callback to a string."""
        self.characterDataHandler(data[begin:end])

    def _xmlprocParseFile(self, pathOrFile):
        """Parse ``pathOrFile`` with xmlproc and return the root element tuple.

        Accepts the same argument as ``_expatParseFile``. A file opened here
        is closed again even when parsing fails.
        """
        proc = XMLProcessor()
        proc.app.handle_start_tag = self.startElementHandler
        proc.app.handle_end_tag = self.endElementHandler
        proc.app.handle_data = self._xmlprocDataHandler
        f, didOpen = self._openInput(pathOrFile)
        try:
            proc.parseStart()
            proc.read_from(f)
            proc.flush()
            proc.parseEnd()
            proc.deref()
        finally:
            if didOpen:
                f.close()
        return self.getRoot()

    # Public entry point: pick the implementation matching the parser that
    # could be imported.
    if _haveExpat:
        parseFile = _expatParseFile
    else:
        parseFile = _xmlprocParseFile
def stripCharacterData(nodes, recursive=True):
    """Strip whitespace from character data in ``nodes``, in place.

    ``nodes`` is a list mixing element tuples (whose third item is the
    element's own child list) and character data strings. Each string is
    replaced by its ``strip()``-ed value, and strings that become empty are
    removed. Element tuples are kept untouched; when ``recursive`` is true
    their child lists are processed the same way.

    The list object itself is updated (callers keep their reference) and
    nothing is returned.
    """
    kept = []
    for node in nodes:
        if isinstance(node, tuple):
            if recursive:
                stripCharacterData(node[2])
            kept.append(node)
        else:
            text = node.strip()
            if text:
                kept.append(text)
    # One slice assignment instead of deleting entries one by one, which
    # shifted the tail of the list for every whitespace-only string.
    nodes[:] = kept
def buildTree(pathOrFile, stripData=1):
    """Parse ``pathOrFile`` and return its root ``(name, attrs, children)`` tuple.

    ``pathOrFile`` is handed unchanged to ``XMLParser.parseFile``. When
    ``stripData`` is true, the root's child list is run through
    ``stripCharacterData`` before the tree is returned.
    """
    root = XMLParser().parseFile(pathOrFile)
    if not stripData:
        return root
    # Index 2 of an element tuple is its list of children.
    stripCharacterData(root[2])
    return root
if __name__ == "__main__":
    # Script use: the first argument is the XML file to dump as a tree.
    import sys
    from pprint import pprint

    # The presence of any further argument switches whitespace stripping on.
    wantStrip = len(sys.argv) > 2
    pprint(buildTree(sys.argv[1], wantStrip))
|