# pulldom.py (2.3 KB)
  1. from __future__ import absolute_import, division, unicode_literals
  2. from xml.dom.pulldom import START_ELEMENT, END_ELEMENT, \
  3. COMMENT, IGNORABLE_WHITESPACE, CHARACTERS
  4. from . import _base
  5. from ..constants import voidElements
  6. class TreeWalker(_base.TreeWalker):
  7. def __iter__(self):
  8. ignore_until = None
  9. previous = None
  10. for event in self.tree:
  11. if previous is not None and \
  12. (ignore_until is None or previous[1] is ignore_until):
  13. if previous[1] is ignore_until:
  14. ignore_until = None
  15. for token in self.tokens(previous, event):
  16. yield token
  17. if token["type"] == "EmptyTag":
  18. ignore_until = previous[1]
  19. previous = event
  20. if ignore_until is None or previous[1] is ignore_until:
  21. for token in self.tokens(previous, None):
  22. yield token
  23. elif ignore_until is not None:
  24. raise ValueError("Illformed DOM event stream: void element without END_ELEMENT")
  25. def tokens(self, event, next):
  26. type, node = event
  27. if type == START_ELEMENT:
  28. name = node.nodeName
  29. namespace = node.namespaceURI
  30. attrs = {}
  31. for attr in list(node.attributes.keys()):
  32. attr = node.getAttributeNode(attr)
  33. attrs[(attr.namespaceURI, attr.localName)] = attr.value
  34. if name in voidElements:
  35. for token in self.emptyTag(namespace,
  36. name,
  37. attrs,
  38. not next or next[1] is not node):
  39. yield token
  40. else:
  41. yield self.startTag(namespace, name, attrs)
  42. elif type == END_ELEMENT:
  43. name = node.nodeName
  44. namespace = node.namespaceURI
  45. if name not in voidElements:
  46. yield self.endTag(namespace, name)
  47. elif type == COMMENT:
  48. yield self.comment(node.nodeValue)
  49. elif type in (IGNORABLE_WHITESPACE, CHARACTERS):
  50. for token in self.text(node.nodeValue):
  51. yield token
  52. else:
  53. yield self.unknown(type)