# etree.py
  1. from __future__ import absolute_import, division, unicode_literals
  2. try:
  3. from collections import OrderedDict
  4. except ImportError:
  5. try:
  6. from ordereddict import OrderedDict
  7. except ImportError:
  8. OrderedDict = dict
  9. import gettext
  10. _ = gettext.gettext
  11. import re
  12. from pip._vendor.six import text_type
  13. from . import _base
  14. from ..utils import moduleFactoryFactory
  15. tag_regexp = re.compile("{([^}]*)}(.*)")
  16. def getETreeBuilder(ElementTreeImplementation):
  17. ElementTree = ElementTreeImplementation
  18. ElementTreeCommentType = ElementTree.Comment("asd").tag
  19. class TreeWalker(_base.NonRecursiveTreeWalker):
  20. """Given the particular ElementTree representation, this implementation,
  21. to avoid using recursion, returns "nodes" as tuples with the following
  22. content:
  23. 1. The current element
  24. 2. The index of the element relative to its parent
  25. 3. A stack of ancestor elements
  26. 4. A flag "text", "tail" or None to indicate if the current node is a
  27. text node; either the text or tail of the current element (1)
  28. """
  29. def getNodeDetails(self, node):
  30. if isinstance(node, tuple): # It might be the root Element
  31. elt, key, parents, flag = node
  32. if flag in ("text", "tail"):
  33. return _base.TEXT, getattr(elt, flag)
  34. else:
  35. node = elt
  36. if not(hasattr(node, "tag")):
  37. node = node.getroot()
  38. if node.tag in ("DOCUMENT_ROOT", "DOCUMENT_FRAGMENT"):
  39. return (_base.DOCUMENT,)
  40. elif node.tag == "<!DOCTYPE>":
  41. return (_base.DOCTYPE, node.text,
  42. node.get("publicId"), node.get("systemId"))
  43. elif node.tag == ElementTreeCommentType:
  44. return _base.COMMENT, node.text
  45. else:
  46. assert type(node.tag) == text_type, type(node.tag)
  47. # This is assumed to be an ordinary element
  48. match = tag_regexp.match(node.tag)
  49. if match:
  50. namespace, tag = match.groups()
  51. else:
  52. namespace = None
  53. tag = node.tag
  54. attrs = OrderedDict()
  55. for name, value in list(node.attrib.items()):
  56. match = tag_regexp.match(name)
  57. if match:
  58. attrs[(match.group(1), match.group(2))] = value
  59. else:
  60. attrs[(None, name)] = value
  61. return (_base.ELEMENT, namespace, tag,
  62. attrs, len(node) or node.text)
  63. def getFirstChild(self, node):
  64. if isinstance(node, tuple):
  65. element, key, parents, flag = node
  66. else:
  67. element, key, parents, flag = node, None, [], None
  68. if flag in ("text", "tail"):
  69. return None
  70. else:
  71. if element.text:
  72. return element, key, parents, "text"
  73. elif len(element):
  74. parents.append(element)
  75. return element[0], 0, parents, None
  76. else:
  77. return None
  78. def getNextSibling(self, node):
  79. if isinstance(node, tuple):
  80. element, key, parents, flag = node
  81. else:
  82. return None
  83. if flag == "text":
  84. if len(element):
  85. parents.append(element)
  86. return element[0], 0, parents, None
  87. else:
  88. return None
  89. else:
  90. if element.tail and flag != "tail":
  91. return element, key, parents, "tail"
  92. elif key < len(parents[-1]) - 1:
  93. return parents[-1][key + 1], key + 1, parents, None
  94. else:
  95. return None
  96. def getParentNode(self, node):
  97. if isinstance(node, tuple):
  98. element, key, parents, flag = node
  99. else:
  100. return None
  101. if flag == "text":
  102. if not parents:
  103. return element
  104. else:
  105. return element, key, parents, None
  106. else:
  107. parent = parents.pop()
  108. if not parents:
  109. return parent
  110. else:
  111. return parent, list(parents[-1]).index(parent), parents, None
  112. return locals()
  113. getETreeModule = moduleFactoryFactory(getETreeBuilder)