mimeparse.py 6.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183
  1. # Copyright 2014 Joe Gregorio
  2. #
  3. # Licensed under the MIT License
  4. """MIME-Type Parser
  5. This module provides basic functions for handling mime-types. It can handle
  6. matching mime-types against a list of media-ranges. See section 14.1 of the
  7. HTTP specification [RFC 2616] for a complete explanation.
  8. http://www.w3.org/Protocols/rfc2616/rfc2616-sec14.html#sec14.1
  9. Contents:
  10. - parse_mime_type(): Parses a mime-type into its component parts.
  11. - parse_media_range(): Media-ranges are mime-types with wild-cards and a 'q'
  12. quality parameter.
  13. - quality(): Determines the quality ('q') of a mime-type when
  14. compared against a list of media-ranges.
  15. - quality_parsed(): Just like quality() except the second parameter must be
  16. pre-parsed.
  17. - best_match(): Choose the mime-type with the highest quality ('q')
  18. from a list of candidates.
  19. """
  20. from __future__ import absolute_import
  21. from functools import reduce
  22. __version__ = "0.1.3"
  23. __author__ = "Joe Gregorio"
  24. __email__ = "joe@bitworking.org"
  25. __license__ = "MIT License"
  26. __credits__ = ""
  27. def parse_mime_type(mime_type):
  28. """Parses a mime-type into its component parts.
  29. Carves up a mime-type and returns a tuple of the (type, subtype, params)
  30. where 'params' is a dictionary of all the parameters for the media range.
  31. For example, the media range 'application/xhtml;q=0.5' would get parsed
  32. into:
  33. ('application', 'xhtml', {'q', '0.5'})
  34. """
  35. parts = mime_type.split(";")
  36. params = dict(
  37. [tuple([s.strip() for s in param.split("=", 1)]) for param in parts[1:]]
  38. )
  39. full_type = parts[0].strip()
  40. # Java URLConnection class sends an Accept header that includes a
  41. # single '*'. Turn it into a legal wildcard.
  42. if full_type == "*":
  43. full_type = "*/*"
  44. (type, subtype) = full_type.split("/")
  45. return (type.strip(), subtype.strip(), params)
  46. def parse_media_range(range):
  47. """Parse a media-range into its component parts.
  48. Carves up a media range and returns a tuple of the (type, subtype,
  49. params) where 'params' is a dictionary of all the parameters for the media
  50. range. For example, the media range 'application/*;q=0.5' would get parsed
  51. into:
  52. ('application', '*', {'q', '0.5'})
  53. In addition this function also guarantees that there is a value for 'q'
  54. in the params dictionary, filling it in with a proper default if
  55. necessary.
  56. """
  57. (type, subtype, params) = parse_mime_type(range)
  58. if (
  59. "q" not in params
  60. or not params["q"]
  61. or not float(params["q"])
  62. or float(params["q"]) > 1
  63. or float(params["q"]) < 0
  64. ):
  65. params["q"] = "1"
  66. return (type, subtype, params)
  67. def fitness_and_quality_parsed(mime_type, parsed_ranges):
  68. """Find the best match for a mime-type amongst parsed media-ranges.
  69. Find the best match for a given mime-type against a list of media_ranges
  70. that have already been parsed by parse_media_range(). Returns a tuple of
  71. the fitness value and the value of the 'q' quality parameter of the best
  72. match, or (-1, 0) if no match was found. Just as for quality_parsed(),
  73. 'parsed_ranges' must be a list of parsed media ranges.
  74. """
  75. best_fitness = -1
  76. best_fit_q = 0
  77. (target_type, target_subtype, target_params) = parse_media_range(mime_type)
  78. for (type, subtype, params) in parsed_ranges:
  79. type_match = type == target_type or type == "*" or target_type == "*"
  80. subtype_match = (
  81. subtype == target_subtype or subtype == "*" or target_subtype == "*"
  82. )
  83. if type_match and subtype_match:
  84. param_matches = reduce(
  85. lambda x, y: x + y,
  86. [
  87. 1
  88. for (key, value) in target_params.items()
  89. if key != "q" and key in params and value == params[key]
  90. ],
  91. 0,
  92. )
  93. fitness = (type == target_type) and 100 or 0
  94. fitness += (subtype == target_subtype) and 10 or 0
  95. fitness += param_matches
  96. if fitness > best_fitness:
  97. best_fitness = fitness
  98. best_fit_q = params["q"]
  99. return best_fitness, float(best_fit_q)
  100. def quality_parsed(mime_type, parsed_ranges):
  101. """Find the best match for a mime-type amongst parsed media-ranges.
  102. Find the best match for a given mime-type against a list of media_ranges
  103. that have already been parsed by parse_media_range(). Returns the 'q'
  104. quality parameter of the best match, 0 if no match was found. This function
  105. bahaves the same as quality() except that 'parsed_ranges' must be a list of
  106. parsed media ranges.
  107. """
  108. return fitness_and_quality_parsed(mime_type, parsed_ranges)[1]
  109. def quality(mime_type, ranges):
  110. """Return the quality ('q') of a mime-type against a list of media-ranges.
  111. Returns the quality 'q' of a mime-type when compared against the
  112. media-ranges in ranges. For example:
  113. >>> quality('text/html','text/*;q=0.3, text/html;q=0.7,
  114. text/html;level=1, text/html;level=2;q=0.4, */*;q=0.5')
  115. 0.7
  116. """
  117. parsed_ranges = [parse_media_range(r) for r in ranges.split(",")]
  118. return quality_parsed(mime_type, parsed_ranges)
  119. def best_match(supported, header):
  120. """Return mime-type with the highest quality ('q') from list of candidates.
  121. Takes a list of supported mime-types and finds the best match for all the
  122. media-ranges listed in header. The value of header must be a string that
  123. conforms to the format of the HTTP Accept: header. The value of 'supported'
  124. is a list of mime-types. The list of supported mime-types should be sorted
  125. in order of increasing desirability, in case of a situation where there is
  126. a tie.
  127. >>> best_match(['application/xbel+xml', 'text/xml'],
  128. 'text/*;q=0.5,*/*; q=0.1')
  129. 'text/xml'
  130. """
  131. split_header = _filter_blank(header.split(","))
  132. parsed_header = [parse_media_range(r) for r in split_header]
  133. weighted_matches = []
  134. pos = 0
  135. for mime_type in supported:
  136. weighted_matches.append(
  137. (fitness_and_quality_parsed(mime_type, parsed_header), pos, mime_type)
  138. )
  139. pos += 1
  140. weighted_matches.sort()
  141. return weighted_matches[-1][0][1] and weighted_matches[-1][2] or ""
  142. def _filter_blank(i):
  143. for s in i:
  144. if s.strip():
  145. yield s