serializer.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587
  1. #!/usr/bin/env python
  2. """A serializer that encodes EE object trees as JSON DAGs."""
  3. # Using lowercase function naming to match the JavaScript names.
  4. # pylint: disable=g-bad-name
  5. # pylint: disable=g-bad-import-order
  6. import collections
  7. import datetime
  8. import hashlib
  9. import json
  10. import math
  11. import numbers
  12. from . import _cloud_api_utils
  13. from . import ee_exception
  14. from . import encodable
  15. # The datetime for the beginning of the Unix epoch.
  16. _EPOCH_DATETIME = datetime.datetime.utcfromtimestamp(0)
  17. # Don't generate very deep expressions, as the backend rejects them.
  18. # The backend's limit is 100, and we want to stay well away from that
  19. # as a few extra levels of wrapping are always added.
  20. _DEPTH_LIMIT = 50
  21. def DatetimeToMicroseconds(date):
  22. """Convert a datetime to a timestamp, microseconds since the epoch."""
  23. td = (date - _EPOCH_DATETIME)
  24. microseconds = td.microseconds + (td.seconds + td.days * 24 * 3600) * 1e6
  25. return math.floor(microseconds)
  26. class Serializer(object):
  27. """A serializer for EE object trees."""
  28. def __init__(self,
  29. is_compound=True,
  30. for_cloud_api=False,
  31. unbound_name=None):
  32. """Constructs a serializer.
  33. Args:
  34. is_compound: Whether the encoding should factor out shared subtrees.
  35. for_cloud_api: Whether the encoding should be done for the Cloud API or
  36. the legacy API.
  37. unbound_name: Provides a name for unbound variables in objects.
  38. """
  39. # Whether the encoding should factor out shared subtrees.
  40. self._is_compound = bool(is_compound)
  41. self._for_cloud_api = bool(for_cloud_api)
  42. self.unbound_name = unbound_name
  43. # A list of shared subtrees as [name, value] pairs.
  44. self._scope = []
  45. # A lookup table from object hash to subtree names as stored in self._scope
  46. self._encoded = {}
  47. # A lookup table from object ID as retrieved by id() to md5 hash values.
  48. self._hashcache = {}
  49. def _encode(self, obj):
  50. """Encodes a top level object to be executed server-side.
  51. Args:
  52. obj: The object to encode.
  53. Returns:
  54. An encoded object ready for JSON serialization.
  55. """
  56. if self._for_cloud_api:
  57. return self._encode_for_cloud_api(obj)
  58. value = self._encode_value(obj)
  59. if self._is_compound:
  60. if (isinstance(value, dict) and value['type'] == 'ValueRef' and
  61. len(self._scope) == 1):
  62. # Just one value. No need for complex structure.
  63. value = self._scope[0][1]
  64. else:
  65. # Wrap the scopes and final value with a CompoundValue.
  66. value = {'type': 'CompoundValue', 'scope': self._scope, 'value': value}
  67. # Clear state in case of future encoding.
  68. self._scope = []
  69. self._encoded = {}
  70. self._hashcache = {}
  71. return value
  72. def _encode_for_cloud_api(self, obj):
  73. """Encodes an object as an Expression or quasi-Expression."""
  74. value = self._encode_cloud_object(obj)
  75. if self._is_compound:
  76. # Wrap the scopes and final value into an Expression.
  77. value = _ExpressionOptimizer(value, self._scope).optimize()
  78. # Clear state in case of future encoding.
  79. self._scope = []
  80. self._encoded = {}
  81. self._hashcache = {}
  82. else:
  83. value = _ExpressionOptimizer(value).optimize()
  84. return value
  85. def _encode_value(self, obj):
  86. """Encodes a subtree as a Value in the EE API v2 (DAG) format.
  87. If _is_compound is True, this will fill the _scope and _encoded properties.
  88. Args:
  89. obj: The object to encode.
  90. Returns:
  91. An encoded object.
  92. """
  93. obj_id = id(obj)
  94. hashval = self._hashcache.get(obj_id)
  95. encoded = self._encoded.get(hashval, None)
  96. if self._is_compound and encoded:
  97. # Already encoded objects are encoded as ValueRefs and returned directly.
  98. return {'type': 'ValueRef', 'value': encoded}
  99. elif obj is None or isinstance(obj, (bool, numbers.Number, str)):
  100. # Primitives are encoded as is and not saved in the scope.
  101. return obj
  102. elif isinstance(obj, datetime.datetime):
  103. # A raw date slipped through. Wrap it. Calling ee.Date from here would
  104. # cause a circular dependency, so we encode it manually.
  105. return {
  106. 'type': 'Invocation',
  107. 'functionName': 'Date',
  108. 'arguments': {
  109. 'value': DatetimeToMicroseconds(obj) / 1e3
  110. }
  111. }
  112. elif isinstance(obj, encodable.Encodable):
  113. # Some objects know how to encode themselves.
  114. result = obj.encode(self._encode_value)
  115. if (not isinstance(result, (list, tuple)) and
  116. (not isinstance(result, (dict)) or result['type'] == 'ArgumentRef')):
  117. # Optimization: simple enough that adding it to the scope is probably
  118. # not worth it.
  119. return result
  120. elif isinstance(obj, encodable.EncodableFunction):
  121. result = obj.encode_invocation(self._encode_value)
  122. if (not isinstance(result, (list, tuple)) and
  123. (not isinstance(result, (dict)) or result['type'] == 'ArgumentRef')):
  124. # Optimization: simple enough that adding it to the scope is probably
  125. # not worth it.
  126. return result
  127. elif isinstance(obj, (list, tuple)):
  128. # Lists are encoded recursively.
  129. result = [self._encode_value(i) for i in obj]
  130. elif isinstance(obj, dict):
  131. # Dictionary are encoded recursively and wrapped in a type specifier.
  132. result = {
  133. 'type':
  134. 'Dictionary',
  135. 'value':
  136. dict([(key, self._encode_value(value))
  137. for key, value in obj.items()])
  138. }
  139. else:
  140. raise ee_exception.EEException('Can\'t encode object: %s' % obj)
  141. if self._is_compound:
  142. # Save the new object and return a ValueRef.
  143. hashval = hashlib.md5(json.dumps(result).encode()).digest()
  144. self._hashcache[obj_id] = hashval
  145. name = self._encoded.get(hashval, None)
  146. if not name:
  147. name = str(len(self._scope))
  148. self._scope.append((name, result))
  149. self._encoded[hashval] = name
  150. return {'type': 'ValueRef', 'value': name}
  151. else:
  152. return result
  153. def _encode_cloud_object(self, obj):
  154. """Encodes an object using the Cloud API Expression form.
  155. If _is_compound is True, this will fill the _scope and _encoded properties.
  156. Args:
  157. obj: The object to encode.
  158. Returns:
  159. If _is_compound is True, a string that is the key under which the
  160. encoded object is stored in _scope.
  161. If _is_compound is False, the encoded object as a single quasi-Expression.
  162. """
  163. obj_id = id(obj)
  164. hashval = self._hashcache.get(obj_id)
  165. reference = self._encoded.get(hashval, None)
  166. if reference:
  167. return reference
  168. elif obj is None or isinstance(obj, (bool, str)):
  169. result = {'constantValue': obj}
  170. elif isinstance(obj, numbers.Number):
  171. result = _cloud_api_utils.encode_number_as_cloud_value(obj)
  172. elif isinstance(obj, datetime.datetime):
  173. # A raw date slipped through. Wrap it. Calling ee.Date from here would
  174. # cause a circular dependency, so we encode it manually.
  175. result = {
  176. 'functionInvocationValue': {
  177. 'functionName': 'Date',
  178. 'arguments': {
  179. 'value': {
  180. 'constantValue': DatetimeToMicroseconds(obj) / 1e3
  181. }
  182. }
  183. }
  184. }
  185. elif isinstance(obj, encodable.Encodable):
  186. # Some objects know how to encode themselves.
  187. result = obj.encode_cloud_value(self._encode_cloud_object)
  188. elif isinstance(obj, (list, tuple)):
  189. # Lists are encoded recursively.
  190. if self._is_compound:
  191. result = {
  192. 'arrayValue': {
  193. 'values': [{
  194. 'valueReference': self._encode_cloud_object(i)
  195. } for i in obj]
  196. }
  197. }
  198. else:
  199. result = {
  200. 'arrayValue': {
  201. 'values': [self._encode_cloud_object(i) for i in obj]
  202. }
  203. }
  204. elif isinstance(obj, dict):
  205. # Dictionary are encoded recursively and wrapped in a type specifier.
  206. # We iterate through the entries in a deterministic order, not because it
  207. # affects the order of the entries in the output result, but because it
  208. # affects the names that they are assigned in _scope; without the
  209. # ordering, the encoding process may produce one of multiple different
  210. # (albeit equivalent) representations.
  211. if self._is_compound:
  212. result = {
  213. 'dictionaryValue': {
  214. 'values': {
  215. key: {
  216. 'valueReference': self._encode_cloud_object(obj[key])
  217. } for key in sorted(obj)
  218. }
  219. }
  220. }
  221. else:
  222. result = {
  223. 'dictionaryValue': {
  224. 'values': {
  225. key: self._encode_cloud_object(obj[key])
  226. for key in sorted(obj)
  227. }
  228. }
  229. }
  230. else:
  231. raise ee_exception.EEException('Can\'t encode object: %s' % obj)
  232. if self._is_compound:
  233. # Save the new object and return a ValueRef.
  234. hashval = hashlib.md5(json.dumps(result).encode()).digest()
  235. self._hashcache[obj_id] = hashval
  236. name = self._encoded.get(hashval, None)
  237. if not name:
  238. name = str(len(self._scope))
  239. self._scope.append((name, result))
  240. self._encoded[hashval] = name
  241. return name
  242. else:
  243. return result
  244. def encode(obj, is_compound=True, for_cloud_api=True, unbound_name=None):
  245. """Serialize an object to a JSON-compatible structure for API calls.
  246. Args:
  247. obj: The object to serialize.
  248. is_compound: Whether the encoding should factor out shared subtrees.
  249. for_cloud_api: Whether the encoding should be done for the Cloud API or the
  250. legacy API.
  251. unbound_name: Provides a name for unbound variables in objects. Unbound
  252. variables are otherwise disallowed. See the Count Functions usage in
  253. customfunction.py.
  254. Returns:
  255. A JSON-compatible structure representing the input.
  256. """
  257. serializer = Serializer(
  258. is_compound, for_cloud_api=for_cloud_api, unbound_name=unbound_name)
  259. return serializer._encode(obj) # pylint: disable=protected-access
  260. def toJSON(obj, opt_pretty=False, for_cloud_api=True):
  261. """Serialize an object to a JSON string appropriate for API calls.
  262. Args:
  263. obj: The object to serialize.
  264. opt_pretty: True to pretty-print the object.
  265. for_cloud_api: Whether the encoding should be done for the Cloud API or the
  266. legacy API.
  267. Returns:
  268. A JSON string representing the input.
  269. """
  270. serializer = Serializer(not opt_pretty, for_cloud_api=for_cloud_api)
  271. encoded = serializer._encode(obj) # pylint: disable=protected-access
  272. return json.dumps(encoded, indent=2 if opt_pretty else None)
  273. def toReadableJSON(obj, for_cloud_api=True):
  274. """Convert an object to readable JSON."""
  275. return toJSON(obj, True, for_cloud_api=for_cloud_api)
  276. class _ExpressionOptimizer(object):
  277. """Optimises the representation of an Expression.
  278. The Expressions generated by recursive encoding can be inefficiently
  279. represented. This class helps improve the representation.
  280. The initial representation is intentionally simple, as it makes the encoding
  281. logic simple. Constants end up as individual ValueNodes, though the Expression
  282. format itself allows complex constants (nested arrays and/or dicts containing
  283. constant values). There are also often places where references to ValueNodes
  284. can be replaced by direct inclusion of those ValueNodes.
  285. This operates in two modes:
  286. - It can be passed an Expression as a dict of named ValueNodes, and the name
  287. that represents the final result. In this case, it returns the optimised
  288. Expression in the same form. This is the "compound" mode.
  289. - It can be passed a quasi-Expression as a single object. In this case, it
  290. returns the optimised quasi-Expression in the same form. This is the
  291. "non-compound" mode. A "quasi-Expression" is essentially an Expression DAG
  292. that's been expanded to a tree by replacing references with the actual thing
  293. being referenced. This means that if the same entity is referenced more than
  294. once, it will be duplicated in the tree.
  295. The rules that the optimiser follows are straightforward:
  296. - If a value is referred to only once, lift it into the place that references
  297. it.
  298. - If a value is a numeric or boolean constant, lift it into all the places
  299. that reference it.
  300. - If a value is a string constant, lift it if it is referred to only once.
  301. - Collapse dicts and arrays of constants to constant dicts/arrays.
  302. """
  303. def __init__(self, result, values=None):
  304. """Builds an ExpressionOptimizer.
  305. Args:
  306. result: The result to optimize, either as a key of "values", or as a
  307. quasi-Expression.
  308. values: If provided (in compound mode), a set of named ValueNodes.
  309. """
  310. self._result = result
  311. # We want to make sure the process is deterministic.
  312. self._values = collections.OrderedDict(
  313. values) if values is not None else None
  314. if self._is_compound():
  315. self._single_uses = self._find_single_uses()
  316. self._optimized_values = {}
  317. self._reference_map = {}
  318. def _is_compound(self):
  319. return self._values is not None
  320. def _find_single_uses(self):
  321. """Finds the names of all named values that are referred to only once."""
  322. reference_counts = collections.defaultdict(int)
  323. reference_counts[self._result] += 1
  324. def _contained_reference(value):
  325. """Gets a contained reference from a ValueNode, if there is one."""
  326. if 'functionDefinitionValue' in value:
  327. return value['functionDefinitionValue']['body']
  328. elif 'functionInvocationValue' in value:
  329. function_invocation = value['functionInvocationValue']
  330. if 'functionReference' in function_invocation:
  331. return function_invocation['functionReference']
  332. elif 'valueReference' in value:
  333. return value['valueReference']
  334. return None
  335. def increment_reference_count(value):
  336. reference = _contained_reference(value)
  337. if reference is not None:
  338. reference_counts[reference] += 1
  339. self._visit_all_values_in_expression(increment_reference_count)
  340. return set(reference for reference, count in reference_counts.items()
  341. if count == 1)
  342. def optimize(self):
  343. """Optimises the expression, returning the optimised form."""
  344. optimized_result = self._optimize_referred_value(self._result)
  345. if self._is_compound():
  346. return {'result': optimized_result, 'values': self._optimized_values}
  347. else:
  348. return optimized_result
  349. def _optimize_referred_value(self, reference_or_value):
  350. """Recursively optimises a value.
  351. Optimises a value and everything recursively reachable from it.
  352. This operates differently depending on the mode.
  353. In compound mode:
  354. Takes a name (in _values) for a ValueNode, optimises the referenced
  355. ValueNode, and returns a name (in _optimized_values) for the optimised
  356. ValueNode. Updates _optimized_values and _reference_map.
  357. In non-compound mode:
  358. Takes a quasi-ValueNode, optimises it, and returns the optimised
  359. quasi-ValueNode.
  360. Args:
  361. reference_or_value: The name in _values of the value to optimise, or the
  362. actual value itself.
  363. Returns:
  364. The name, in _optimized_values, of the optimised value, or the optimised
  365. value itself.
  366. """
  367. if self._is_compound():
  368. if reference_or_value in self._reference_map:
  369. return self._reference_map[reference_or_value]
  370. mapped_reference = str(len(self._reference_map))
  371. self._reference_map[reference_or_value] = mapped_reference
  372. self._optimized_values[mapped_reference] = self._optimize_value(
  373. self._values[reference_or_value], 0)
  374. return mapped_reference
  375. else:
  376. return self._optimize_value(reference_or_value, 0)
  377. def _optimize_value(self, value, depth):
  378. """Optimises a single value.
  379. Args:
  380. value: The ValueNode to optimise, in dict form.
  381. depth: How deep in the encoded output this value will be placed.
  382. Returns:
  383. An optimised version of that value, created by lifting in all feasible
  384. constants and references, subject (in compound mode) to a depth limit.
  385. """
  386. if any(
  387. x in value for x in
  388. ['constantValue', 'integerValue', 'bytesValue', 'argumentReference']):
  389. # Not optimisable.
  390. return value
  391. elif 'arrayValue' in value:
  392. # Optimise recursively, then turn an array of constants into a constant
  393. # array.
  394. optimized_array = [
  395. self._optimize_value(array_value, depth + 3)
  396. for array_value in value['arrayValue']['values']
  397. ]
  398. if all(self._is_constant_value(v) for v in optimized_array):
  399. optimized_array = [v['constantValue'] for v in optimized_array]
  400. return {'constantValue': optimized_array}
  401. else:
  402. return {'arrayValue': {'values': optimized_array}}
  403. elif 'dictionaryValue' in value:
  404. # Optimise recursively, then turn a dict of constants into a constant
  405. # dict.
  406. optimized_dict = {
  407. key: self._optimize_value(dict_value, depth + 3)
  408. for key, dict_value in value['dictionaryValue']['values'].items()
  409. }
  410. if all(self._is_constant_value(v) for v in optimized_dict.values()):
  411. optimized_dict = {
  412. k: v['constantValue'] for k, v in optimized_dict.items()
  413. }
  414. return {'constantValue': optimized_dict}
  415. else:
  416. return {'dictionaryValue': {'values': optimized_dict}}
  417. elif 'functionDefinitionValue' in value:
  418. function_definition = value['functionDefinitionValue']
  419. return {
  420. 'functionDefinitionValue': {
  421. 'argumentNames': function_definition['argumentNames'],
  422. 'body': self._optimize_referred_value(function_definition['body'])
  423. }
  424. }
  425. elif 'functionInvocationValue' in value:
  426. function_invocation = value['functionInvocationValue']
  427. arguments = function_invocation['arguments']
  428. optimized_invocation = {}
  429. if 'functionName' in function_invocation:
  430. optimized_invocation['functionName'] = function_invocation[
  431. 'functionName']
  432. else:
  433. optimized_invocation[
  434. 'functionReference'] = self._optimize_referred_value(
  435. function_invocation['functionReference'])
  436. optimized_invocation['arguments'] = {
  437. k: self._optimize_value(arguments[k], depth + 3)
  438. for k, v in arguments.items()
  439. }
  440. return {'functionInvocationValue': optimized_invocation}
  441. elif 'valueReference' in value:
  442. # Lift if possible: anything used only here, anything lightweight.
  443. reference = value['valueReference']
  444. if not self._is_compound():
  445. return self._optimize_value(reference, depth)
  446. referenced_value = self._values[reference]
  447. if reference in self._single_uses and depth < _DEPTH_LIMIT:
  448. return self._optimize_value(referenced_value, depth)
  449. else:
  450. if self._is_always_liftable(referenced_value):
  451. return referenced_value
  452. return {'valueReference': self._optimize_referred_value(reference)}
  453. def _is_always_liftable(self, value):
  454. """Determines if a value is simple enough to lift unconditionally."""
  455. # Non-string constants and argument references are simple enough.
  456. if 'constantValue' in value:
  457. return self._is_liftable_constant(value['constantValue'])
  458. else:
  459. return 'argumentReference' in value
  460. def _is_liftable_constant(self, value):
  461. """Whether a constant is simple enough to lift to where it's referenced."""
  462. return value is None or isinstance(value, (bool, numbers.Number))
  463. def _is_constant_value(self, value):
  464. """Whether a ValueNode (as a dict) is a constant."""
  465. return 'constantValue' in value
  466. def _visit_all_values_in_expression(self, visitor):
  467. """Calls visitor on all ValueNodes in the expression.
  468. Args:
  469. visitor: A callable that will be invoked once at every ValueNode in the
  470. expression, including nested ValueNodes.
  471. """
  472. self._visit_all_values(self._result, self._values[self._result], set(),
  473. visitor)
  474. def _visit_all_values(self, reference, value, visited, visitor):
  475. """Calls visitor on a ValueNode and its descendants.
  476. Args:
  477. reference: A reference to the ValueNode, or None.
  478. value: The ValueNode, in dict form.
  479. visited: A set of references for which the visitor has already been
  480. invoked.
  481. visitor: The callable to invoke.
  482. """
  483. if reference is not None:
  484. if reference in visited:
  485. return
  486. visited.add(reference)
  487. visitor(value)
  488. if 'arrayValue' in value:
  489. for v in value['arrayValue']['values']:
  490. self._visit_all_values(None, v, visited, visitor)
  491. elif 'dictionaryValue' in value:
  492. d = value['dictionaryValue']['values']
  493. for k in sorted(d):
  494. self._visit_all_values(None, d[k], visited, visitor)
  495. elif 'functionDefinitionValue' in value:
  496. definition_reference = value['functionDefinitionValue']['body']
  497. self._visit_all_values(definition_reference,
  498. self._values[definition_reference], visited,
  499. visitor)
  500. elif 'functionInvocationValue' in value:
  501. function_invocation = value['functionInvocationValue']
  502. if 'functionReference' in function_invocation:
  503. function_reference = function_invocation['functionReference']
  504. self._visit_all_values(function_reference,
  505. self._values[function_reference], visited,
  506. visitor)
  507. arguments = function_invocation['arguments']
  508. for k in sorted(arguments):
  509. self._visit_all_values(None, arguments[k], visited, visitor)
  510. elif 'valueReference' in value:
  511. value_reference = value['valueReference']
  512. self._visit_all_values(value_reference, self._values[value_reference],
  513. visited, visitor)