search_backend.py 9.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283
  1. ###############################################################################
  2. #
  3. # CSW Client
  4. # ---------------------------------------------------------
  5. # QGIS Catalog Service client.
  6. #
  7. # Copyright (C) 2023 Tom Kralidis (tomkralidis@gmail.com)
  8. #
  9. # This source is free software; you can redistribute it and/or modify it under
  10. # the terms of the GNU General Public License as published by the Free
  11. # Software Foundation; either version 2 of the License, or (at your option)
  12. # any later version.
  13. #
  14. # This code is distributed in the hope that it will be useful, but WITHOUT ANY
  15. # WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
  16. # FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
  17. # details.
  18. #
  19. # You should have received a copy of the GNU General Public License along
  20. # with this program; if not, write to the Free Software Foundation, Inc.,
  21. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
  22. #
  23. ###############################################################################
  24. import warnings
  25. import owslib
  26. from owslib.fes import BBox, PropertyIsLike
  27. with warnings.catch_warnings():
  28. warnings.filterwarnings("ignore", category=ResourceWarning)
  29. warnings.filterwarnings("ignore", category=ImportWarning)
  30. from owslib.csw import CatalogueServiceWeb # spellok
  31. if owslib.__version__ < '0.25':
  32. OWSLIB_OAREC_SUPPORTED = False
  33. else:
  34. OWSLIB_OAREC_SUPPORTED = True
  35. CATALOG_TYPES = [
  36. 'OGC CSW 2.0.2',
  37. 'OGC API - Records'
  38. ]
  39. class SearchBase:
  40. def __init__(self, url, timeout, username=None, password=None, auth=None):
  41. self.url = url
  42. self.timeout = timeout
  43. self.username = username
  44. self.password = password
  45. self.auth = auth
  46. self.service_info_template = None
  47. self.record_info_template = None
  48. self.request = None
  49. self.response = None
  50. self.matches = 0
  51. self.returned = 0
  52. self.format = None
  53. def get_service_info(self):
  54. pass
  55. def query_records(self):
  56. pass
  57. def records(self):
  58. pass
  59. def get_record(self, identifier):
  60. pass
  61. def parse_link(self, link):
  62. return link
  63. class CSW202Search(SearchBase):
  64. def __init__(self, url, timeout, username, password, auth):
  65. super().__init__(url, timeout, username, password, auth)
  66. self.type = CATALOG_TYPES[0]
  67. self.format = 'xml'
  68. self.service_info_template = 'csw_service_metadata.html'
  69. self.record_info_template = 'record_metadata_dc.html'
  70. self.constraints = []
  71. self.conn = CatalogueServiceWeb(self.url, # spellok
  72. timeout=self.timeout,
  73. username=self.username,
  74. password=self.password,
  75. auth=self.auth)
  76. self.request = self.conn.request
  77. self.response = self.conn.response
  78. def query_records(self, bbox=[], keywords=None, limit=10, offset=1):
  79. self.constraints = []
  80. # only apply spatial filter if bbox is not global
  81. # even for a global bbox, if a spatial filter is applied, then
  82. # the CSW server will skip records without a bbox
  83. if bbox and bbox != ['-180', '-90', '180', '90']:
  84. minx, miny, maxx, maxy = bbox
  85. self.constraints.append(BBox([miny, minx, maxy, maxx],
  86. crs='urn:ogc:def:crs:EPSG::4326'))
  87. # keywords
  88. if keywords:
  89. # TODO: handle multiple word searches
  90. self.constraints.append(PropertyIsLike('csw:AnyText', keywords))
  91. if len(self.constraints) > 1: # exclusive search (a && b)
  92. self.constraints = [self.constraints]
  93. self.conn.getrecords2(constraints=self.constraints, maxrecords=limit,
  94. startposition=offset, esn='full')
  95. self.matches = self.conn.results['matches']
  96. self.returned = self.conn.results['returned']
  97. self.request = self.conn.request
  98. self.response = self.conn.response
  99. def records(self):
  100. recs = []
  101. for record in self.conn.records:
  102. rec = {
  103. 'identifier': None,
  104. 'type': None,
  105. 'title': None,
  106. 'bbox': None
  107. }
  108. if self.conn.records[record].identifier:
  109. rec['identifier'] = self.conn.records[record].identifier
  110. if self.conn.records[record].type:
  111. rec['type'] = self.conn.records[record].type
  112. if self.conn.records[record].title:
  113. rec['title'] = self.conn.records[record].title
  114. if self.conn.records[record].bbox:
  115. rec['bbox'] = bbox_list_to_dict(
  116. self.conn.records[record].bbox)
  117. rec['links'] = (self.conn.records[record].uris +
  118. self.conn.records[record].references)
  119. recs.append(rec)
  120. return recs
  121. def get_record(self, identifier):
  122. self.conn.getrecordbyid([identifier])
  123. return self.conn.records[identifier]
  124. class OARecSearch(SearchBase):
  125. def __init__(self, url, timeout, auth):
  126. try:
  127. from owslib.ogcapi.records import Records
  128. except ModuleNotFoundError:
  129. # OWSLIB_OAREC_SUPPORTED already set to False
  130. pass
  131. super().__init__(url, timeout, auth)
  132. self.type = CATALOG_TYPES[1]
  133. self.format = 'json'
  134. self.service_info_template = 'oarec_service_metadata.html'
  135. self.record_info_template = 'record_metadata_oarec.html'
  136. self.base_url = None
  137. self.record_collection = None
  138. if '/collections/' in self.url: # catalog is a collection
  139. self.base_url, self.record_collection = self.url.split('/collections/') # noqa
  140. self.conn = Records(
  141. self.base_url, timeout=self.timeout, auth=self.auth)
  142. c = self.conn.collection(self.record_collection)
  143. try:
  144. self.conn.links = c['links']
  145. self.conn.title = c['title']
  146. self.conn.description = c['description']
  147. except KeyError:
  148. pass
  149. self.request = self.conn.request
  150. else:
  151. self.conn = Records(self.url, timeout=self.timeout, auth=self.auth)
  152. self.request = None
  153. self.response = self.conn.response
  154. def query_records(self, bbox=[], keywords=None, limit=10, offset=1):
  155. # set zero-based offset (default MetaSearch behavior is CSW-based
  156. # offset of 1
  157. offset2 = offset - 1
  158. params = {
  159. 'collection_id': self.record_collection,
  160. 'limit': limit,
  161. 'startindex': offset2
  162. }
  163. if keywords:
  164. params['q'] = keywords
  165. if bbox and bbox != ['-180', '-90', '180', '90']:
  166. params['bbox'] = bbox
  167. self.response = self.conn.collection_items(**params)
  168. self.matches = self.response.get('numberMatched', 0)
  169. self.returned = self.response.get('numberReturned', 0)
  170. self.request = self.conn.request
  171. def records(self):
  172. recs = []
  173. for rec in self.response['features']:
  174. rec1 = {
  175. 'identifier': rec['id'],
  176. 'type': rec['properties']['type'],
  177. 'bbox': None,
  178. 'title': rec['properties']['title'],
  179. 'links': rec.get('links', [])
  180. }
  181. try:
  182. if rec.get('geometry') is not None:
  183. rec1['bbox'] = bbox_list_to_dict([
  184. rec['geometry']['coordinates'][0][0][0],
  185. rec['geometry']['coordinates'][0][0][1],
  186. rec['geometry']['coordinates'][0][2][0],
  187. rec['geometry']['coordinates'][0][2][1]
  188. ])
  189. except KeyError:
  190. pass
  191. recs.append(rec1)
  192. return recs
  193. def get_record(self, identifier):
  194. return self.conn.collection_item(self.record_collection, identifier)
  195. def parse_link(self, link):
  196. link2 = {}
  197. if 'href' in link:
  198. link2['url'] = link['href']
  199. if 'type' in link:
  200. link2['protocol'] = link['type']
  201. if 'title' in link:
  202. link2['title'] = link['title']
  203. if 'id' in link:
  204. link2['name'] = link['id']
  205. return link2
  206. def get_catalog_service(url, catalog_type, timeout, username, password,
  207. auth=None):
  208. if catalog_type in [None, CATALOG_TYPES[0]]:
  209. return CSW202Search(url, timeout, username, password, auth)
  210. elif catalog_type == CATALOG_TYPES[1]:
  211. if not OWSLIB_OAREC_SUPPORTED:
  212. raise ValueError("OGC API - Records requires OWSLib 0.25 or above")
  213. return OARecSearch(url, timeout, auth)
  214. def bbox_list_to_dict(bbox):
  215. if isinstance(bbox, list):
  216. dict_ = {
  217. 'minx': bbox[0],
  218. 'maxx': bbox[2],
  219. 'miny': bbox[1],
  220. 'maxy': bbox[3]
  221. }
  222. else:
  223. dict_ = {
  224. 'minx': bbox.minx,
  225. 'maxx': bbox.maxx,
  226. 'miny': bbox.miny,
  227. 'maxy': bbox.maxy
  228. }
  229. return dict_