# parser.py (15 KB)
  1. import re
  2. import uuid
  3. from json import dumps as json_dumps
  4. from json import loads as json_loads
  5. from json.decoder import JSONDecodeError
  6. from requests import get
  7. from yaml import YAMLError, safe_load
  8. from core.tools.entities.common_entities import I18nObject
  9. from core.tools.entities.tool_bundle import ApiBasedToolBundle
  10. from core.tools.entities.tool_entities import ApiProviderSchemaType, ToolParameter
  11. from core.tools.errors import ToolApiSchemaError, ToolNotSupportedError, ToolProviderNotFoundError
  12. class ApiBasedToolSchemaParser:
  13. @staticmethod
  14. def parse_openapi_to_tool_bundle(openapi: dict, extra_info: dict = None, warning: dict = None) -> list[ApiBasedToolBundle]:
  15. warning = warning if warning is not None else {}
  16. extra_info = extra_info if extra_info is not None else {}
  17. # set description to extra_info
  18. if 'description' in openapi['info']:
  19. extra_info['description'] = openapi['info']['description']
  20. else:
  21. extra_info['description'] = ''
  22. if len(openapi['servers']) == 0:
  23. raise ToolProviderNotFoundError('No server found in the openapi yaml.')
  24. server_url = openapi['servers'][0]['url']
  25. # list all interfaces
  26. interfaces = []
  27. for path, path_item in openapi['paths'].items():
  28. methods = ['get', 'post', 'put', 'delete', 'patch', 'head', 'options', 'trace']
  29. for method in methods:
  30. if method in path_item:
  31. interfaces.append({
  32. 'path': path,
  33. 'method': method,
  34. 'operation': path_item[method],
  35. })
  36. # get all parameters
  37. bundles = []
  38. for interface in interfaces:
  39. # convert parameters
  40. parameters = []
  41. if 'parameters' in interface['operation']:
  42. for parameter in interface['operation']['parameters']:
  43. tool_parameter = ToolParameter(
  44. name=parameter['name'],
  45. label=I18nObject(
  46. en_US=parameter['name'],
  47. zh_Hans=parameter['name']
  48. ),
  49. human_description=I18nObject(
  50. en_US=parameter.get('description', ''),
  51. zh_Hans=parameter.get('description', '')
  52. ),
  53. type=ToolParameter.ToolParameterType.STRING,
  54. required=parameter.get('required', False),
  55. form=ToolParameter.ToolParameterForm.LLM,
  56. llm_description=parameter.get('description'),
  57. default=parameter['schema']['default'] if 'schema' in parameter and 'default' in parameter['schema'] else None,
  58. )
  59. # check if there is a type
  60. typ = ApiBasedToolSchemaParser._get_tool_parameter_type(parameter)
  61. if typ:
  62. tool_parameter.type = typ
  63. parameters.append(tool_parameter)
  64. # create tool bundle
  65. # check if there is a request body
  66. if 'requestBody' in interface['operation']:
  67. request_body = interface['operation']['requestBody']
  68. if 'content' in request_body:
  69. for content_type, content in request_body['content'].items():
  70. # if there is a reference, get the reference and overwrite the content
  71. if 'schema' not in content:
  72. content
  73. if '$ref' in content['schema']:
  74. # get the reference
  75. root = openapi
  76. reference = content['schema']['$ref'].split('/')[1:]
  77. for ref in reference:
  78. root = root[ref]
  79. # overwrite the content
  80. interface['operation']['requestBody']['content'][content_type]['schema'] = root
  81. # parse body parameters
  82. if 'schema' in interface['operation']['requestBody']['content'][content_type]:
  83. body_schema = interface['operation']['requestBody']['content'][content_type]['schema']
  84. required = body_schema['required'] if 'required' in body_schema else []
  85. properties = body_schema['properties'] if 'properties' in body_schema else {}
  86. for name, property in properties.items():
  87. tool = ToolParameter(
  88. name=name,
  89. label=I18nObject(
  90. en_US=name,
  91. zh_Hans=name
  92. ),
  93. human_description=I18nObject(
  94. en_US=property['description'] if 'description' in property else '',
  95. zh_Hans=property['description'] if 'description' in property else ''
  96. ),
  97. type=ToolParameter.ToolParameterType.STRING,
  98. required=name in required,
  99. form=ToolParameter.ToolParameterForm.LLM,
  100. llm_description=property['description'] if 'description' in property else '',
  101. default=property['default'] if 'default' in property else None,
  102. )
  103. # check if there is a type
  104. typ = ApiBasedToolSchemaParser._get_tool_parameter_type(property)
  105. if typ:
  106. tool.type = typ
  107. parameters.append(tool)
  108. # check if parameters is duplicated
  109. parameters_count = {}
  110. for parameter in parameters:
  111. if parameter.name not in parameters_count:
  112. parameters_count[parameter.name] = 0
  113. parameters_count[parameter.name] += 1
  114. for name, count in parameters_count.items():
  115. if count > 1:
  116. warning['duplicated_parameter'] = f'Parameter {name} is duplicated.'
  117. # check if there is a operation id, use $path_$method as operation id if not
  118. if 'operationId' not in interface['operation']:
  119. # remove special characters like / to ensure the operation id is valid ^[a-zA-Z0-9_-]{1,64}$
  120. path = interface['path']
  121. if interface['path'].startswith('/'):
  122. path = interface['path'][1:]
  123. # remove special characters like / to ensure the operation id is valid ^[a-zA-Z0-9_-]{1,64}$
  124. path = re.sub(r'[^a-zA-Z0-9_-]', '', path)
  125. if not path:
  126. path = str(uuid.uuid4())
  127. interface['operation']['operationId'] = f'{path}_{interface["method"]}'
  128. bundles.append(ApiBasedToolBundle(
  129. server_url=server_url + interface['path'],
  130. method=interface['method'],
  131. summary=interface['operation']['description'] if 'description' in interface['operation'] else
  132. interface['operation']['summary'] if 'summary' in interface['operation'] else None,
  133. operation_id=interface['operation']['operationId'],
  134. parameters=parameters,
  135. author='',
  136. icon=None,
  137. openapi=interface['operation'],
  138. ))
  139. return bundles
  140. @staticmethod
  141. def _get_tool_parameter_type(parameter: dict) -> ToolParameter.ToolParameterType:
  142. parameter = parameter or {}
  143. typ = None
  144. if 'type' in parameter:
  145. typ = parameter['type']
  146. elif 'schema' in parameter and 'type' in parameter['schema']:
  147. typ = parameter['schema']['type']
  148. if typ == 'integer' or typ == 'number':
  149. return ToolParameter.ToolParameterType.NUMBER
  150. elif typ == 'boolean':
  151. return ToolParameter.ToolParameterType.BOOLEAN
  152. elif typ == 'string':
  153. return ToolParameter.ToolParameterType.STRING
  154. @staticmethod
  155. def parse_openapi_yaml_to_tool_bundle(yaml: str, extra_info: dict = None, warning: dict = None) -> list[ApiBasedToolBundle]:
  156. """
  157. parse openapi yaml to tool bundle
  158. :param yaml: the yaml string
  159. :return: the tool bundle
  160. """
  161. warning = warning if warning is not None else {}
  162. extra_info = extra_info if extra_info is not None else {}
  163. openapi: dict = safe_load(yaml)
  164. if openapi is None:
  165. raise ToolApiSchemaError('Invalid openapi yaml.')
  166. return ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle(openapi, extra_info=extra_info, warning=warning)
  167. @staticmethod
  168. def parse_swagger_to_openapi(swagger: dict, extra_info: dict = None, warning: dict = None) -> dict:
  169. """
  170. parse swagger to openapi
  171. :param swagger: the swagger dict
  172. :return: the openapi dict
  173. """
  174. # convert swagger to openapi
  175. info = swagger.get('info', {
  176. 'title': 'Swagger',
  177. 'description': 'Swagger',
  178. 'version': '1.0.0'
  179. })
  180. servers = swagger.get('servers', [])
  181. if len(servers) == 0:
  182. raise ToolApiSchemaError('No server found in the swagger yaml.')
  183. openapi = {
  184. 'openapi': '3.0.0',
  185. 'info': {
  186. 'title': info.get('title', 'Swagger'),
  187. 'description': info.get('description', 'Swagger'),
  188. 'version': info.get('version', '1.0.0')
  189. },
  190. 'servers': swagger['servers'],
  191. 'paths': {},
  192. 'components': {
  193. 'schemas': {}
  194. }
  195. }
  196. # check paths
  197. if 'paths' not in swagger or len(swagger['paths']) == 0:
  198. raise ToolApiSchemaError('No paths found in the swagger yaml.')
  199. # convert paths
  200. for path, path_item in swagger['paths'].items():
  201. openapi['paths'][path] = {}
  202. for method, operation in path_item.items():
  203. if 'operationId' not in operation:
  204. raise ToolApiSchemaError(f'No operationId found in operation {method} {path}.')
  205. if ('summary' not in operation or len(operation['summary']) == 0) and \
  206. ('description' not in operation or len(operation['description']) == 0):
  207. warning['missing_summary'] = f'No summary or description found in operation {method} {path}.'
  208. openapi['paths'][path][method] = {
  209. 'operationId': operation['operationId'],
  210. 'summary': operation.get('summary', ''),
  211. 'description': operation.get('description', ''),
  212. 'parameters': operation.get('parameters', []),
  213. 'responses': operation.get('responses', {}),
  214. }
  215. if 'requestBody' in operation:
  216. openapi['paths'][path][method]['requestBody'] = operation['requestBody']
  217. # convert definitions
  218. for name, definition in swagger['definitions'].items():
  219. openapi['components']['schemas'][name] = definition
  220. return openapi
  221. @staticmethod
  222. def parse_openai_plugin_json_to_tool_bundle(json: str, extra_info: dict = None, warning: dict = None) -> list[ApiBasedToolBundle]:
  223. """
  224. parse openapi plugin yaml to tool bundle
  225. :param json: the json string
  226. :return: the tool bundle
  227. """
  228. warning = warning if warning is not None else {}
  229. extra_info = extra_info if extra_info is not None else {}
  230. try:
  231. openai_plugin = json_loads(json)
  232. api = openai_plugin['api']
  233. api_url = api['url']
  234. api_type = api['type']
  235. except:
  236. raise ToolProviderNotFoundError('Invalid openai plugin json.')
  237. if api_type != 'openapi':
  238. raise ToolNotSupportedError('Only openapi is supported now.')
  239. # get openapi yaml
  240. response = get(api_url, headers={
  241. 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) '
  242. }, timeout=5)
  243. if response.status_code != 200:
  244. raise ToolProviderNotFoundError('cannot get openapi yaml from url.')
  245. return ApiBasedToolSchemaParser.parse_openapi_yaml_to_tool_bundle(response.text, extra_info=extra_info, warning=warning)
  246. @staticmethod
  247. def auto_parse_to_tool_bundle(content: str, extra_info: dict = None, warning: dict = None) -> tuple[list[ApiBasedToolBundle], str]:
  248. """
  249. auto parse to tool bundle
  250. :param content: the content
  251. :return: tools bundle, schema_type
  252. """
  253. warning = warning if warning is not None else {}
  254. extra_info = extra_info if extra_info is not None else {}
  255. content = content.strip()
  256. loaded_content = None
  257. json_error = None
  258. yaml_error = None
  259. try:
  260. loaded_content = json_loads(content)
  261. except JSONDecodeError as e:
  262. json_error = e
  263. if loaded_content is None:
  264. try:
  265. loaded_content = safe_load(content)
  266. except YAMLError as e:
  267. yaml_error = e
  268. if loaded_content is None:
  269. raise ToolApiSchemaError(f'Invalid api schema, schema is neither json nor yaml. json error: {str(json_error)}, yaml error: {str(yaml_error)}')
  270. swagger_error = None
  271. openapi_error = None
  272. openapi_plugin_error = None
  273. schema_type = None
  274. try:
  275. openapi = ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle(loaded_content, extra_info=extra_info, warning=warning)
  276. schema_type = ApiProviderSchemaType.OPENAPI.value
  277. return openapi, schema_type
  278. except ToolApiSchemaError as e:
  279. openapi_error = e
  280. # openai parse error, fallback to swagger
  281. try:
  282. converted_swagger = ApiBasedToolSchemaParser.parse_swagger_to_openapi(loaded_content, extra_info=extra_info, warning=warning)
  283. schema_type = ApiProviderSchemaType.SWAGGER.value
  284. return ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle(converted_swagger, extra_info=extra_info, warning=warning), schema_type
  285. except ToolApiSchemaError as e:
  286. swagger_error = e
  287. # swagger parse error, fallback to openai plugin
  288. try:
  289. openapi_plugin = ApiBasedToolSchemaParser.parse_openai_plugin_json_to_tool_bundle(json_dumps(loaded_content), extra_info=extra_info, warning=warning)
  290. return openapi_plugin, ApiProviderSchemaType.OPENAI_PLUGIN.value
  291. except ToolNotSupportedError as e:
  292. # maybe it's not plugin at all
  293. openapi_plugin_error = e
  294. raise ToolApiSchemaError(f'Invalid api schema, openapi error: {str(openapi_error)}, swagger error: {str(swagger_error)}, openapi plugin error: {str(openapi_plugin_error)}')