parser.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362
  1. import re
  2. import uuid
  3. from json import dumps as json_dumps
  4. from json import loads as json_loads
  5. from json.decoder import JSONDecodeError
  6. from typing import Optional
  7. from requests import get
  8. from yaml import YAMLError, safe_load
  9. from core.tools.entities.common_entities import I18nObject
  10. from core.tools.entities.tool_bundle import ApiToolBundle
  11. from core.tools.entities.tool_entities import ApiProviderSchemaType, ToolParameter
  12. from core.tools.errors import ToolApiSchemaError, ToolNotSupportedError, ToolProviderNotFoundError
  13. class ApiBasedToolSchemaParser:
  14. @staticmethod
  15. def parse_openapi_to_tool_bundle(
  16. openapi: dict, extra_info: Optional[dict], warning: Optional[dict]
  17. ) -> list[ApiToolBundle]:
  18. warning = warning if warning is not None else {}
  19. extra_info = extra_info if extra_info is not None else {}
  20. # set description to extra_info
  21. extra_info["description"] = openapi["info"].get("description", "")
  22. if len(openapi["servers"]) == 0:
  23. raise ToolProviderNotFoundError("No server found in the openapi yaml.")
  24. server_url = openapi["servers"][0]["url"]
  25. # list all interfaces
  26. interfaces = []
  27. for path, path_item in openapi["paths"].items():
  28. methods = ["get", "post", "put", "delete", "patch", "head", "options", "trace"]
  29. for method in methods:
  30. if method in path_item:
  31. interfaces.append(
  32. {
  33. "path": path,
  34. "method": method,
  35. "operation": path_item[method],
  36. }
  37. )
  38. # get all parameters
  39. bundles = []
  40. for interface in interfaces:
  41. # convert parameters
  42. parameters = []
  43. if "parameters" in interface["operation"]:
  44. for parameter in interface["operation"]["parameters"]:
  45. tool_parameter = ToolParameter(
  46. name=parameter["name"],
  47. label=I18nObject(en_US=parameter["name"], zh_Hans=parameter["name"]),
  48. human_description=I18nObject(
  49. en_US=parameter.get("description", ""), zh_Hans=parameter.get("description", "")
  50. ),
  51. type=ToolParameter.ToolParameterType.STRING,
  52. required=parameter.get("required", False),
  53. form=ToolParameter.ToolParameterForm.LLM,
  54. llm_description=parameter.get("description"),
  55. default=parameter["schema"]["default"]
  56. if "schema" in parameter and "default" in parameter["schema"]
  57. else None,
  58. )
  59. # check if there is a type
  60. typ = ApiBasedToolSchemaParser._get_tool_parameter_type(parameter)
  61. if typ:
  62. tool_parameter.type = typ
  63. parameters.append(tool_parameter)
  64. # create tool bundle
  65. # check if there is a request body
  66. if "requestBody" in interface["operation"]:
  67. request_body = interface["operation"]["requestBody"]
  68. if "content" in request_body:
  69. for content_type, content in request_body["content"].items():
  70. # if there is a reference, get the reference and overwrite the content
  71. if "schema" not in content:
  72. continue
  73. if "$ref" in content["schema"]:
  74. # get the reference
  75. root = openapi
  76. reference = content["schema"]["$ref"].split("/")[1:]
  77. for ref in reference:
  78. root = root[ref]
  79. # overwrite the content
  80. interface["operation"]["requestBody"]["content"][content_type]["schema"] = root
  81. # parse body parameters
  82. if "schema" in interface["operation"]["requestBody"]["content"][content_type]:
  83. body_schema = interface["operation"]["requestBody"]["content"][content_type]["schema"]
  84. required = body_schema.get("required", [])
  85. properties = body_schema.get("properties", {})
  86. for name, property in properties.items():
  87. tool = ToolParameter(
  88. name=name,
  89. label=I18nObject(en_US=name, zh_Hans=name),
  90. human_description=I18nObject(
  91. en_US=property.get("description", ""), zh_Hans=property.get("description", "")
  92. ),
  93. type=ToolParameter.ToolParameterType.STRING,
  94. required=name in required,
  95. form=ToolParameter.ToolParameterForm.LLM,
  96. llm_description=property.get("description", ""),
  97. default=property.get("default", None),
  98. )
  99. # check if there is a type
  100. typ = ApiBasedToolSchemaParser._get_tool_parameter_type(property)
  101. if typ:
  102. tool.type = typ
  103. parameters.append(tool)
  104. # check if parameters is duplicated
  105. parameters_count = {}
  106. for parameter in parameters:
  107. if parameter.name not in parameters_count:
  108. parameters_count[parameter.name] = 0
  109. parameters_count[parameter.name] += 1
  110. for name, count in parameters_count.items():
  111. if count > 1:
  112. warning["duplicated_parameter"] = f"Parameter {name} is duplicated."
  113. # check if there is a operation id, use $path_$method as operation id if not
  114. if "operationId" not in interface["operation"]:
  115. # remove special characters like / to ensure the operation id is valid ^[a-zA-Z0-9_-]{1,64}$
  116. path = interface["path"]
  117. if interface["path"].startswith("/"):
  118. path = interface["path"][1:]
  119. # remove special characters like / to ensure the operation id is valid ^[a-zA-Z0-9_-]{1,64}$
  120. path = re.sub(r"[^a-zA-Z0-9_-]", "", path)
  121. if not path:
  122. path = str(uuid.uuid4())
  123. interface["operation"]["operationId"] = f'{path}_{interface["method"]}'
  124. bundles.append(
  125. ApiToolBundle(
  126. server_url=server_url + interface["path"],
  127. method=interface["method"],
  128. summary=interface["operation"]["description"]
  129. if "description" in interface["operation"]
  130. else interface["operation"].get("summary", None),
  131. operation_id=interface["operation"]["operationId"],
  132. parameters=parameters,
  133. author="",
  134. icon=None,
  135. openapi=interface["operation"],
  136. )
  137. )
  138. return bundles
  139. @staticmethod
  140. def _get_tool_parameter_type(parameter: dict) -> ToolParameter.ToolParameterType:
  141. parameter = parameter or {}
  142. typ = None
  143. if "type" in parameter:
  144. typ = parameter["type"]
  145. elif "schema" in parameter and "type" in parameter["schema"]:
  146. typ = parameter["schema"]["type"]
  147. if typ in {"integer", "number"}:
  148. return ToolParameter.ToolParameterType.NUMBER
  149. elif typ == "boolean":
  150. return ToolParameter.ToolParameterType.BOOLEAN
  151. elif typ == "string":
  152. return ToolParameter.ToolParameterType.STRING
  153. @staticmethod
  154. def parse_openapi_yaml_to_tool_bundle(
  155. yaml: str, extra_info: Optional[dict], warning: Optional[dict]
  156. ) -> list[ApiToolBundle]:
  157. """
  158. parse openapi yaml to tool bundle
  159. :param yaml: the yaml string
  160. :return: the tool bundle
  161. """
  162. warning = warning if warning is not None else {}
  163. extra_info = extra_info if extra_info is not None else {}
  164. openapi: dict = safe_load(yaml)
  165. if openapi is None:
  166. raise ToolApiSchemaError("Invalid openapi yaml.")
  167. return ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle(openapi, extra_info=extra_info, warning=warning)
  168. @staticmethod
  169. def parse_swagger_to_openapi(swagger: dict, extra_info: Optional[dict], warning: Optional[dict]) -> dict:
  170. """
  171. parse swagger to openapi
  172. :param swagger: the swagger dict
  173. :return: the openapi dict
  174. """
  175. # convert swagger to openapi
  176. info = swagger.get("info", {"title": "Swagger", "description": "Swagger", "version": "1.0.0"})
  177. servers = swagger.get("servers", [])
  178. if len(servers) == 0:
  179. raise ToolApiSchemaError("No server found in the swagger yaml.")
  180. openapi = {
  181. "openapi": "3.0.0",
  182. "info": {
  183. "title": info.get("title", "Swagger"),
  184. "description": info.get("description", "Swagger"),
  185. "version": info.get("version", "1.0.0"),
  186. },
  187. "servers": swagger["servers"],
  188. "paths": {},
  189. "components": {"schemas": {}},
  190. }
  191. # check paths
  192. if "paths" not in swagger or len(swagger["paths"]) == 0:
  193. raise ToolApiSchemaError("No paths found in the swagger yaml.")
  194. # convert paths
  195. for path, path_item in swagger["paths"].items():
  196. openapi["paths"][path] = {}
  197. for method, operation in path_item.items():
  198. if "operationId" not in operation:
  199. raise ToolApiSchemaError(f"No operationId found in operation {method} {path}.")
  200. if ("summary" not in operation or len(operation["summary"]) == 0) and (
  201. "description" not in operation or len(operation["description"]) == 0
  202. ):
  203. warning["missing_summary"] = f"No summary or description found in operation {method} {path}."
  204. openapi["paths"][path][method] = {
  205. "operationId": operation["operationId"],
  206. "summary": operation.get("summary", ""),
  207. "description": operation.get("description", ""),
  208. "parameters": operation.get("parameters", []),
  209. "responses": operation.get("responses", {}),
  210. }
  211. if "requestBody" in operation:
  212. openapi["paths"][path][method]["requestBody"] = operation["requestBody"]
  213. # convert definitions
  214. for name, definition in swagger["definitions"].items():
  215. openapi["components"]["schemas"][name] = definition
  216. return openapi
  217. @staticmethod
  218. def parse_openai_plugin_json_to_tool_bundle(
  219. json: str, extra_info: Optional[dict], warning: Optional[dict]
  220. ) -> list[ApiToolBundle]:
  221. """
  222. parse openapi plugin yaml to tool bundle
  223. :param json: the json string
  224. :return: the tool bundle
  225. """
  226. warning = warning if warning is not None else {}
  227. extra_info = extra_info if extra_info is not None else {}
  228. try:
  229. openai_plugin = json_loads(json)
  230. api = openai_plugin["api"]
  231. api_url = api["url"]
  232. api_type = api["type"]
  233. except:
  234. raise ToolProviderNotFoundError("Invalid openai plugin json.")
  235. if api_type != "openapi":
  236. raise ToolNotSupportedError("Only openapi is supported now.")
  237. # get openapi yaml
  238. response = get(api_url, headers={"User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) "}, timeout=5)
  239. if response.status_code != 200:
  240. raise ToolProviderNotFoundError("cannot get openapi yaml from url.")
  241. return ApiBasedToolSchemaParser.parse_openapi_yaml_to_tool_bundle(
  242. response.text, extra_info=extra_info, warning=warning
  243. )
  244. @staticmethod
  245. def auto_parse_to_tool_bundle(
  246. content: str, extra_info: Optional[dict] = None, warning: Optional[dict] = None
  247. ) -> tuple[list[ApiToolBundle], str]:
  248. """
  249. auto parse to tool bundle
  250. :param content: the content
  251. :return: tools bundle, schema_type
  252. """
  253. warning = warning if warning is not None else {}
  254. extra_info = extra_info if extra_info is not None else {}
  255. content = content.strip()
  256. loaded_content = None
  257. json_error = None
  258. yaml_error = None
  259. try:
  260. loaded_content = json_loads(content)
  261. except JSONDecodeError as e:
  262. json_error = e
  263. if loaded_content is None:
  264. try:
  265. loaded_content = safe_load(content)
  266. except YAMLError as e:
  267. yaml_error = e
  268. if loaded_content is None:
  269. raise ToolApiSchemaError(
  270. f"Invalid api schema, schema is neither json nor yaml. json error: {str(json_error)},"
  271. f" yaml error: {str(yaml_error)}"
  272. )
  273. swagger_error = None
  274. openapi_error = None
  275. openapi_plugin_error = None
  276. schema_type = None
  277. try:
  278. openapi = ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle(
  279. loaded_content, extra_info=extra_info, warning=warning
  280. )
  281. schema_type = ApiProviderSchemaType.OPENAPI.value
  282. return openapi, schema_type
  283. except ToolApiSchemaError as e:
  284. openapi_error = e
  285. # openai parse error, fallback to swagger
  286. try:
  287. converted_swagger = ApiBasedToolSchemaParser.parse_swagger_to_openapi(
  288. loaded_content, extra_info=extra_info, warning=warning
  289. )
  290. schema_type = ApiProviderSchemaType.SWAGGER.value
  291. return ApiBasedToolSchemaParser.parse_openapi_to_tool_bundle(
  292. converted_swagger, extra_info=extra_info, warning=warning
  293. ), schema_type
  294. except ToolApiSchemaError as e:
  295. swagger_error = e
  296. # swagger parse error, fallback to openai plugin
  297. try:
  298. openapi_plugin = ApiBasedToolSchemaParser.parse_openai_plugin_json_to_tool_bundle(
  299. json_dumps(loaded_content), extra_info=extra_info, warning=warning
  300. )
  301. return openapi_plugin, ApiProviderSchemaType.OPENAI_PLUGIN.value
  302. except ToolNotSupportedError as e:
  303. # maybe it's not plugin at all
  304. openapi_plugin_error = e
  305. raise ToolApiSchemaError(
  306. f"Invalid api schema, openapi error: {str(openapi_error)}, swagger error: {str(swagger_error)},"
  307. f" openapi plugin error: {str(openapi_plugin_error)}"
  308. )