app_model_config_service.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527
  1. import re
  2. import uuid
  3. from core.entities.agent_entities import PlanningStrategy
  4. from core.external_data_tool.factory import ExternalDataToolFactory
  5. from core.model_runtime.entities.model_entities import ModelPropertyKey, ModelType
  6. from core.model_runtime.model_providers import model_provider_factory
  7. from core.moderation.factory import ModerationFactory
  8. from core.prompt.prompt_transform import AppMode
  9. from core.provider_manager import ProviderManager
  10. from models.account import Account
  11. from services.dataset_service import DatasetService
  12. SUPPORT_TOOLS = ["dataset", "google_search", "web_reader", "wikipedia", "current_datetime"]
  13. class AppModelConfigService:
  14. @classmethod
  15. def is_dataset_exists(cls, account: Account, dataset_id: str) -> bool:
  16. # verify if the dataset ID exists
  17. dataset = DatasetService.get_dataset(dataset_id)
  18. if not dataset:
  19. return False
  20. if dataset.tenant_id != account.current_tenant_id:
  21. return False
  22. return True
  23. @classmethod
  24. def validate_model_completion_params(cls, cp: dict, model_name: str) -> dict:
  25. # 6. model.completion_params
  26. if not isinstance(cp, dict):
  27. raise ValueError("model.completion_params must be of object type")
  28. # stop
  29. if 'stop' not in cp:
  30. cp["stop"] = []
  31. elif not isinstance(cp["stop"], list):
  32. raise ValueError("stop in model.completion_params must be of list type")
  33. if len(cp["stop"]) > 4:
  34. raise ValueError("stop sequences must be less than 4")
  35. return cp
  36. @classmethod
  37. def validate_configuration(cls, tenant_id: str, account: Account, config: dict, app_mode: str) -> dict:
  38. # opening_statement
  39. if 'opening_statement' not in config or not config["opening_statement"]:
  40. config["opening_statement"] = ""
  41. if not isinstance(config["opening_statement"], str):
  42. raise ValueError("opening_statement must be of string type")
  43. # suggested_questions
  44. if 'suggested_questions' not in config or not config["suggested_questions"]:
  45. config["suggested_questions"] = []
  46. if not isinstance(config["suggested_questions"], list):
  47. raise ValueError("suggested_questions must be of list type")
  48. for question in config["suggested_questions"]:
  49. if not isinstance(question, str):
  50. raise ValueError("Elements in suggested_questions list must be of string type")
  51. # suggested_questions_after_answer
  52. if 'suggested_questions_after_answer' not in config or not config["suggested_questions_after_answer"]:
  53. config["suggested_questions_after_answer"] = {
  54. "enabled": False
  55. }
  56. if not isinstance(config["suggested_questions_after_answer"], dict):
  57. raise ValueError("suggested_questions_after_answer must be of dict type")
  58. if "enabled" not in config["suggested_questions_after_answer"] or not config["suggested_questions_after_answer"]["enabled"]:
  59. config["suggested_questions_after_answer"]["enabled"] = False
  60. if not isinstance(config["suggested_questions_after_answer"]["enabled"], bool):
  61. raise ValueError("enabled in suggested_questions_after_answer must be of boolean type")
  62. # speech_to_text
  63. if 'speech_to_text' not in config or not config["speech_to_text"]:
  64. config["speech_to_text"] = {
  65. "enabled": False
  66. }
  67. if not isinstance(config["speech_to_text"], dict):
  68. raise ValueError("speech_to_text must be of dict type")
  69. if "enabled" not in config["speech_to_text"] or not config["speech_to_text"]["enabled"]:
  70. config["speech_to_text"]["enabled"] = False
  71. if not isinstance(config["speech_to_text"]["enabled"], bool):
  72. raise ValueError("enabled in speech_to_text must be of boolean type")
  73. # text_to_speech
  74. if 'text_to_speech' not in config or not config["text_to_speech"]:
  75. config["text_to_speech"] = {
  76. "enabled": False,
  77. "voice": "",
  78. "language": ""
  79. }
  80. if not isinstance(config["text_to_speech"], dict):
  81. raise ValueError("text_to_speech must be of dict type")
  82. if "enabled" not in config["text_to_speech"] or not config["text_to_speech"]["enabled"]:
  83. config["text_to_speech"]["enabled"] = False
  84. config["text_to_speech"]["voice"] = ""
  85. config["text_to_speech"]["language"] = ""
  86. if not isinstance(config["text_to_speech"]["enabled"], bool):
  87. raise ValueError("enabled in text_to_speech must be of boolean type")
  88. # return retriever resource
  89. if 'retriever_resource' not in config or not config["retriever_resource"]:
  90. config["retriever_resource"] = {
  91. "enabled": False
  92. }
  93. if not isinstance(config["retriever_resource"], dict):
  94. raise ValueError("retriever_resource must be of dict type")
  95. if "enabled" not in config["retriever_resource"] or not config["retriever_resource"]["enabled"]:
  96. config["retriever_resource"]["enabled"] = False
  97. if not isinstance(config["retriever_resource"]["enabled"], bool):
  98. raise ValueError("enabled in retriever_resource must be of boolean type")
  99. # more_like_this
  100. if 'more_like_this' not in config or not config["more_like_this"]:
  101. config["more_like_this"] = {
  102. "enabled": False
  103. }
  104. if not isinstance(config["more_like_this"], dict):
  105. raise ValueError("more_like_this must be of dict type")
  106. if "enabled" not in config["more_like_this"] or not config["more_like_this"]["enabled"]:
  107. config["more_like_this"]["enabled"] = False
  108. if not isinstance(config["more_like_this"]["enabled"], bool):
  109. raise ValueError("enabled in more_like_this must be of boolean type")
  110. # model
  111. if 'model' not in config:
  112. raise ValueError("model is required")
  113. if not isinstance(config["model"], dict):
  114. raise ValueError("model must be of object type")
  115. # model.provider
  116. provider_entities = model_provider_factory.get_providers()
  117. model_provider_names = [provider.provider for provider in provider_entities]
  118. if 'provider' not in config["model"] or config["model"]["provider"] not in model_provider_names:
  119. raise ValueError(f"model.provider is required and must be in {str(model_provider_names)}")
  120. # model.name
  121. if 'name' not in config["model"]:
  122. raise ValueError("model.name is required")
  123. provider_manager = ProviderManager()
  124. models = provider_manager.get_configurations(tenant_id).get_models(
  125. provider=config["model"]["provider"],
  126. model_type=ModelType.LLM
  127. )
  128. if not models:
  129. raise ValueError("model.name must be in the specified model list")
  130. model_ids = [m.model for m in models]
  131. if config["model"]["name"] not in model_ids:
  132. raise ValueError("model.name must be in the specified model list")
  133. model_mode = None
  134. for model in models:
  135. if model.model == config["model"]["name"]:
  136. model_mode = model.model_properties.get(ModelPropertyKey.MODE)
  137. break
  138. # model.mode
  139. if model_mode:
  140. config['model']["mode"] = model_mode
  141. else:
  142. config['model']["mode"] = "completion"
  143. # model.completion_params
  144. if 'completion_params' not in config["model"]:
  145. raise ValueError("model.completion_params is required")
  146. config["model"]["completion_params"] = cls.validate_model_completion_params(
  147. config["model"]["completion_params"],
  148. config["model"]["name"]
  149. )
  150. # user_input_form
  151. if "user_input_form" not in config or not config["user_input_form"]:
  152. config["user_input_form"] = []
  153. if not isinstance(config["user_input_form"], list):
  154. raise ValueError("user_input_form must be a list of objects")
  155. variables = []
  156. for item in config["user_input_form"]:
  157. key = list(item.keys())[0]
  158. if key not in ["text-input", "select", "paragraph", "external_data_tool"]:
  159. raise ValueError("Keys in user_input_form list can only be 'text-input', 'paragraph' or 'select'")
  160. form_item = item[key]
  161. if 'label' not in form_item:
  162. raise ValueError("label is required in user_input_form")
  163. if not isinstance(form_item["label"], str):
  164. raise ValueError("label in user_input_form must be of string type")
  165. if 'variable' not in form_item:
  166. raise ValueError("variable is required in user_input_form")
  167. if not isinstance(form_item["variable"], str):
  168. raise ValueError("variable in user_input_form must be of string type")
  169. pattern = re.compile(r"^(?!\d)[\u4e00-\u9fa5A-Za-z0-9_\U0001F300-\U0001F64F\U0001F680-\U0001F6FF]{1,100}$")
  170. if pattern.match(form_item["variable"]) is None:
  171. raise ValueError("variable in user_input_form must be a string, "
  172. "and cannot start with a number")
  173. variables.append(form_item["variable"])
  174. if 'required' not in form_item or not form_item["required"]:
  175. form_item["required"] = False
  176. if not isinstance(form_item["required"], bool):
  177. raise ValueError("required in user_input_form must be of boolean type")
  178. if key == "select":
  179. if 'options' not in form_item or not form_item["options"]:
  180. form_item["options"] = []
  181. if not isinstance(form_item["options"], list):
  182. raise ValueError("options in user_input_form must be a list of strings")
  183. if "default" in form_item and form_item['default'] \
  184. and form_item["default"] not in form_item["options"]:
  185. raise ValueError("default value in user_input_form must be in the options list")
  186. # pre_prompt
  187. if "pre_prompt" not in config or not config["pre_prompt"]:
  188. config["pre_prompt"] = ""
  189. if not isinstance(config["pre_prompt"], str):
  190. raise ValueError("pre_prompt must be of string type")
  191. # agent_mode
  192. if "agent_mode" not in config or not config["agent_mode"]:
  193. config["agent_mode"] = {
  194. "enabled": False,
  195. "tools": []
  196. }
  197. if not isinstance(config["agent_mode"], dict):
  198. raise ValueError("agent_mode must be of object type")
  199. if "enabled" not in config["agent_mode"] or not config["agent_mode"]["enabled"]:
  200. config["agent_mode"]["enabled"] = False
  201. if not isinstance(config["agent_mode"]["enabled"], bool):
  202. raise ValueError("enabled in agent_mode must be of boolean type")
  203. if "strategy" not in config["agent_mode"] or not config["agent_mode"]["strategy"]:
  204. config["agent_mode"]["strategy"] = PlanningStrategy.ROUTER.value
  205. if config["agent_mode"]["strategy"] not in [member.value for member in list(PlanningStrategy.__members__.values())]:
  206. raise ValueError("strategy in agent_mode must be in the specified strategy list")
  207. if "tools" not in config["agent_mode"] or not config["agent_mode"]["tools"]:
  208. config["agent_mode"]["tools"] = []
  209. if not isinstance(config["agent_mode"]["tools"], list):
  210. raise ValueError("tools in agent_mode must be a list of objects")
  211. for tool in config["agent_mode"]["tools"]:
  212. key = list(tool.keys())[0]
  213. if key in SUPPORT_TOOLS:
  214. # old style, use tool name as key
  215. tool_item = tool[key]
  216. if "enabled" not in tool_item or not tool_item["enabled"]:
  217. tool_item["enabled"] = False
  218. if not isinstance(tool_item["enabled"], bool):
  219. raise ValueError("enabled in agent_mode.tools must be of boolean type")
  220. if key == "dataset":
  221. if 'id' not in tool_item:
  222. raise ValueError("id is required in dataset")
  223. try:
  224. uuid.UUID(tool_item["id"])
  225. except ValueError:
  226. raise ValueError("id in dataset must be of UUID type")
  227. if not cls.is_dataset_exists(account, tool_item["id"]):
  228. raise ValueError("Dataset ID does not exist, please check your permission.")
  229. else:
  230. # latest style, use key-value pair
  231. if "enabled" not in tool or not tool["enabled"]:
  232. tool["enabled"] = False
  233. if "provider_type" not in tool:
  234. raise ValueError("provider_type is required in agent_mode.tools")
  235. if "provider_id" not in tool:
  236. raise ValueError("provider_id is required in agent_mode.tools")
  237. if "tool_name" not in tool:
  238. raise ValueError("tool_name is required in agent_mode.tools")
  239. if "tool_parameters" not in tool:
  240. raise ValueError("tool_parameters is required in agent_mode.tools")
  241. # dataset_query_variable
  242. cls.is_dataset_query_variable_valid(config, app_mode)
  243. # advanced prompt validation
  244. cls.is_advanced_prompt_valid(config, app_mode)
  245. # external data tools validation
  246. cls.is_external_data_tools_valid(tenant_id, config)
  247. # moderation validation
  248. cls.is_moderation_valid(tenant_id, config)
  249. # file upload validation
  250. cls.is_file_upload_valid(config)
  251. # Filter out extra parameters
  252. filtered_config = {
  253. "opening_statement": config["opening_statement"],
  254. "suggested_questions": config["suggested_questions"],
  255. "suggested_questions_after_answer": config["suggested_questions_after_answer"],
  256. "speech_to_text": config["speech_to_text"],
  257. "text_to_speech": config["text_to_speech"],
  258. "retriever_resource": config["retriever_resource"],
  259. "more_like_this": config["more_like_this"],
  260. "sensitive_word_avoidance": config["sensitive_word_avoidance"],
  261. "external_data_tools": config["external_data_tools"],
  262. "model": {
  263. "provider": config["model"]["provider"],
  264. "name": config["model"]["name"],
  265. "mode": config['model']["mode"],
  266. "completion_params": config["model"]["completion_params"]
  267. },
  268. "user_input_form": config["user_input_form"],
  269. "dataset_query_variable": config.get('dataset_query_variable'),
  270. "pre_prompt": config["pre_prompt"],
  271. "agent_mode": config["agent_mode"],
  272. "prompt_type": config["prompt_type"],
  273. "chat_prompt_config": config["chat_prompt_config"],
  274. "completion_prompt_config": config["completion_prompt_config"],
  275. "dataset_configs": config["dataset_configs"],
  276. "file_upload": config["file_upload"]
  277. }
  278. return filtered_config
  279. @classmethod
  280. def is_moderation_valid(cls, tenant_id: str, config: dict):
  281. if 'sensitive_word_avoidance' not in config or not config["sensitive_word_avoidance"]:
  282. config["sensitive_word_avoidance"] = {
  283. "enabled": False
  284. }
  285. if not isinstance(config["sensitive_word_avoidance"], dict):
  286. raise ValueError("sensitive_word_avoidance must be of dict type")
  287. if "enabled" not in config["sensitive_word_avoidance"] or not config["sensitive_word_avoidance"]["enabled"]:
  288. config["sensitive_word_avoidance"]["enabled"] = False
  289. if not config["sensitive_word_avoidance"]["enabled"]:
  290. return
  291. if "type" not in config["sensitive_word_avoidance"] or not config["sensitive_word_avoidance"]["type"]:
  292. raise ValueError("sensitive_word_avoidance.type is required")
  293. type = config["sensitive_word_avoidance"]["type"]
  294. config = config["sensitive_word_avoidance"]["config"]
  295. ModerationFactory.validate_config(
  296. name=type,
  297. tenant_id=tenant_id,
  298. config=config
  299. )
  300. @classmethod
  301. def is_file_upload_valid(cls, config: dict):
  302. if 'file_upload' not in config or not config["file_upload"]:
  303. config["file_upload"] = {}
  304. if not isinstance(config["file_upload"], dict):
  305. raise ValueError("file_upload must be of dict type")
  306. # check image config
  307. if 'image' not in config["file_upload"] or not config["file_upload"]["image"]:
  308. config["file_upload"]["image"] = {"enabled": False}
  309. if config['file_upload']['image']['enabled']:
  310. number_limits = config['file_upload']['image']['number_limits']
  311. if number_limits < 1 or number_limits > 6:
  312. raise ValueError("number_limits must be in [1, 6]")
  313. detail = config['file_upload']['image']['detail']
  314. if detail not in ['high', 'low']:
  315. raise ValueError("detail must be in ['high', 'low']")
  316. transfer_methods = config['file_upload']['image']['transfer_methods']
  317. if not isinstance(transfer_methods, list):
  318. raise ValueError("transfer_methods must be of list type")
  319. for method in transfer_methods:
  320. if method not in ['remote_url', 'local_file']:
  321. raise ValueError("transfer_methods must be in ['remote_url', 'local_file']")
  322. @classmethod
  323. def is_external_data_tools_valid(cls, tenant_id: str, config: dict):
  324. if 'external_data_tools' not in config or not config["external_data_tools"]:
  325. config["external_data_tools"] = []
  326. if not isinstance(config["external_data_tools"], list):
  327. raise ValueError("external_data_tools must be of list type")
  328. for tool in config["external_data_tools"]:
  329. if "enabled" not in tool or not tool["enabled"]:
  330. tool["enabled"] = False
  331. if not tool["enabled"]:
  332. continue
  333. if "type" not in tool or not tool["type"]:
  334. raise ValueError("external_data_tools[].type is required")
  335. type = tool["type"]
  336. config = tool["config"]
  337. ExternalDataToolFactory.validate_config(
  338. name=type,
  339. tenant_id=tenant_id,
  340. config=config
  341. )
  342. @classmethod
  343. def is_dataset_query_variable_valid(cls, config: dict, mode: str) -> None:
  344. # Only check when mode is completion
  345. if mode != 'completion':
  346. return
  347. agent_mode = config.get("agent_mode", {})
  348. tools = agent_mode.get("tools", [])
  349. dataset_exists = "dataset" in str(tools)
  350. dataset_query_variable = config.get("dataset_query_variable")
  351. if dataset_exists and not dataset_query_variable:
  352. raise ValueError("Dataset query variable is required when dataset is exist")
  353. @classmethod
  354. def is_advanced_prompt_valid(cls, config: dict, app_mode: str) -> None:
  355. # prompt_type
  356. if 'prompt_type' not in config or not config["prompt_type"]:
  357. config["prompt_type"] = "simple"
  358. if config['prompt_type'] not in ['simple', 'advanced']:
  359. raise ValueError("prompt_type must be in ['simple', 'advanced']")
  360. # chat_prompt_config
  361. if 'chat_prompt_config' not in config or not config["chat_prompt_config"]:
  362. config["chat_prompt_config"] = {}
  363. if not isinstance(config["chat_prompt_config"], dict):
  364. raise ValueError("chat_prompt_config must be of object type")
  365. # completion_prompt_config
  366. if 'completion_prompt_config' not in config or not config["completion_prompt_config"]:
  367. config["completion_prompt_config"] = {}
  368. if not isinstance(config["completion_prompt_config"], dict):
  369. raise ValueError("completion_prompt_config must be of object type")
  370. # dataset_configs
  371. if 'dataset_configs' not in config or not config["dataset_configs"]:
  372. config["dataset_configs"] = {'retrieval_model': 'single'}
  373. if 'datasets' not in config["dataset_configs"] or not config["dataset_configs"]["datasets"]:
  374. config["dataset_configs"]["datasets"] = {
  375. "strategy": "router",
  376. "datasets": []
  377. }
  378. if not isinstance(config["dataset_configs"], dict):
  379. raise ValueError("dataset_configs must be of object type")
  380. if config["dataset_configs"]['retrieval_model'] == 'multiple':
  381. if not config["dataset_configs"]['reranking_model']:
  382. raise ValueError("reranking_model has not been set")
  383. if not isinstance(config["dataset_configs"]['reranking_model'], dict):
  384. raise ValueError("reranking_model must be of object type")
  385. if not isinstance(config["dataset_configs"], dict):
  386. raise ValueError("dataset_configs must be of object type")
  387. if config['prompt_type'] == 'advanced':
  388. if not config['chat_prompt_config'] and not config['completion_prompt_config']:
  389. raise ValueError("chat_prompt_config or completion_prompt_config is required when prompt_type is advanced")
  390. if config['model']["mode"] not in ['chat', 'completion']:
  391. raise ValueError("model.mode must be in ['chat', 'completion'] when prompt_type is advanced")
  392. if app_mode == AppMode.CHAT.value and config['model']["mode"] == "completion":
  393. user_prefix = config['completion_prompt_config']['conversation_histories_role']['user_prefix']
  394. assistant_prefix = config['completion_prompt_config']['conversation_histories_role']['assistant_prefix']
  395. if not user_prefix:
  396. config['completion_prompt_config']['conversation_histories_role']['user_prefix'] = 'Human'
  397. if not assistant_prefix:
  398. config['completion_prompt_config']['conversation_histories_role']['assistant_prefix'] = 'Assistant'
  399. if config['model']["mode"] == "chat":
  400. prompt_list = config['chat_prompt_config']['prompt']
  401. if len(prompt_list) > 10:
  402. raise ValueError("prompt messages must be less than 10")