app_model_config_service.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366
  1. import re
  2. import uuid
  3. from core.agent.agent_executor import PlanningStrategy
  4. from core.constant import llm_constant
  5. from models.account import Account
  6. from services.dataset_service import DatasetService
  7. from core.llm.llm_builder import LLMBuilder
  8. MODEL_PROVIDERS = [
  9. 'openai',
  10. 'anthropic',
  11. ]
  12. MODELS_BY_APP_MODE = {
  13. 'chat': [
  14. 'claude-instant-1',
  15. 'claude-2',
  16. 'gpt-4',
  17. 'gpt-4-32k',
  18. 'gpt-3.5-turbo',
  19. 'gpt-3.5-turbo-16k',
  20. ],
  21. 'completion': [
  22. 'claude-instant-1',
  23. 'claude-2',
  24. 'gpt-4',
  25. 'gpt-4-32k',
  26. 'gpt-3.5-turbo',
  27. 'gpt-3.5-turbo-16k',
  28. 'text-davinci-003',
  29. ]
  30. }
  31. SUPPORT_AGENT_MODELS = [
  32. "gpt-4",
  33. "gpt-4-32k",
  34. "gpt-3.5-turbo",
  35. "gpt-3.5-turbo-16k",
  36. ]
  37. SUPPORT_TOOLS = ["dataset", "google_search", "web_reader", "wikipedia", "current_datetime"]
  38. class AppModelConfigService:
  39. @staticmethod
  40. def is_dataset_exists(account: Account, dataset_id: str) -> bool:
  41. # verify if the dataset ID exists
  42. dataset = DatasetService.get_dataset(dataset_id)
  43. if not dataset:
  44. return False
  45. if dataset.tenant_id != account.current_tenant_id:
  46. return False
  47. return True
  48. @staticmethod
  49. def validate_model_completion_params(cp: dict, model_name: str) -> dict:
  50. # 6. model.completion_params
  51. if not isinstance(cp, dict):
  52. raise ValueError("model.completion_params must be of object type")
  53. # max_tokens
  54. if 'max_tokens' not in cp:
  55. cp["max_tokens"] = 512
  56. if not isinstance(cp["max_tokens"], int) or cp["max_tokens"] <= 0 or cp["max_tokens"] > \
  57. llm_constant.max_context_token_length[model_name]:
  58. raise ValueError(
  59. "max_tokens must be an integer greater than 0 "
  60. "and not exceeding the maximum value of the corresponding model")
  61. # temperature
  62. if 'temperature' not in cp:
  63. cp["temperature"] = 1
  64. if not isinstance(cp["temperature"], (float, int)) or cp["temperature"] < 0 or cp["temperature"] > 2:
  65. raise ValueError("temperature must be a float between 0 and 2")
  66. # top_p
  67. if 'top_p' not in cp:
  68. cp["top_p"] = 1
  69. if not isinstance(cp["top_p"], (float, int)) or cp["top_p"] < 0 or cp["top_p"] > 2:
  70. raise ValueError("top_p must be a float between 0 and 2")
  71. # presence_penalty
  72. if 'presence_penalty' not in cp:
  73. cp["presence_penalty"] = 0
  74. if not isinstance(cp["presence_penalty"], (float, int)) or cp["presence_penalty"] < -2 or cp["presence_penalty"] > 2:
  75. raise ValueError("presence_penalty must be a float between -2 and 2")
  76. # presence_penalty
  77. if 'frequency_penalty' not in cp:
  78. cp["frequency_penalty"] = 0
  79. if not isinstance(cp["frequency_penalty"], (float, int)) or cp["frequency_penalty"] < -2 or cp["frequency_penalty"] > 2:
  80. raise ValueError("frequency_penalty must be a float between -2 and 2")
  81. # Filter out extra parameters
  82. filtered_cp = {
  83. "max_tokens": cp["max_tokens"],
  84. "temperature": cp["temperature"],
  85. "top_p": cp["top_p"],
  86. "presence_penalty": cp["presence_penalty"],
  87. "frequency_penalty": cp["frequency_penalty"]
  88. }
  89. return filtered_cp
  90. @staticmethod
  91. def validate_configuration(account: Account, config: dict, mode: str) -> dict:
  92. # opening_statement
  93. if 'opening_statement' not in config or not config["opening_statement"]:
  94. config["opening_statement"] = ""
  95. if not isinstance(config["opening_statement"], str):
  96. raise ValueError("opening_statement must be of string type")
  97. # suggested_questions
  98. if 'suggested_questions' not in config or not config["suggested_questions"]:
  99. config["suggested_questions"] = []
  100. if not isinstance(config["suggested_questions"], list):
  101. raise ValueError("suggested_questions must be of list type")
  102. for question in config["suggested_questions"]:
  103. if not isinstance(question, str):
  104. raise ValueError("Elements in suggested_questions list must be of string type")
  105. # suggested_questions_after_answer
  106. if 'suggested_questions_after_answer' not in config or not config["suggested_questions_after_answer"]:
  107. config["suggested_questions_after_answer"] = {
  108. "enabled": False
  109. }
  110. if not isinstance(config["suggested_questions_after_answer"], dict):
  111. raise ValueError("suggested_questions_after_answer must be of dict type")
  112. if "enabled" not in config["suggested_questions_after_answer"] or not config["suggested_questions_after_answer"]["enabled"]:
  113. config["suggested_questions_after_answer"]["enabled"] = False
  114. if not isinstance(config["suggested_questions_after_answer"]["enabled"], bool):
  115. raise ValueError("enabled in suggested_questions_after_answer must be of boolean type")
  116. # speech_to_text
  117. if 'speech_to_text' not in config or not config["speech_to_text"]:
  118. config["speech_to_text"] = {
  119. "enabled": False
  120. }
  121. if not isinstance(config["speech_to_text"], dict):
  122. raise ValueError("speech_to_text must be of dict type")
  123. if "enabled" not in config["speech_to_text"] or not config["speech_to_text"]["enabled"]:
  124. config["speech_to_text"]["enabled"] = False
  125. if not isinstance(config["speech_to_text"]["enabled"], bool):
  126. raise ValueError("enabled in speech_to_text must be of boolean type")
  127. # more_like_this
  128. if 'more_like_this' not in config or not config["more_like_this"]:
  129. config["more_like_this"] = {
  130. "enabled": False
  131. }
  132. if not isinstance(config["more_like_this"], dict):
  133. raise ValueError("more_like_this must be of dict type")
  134. if "enabled" not in config["more_like_this"] or not config["more_like_this"]["enabled"]:
  135. config["more_like_this"]["enabled"] = False
  136. if not isinstance(config["more_like_this"]["enabled"], bool):
  137. raise ValueError("enabled in more_like_this must be of boolean type")
  138. # sensitive_word_avoidance
  139. if 'sensitive_word_avoidance' not in config or not config["sensitive_word_avoidance"]:
  140. config["sensitive_word_avoidance"] = {
  141. "enabled": False
  142. }
  143. if not isinstance(config["sensitive_word_avoidance"], dict):
  144. raise ValueError("sensitive_word_avoidance must be of dict type")
  145. if "enabled" not in config["sensitive_word_avoidance"] or not config["sensitive_word_avoidance"]["enabled"]:
  146. config["sensitive_word_avoidance"]["enabled"] = False
  147. if not isinstance(config["sensitive_word_avoidance"]["enabled"], bool):
  148. raise ValueError("enabled in sensitive_word_avoidance must be of boolean type")
  149. if "words" not in config["sensitive_word_avoidance"] or not config["sensitive_word_avoidance"]["words"]:
  150. config["sensitive_word_avoidance"]["words"] = ""
  151. if not isinstance(config["sensitive_word_avoidance"]["words"], str):
  152. raise ValueError("words in sensitive_word_avoidance must be of string type")
  153. if "canned_response" not in config["sensitive_word_avoidance"] or not config["sensitive_word_avoidance"]["canned_response"]:
  154. config["sensitive_word_avoidance"]["canned_response"] = ""
  155. if not isinstance(config["sensitive_word_avoidance"]["canned_response"], str):
  156. raise ValueError("canned_response in sensitive_word_avoidance must be of string type")
  157. # model
  158. if 'model' not in config:
  159. raise ValueError("model is required")
  160. if not isinstance(config["model"], dict):
  161. raise ValueError("model must be of object type")
  162. # model.provider
  163. if 'provider' not in config["model"] or config["model"]["provider"] not in MODEL_PROVIDERS:
  164. raise ValueError(f"model.provider is required and must be in {str(MODEL_PROVIDERS)}")
  165. # model.name
  166. if 'name' not in config["model"]:
  167. raise ValueError("model.name is required")
  168. if config["model"]["name"] not in MODELS_BY_APP_MODE[mode]:
  169. raise ValueError("model.name must be in the specified model list")
  170. # model.completion_params
  171. if 'completion_params' not in config["model"]:
  172. raise ValueError("model.completion_params is required")
  173. config["model"]["completion_params"] = AppModelConfigService.validate_model_completion_params(
  174. config["model"]["completion_params"],
  175. config["model"]["name"]
  176. )
  177. # user_input_form
  178. if "user_input_form" not in config or not config["user_input_form"]:
  179. config["user_input_form"] = []
  180. if not isinstance(config["user_input_form"], list):
  181. raise ValueError("user_input_form must be a list of objects")
  182. variables = []
  183. for item in config["user_input_form"]:
  184. key = list(item.keys())[0]
  185. if key not in ["text-input", "select"]:
  186. raise ValueError("Keys in user_input_form list can only be 'text-input' or 'select'")
  187. form_item = item[key]
  188. if 'label' not in form_item:
  189. raise ValueError("label is required in user_input_form")
  190. if not isinstance(form_item["label"], str):
  191. raise ValueError("label in user_input_form must be of string type")
  192. if 'variable' not in form_item:
  193. raise ValueError("variable is required in user_input_form")
  194. if not isinstance(form_item["variable"], str):
  195. raise ValueError("variable in user_input_form must be of string type")
  196. pattern = re.compile(r"^(?!\d)[\u4e00-\u9fa5A-Za-z0-9_\U0001F300-\U0001F64F\U0001F680-\U0001F6FF]{1,100}$")
  197. if pattern.match(form_item["variable"]) is None:
  198. raise ValueError("variable in user_input_form must be a string, "
  199. "and cannot start with a number")
  200. variables.append(form_item["variable"])
  201. if 'required' not in form_item or not form_item["required"]:
  202. form_item["required"] = False
  203. if not isinstance(form_item["required"], bool):
  204. raise ValueError("required in user_input_form must be of boolean type")
  205. if key == "select":
  206. if 'options' not in form_item or not form_item["options"]:
  207. form_item["options"] = []
  208. if not isinstance(form_item["options"], list):
  209. raise ValueError("options in user_input_form must be a list of strings")
  210. if "default" in form_item and form_item['default'] \
  211. and form_item["default"] not in form_item["options"]:
  212. raise ValueError("default value in user_input_form must be in the options list")
  213. # pre_prompt
  214. if "pre_prompt" not in config or not config["pre_prompt"]:
  215. config["pre_prompt"] = ""
  216. if not isinstance(config["pre_prompt"], str):
  217. raise ValueError("pre_prompt must be of string type")
  218. template_vars = re.findall(r"\{\{(\w+)\}\}", config["pre_prompt"])
  219. for var in template_vars:
  220. if var not in variables:
  221. raise ValueError("Template variables in pre_prompt must be defined in user_input_form")
  222. # agent_mode
  223. if "agent_mode" not in config or not config["agent_mode"]:
  224. config["agent_mode"] = {
  225. "enabled": False,
  226. "tools": []
  227. }
  228. if not isinstance(config["agent_mode"], dict):
  229. raise ValueError("agent_mode must be of object type")
  230. if "enabled" not in config["agent_mode"] or not config["agent_mode"]["enabled"]:
  231. config["agent_mode"]["enabled"] = False
  232. if not isinstance(config["agent_mode"]["enabled"], bool):
  233. raise ValueError("enabled in agent_mode must be of boolean type")
  234. if "strategy" not in config["agent_mode"] or not config["agent_mode"]["strategy"]:
  235. config["agent_mode"]["strategy"] = PlanningStrategy.ROUTER.value
  236. if config["agent_mode"]["strategy"] not in [member.value for member in list(PlanningStrategy.__members__.values())]:
  237. raise ValueError("strategy in agent_mode must be in the specified strategy list")
  238. if "tools" not in config["agent_mode"] or not config["agent_mode"]["tools"]:
  239. config["agent_mode"]["tools"] = []
  240. if not isinstance(config["agent_mode"]["tools"], list):
  241. raise ValueError("tools in agent_mode must be a list of objects")
  242. for tool in config["agent_mode"]["tools"]:
  243. key = list(tool.keys())[0]
  244. if key not in SUPPORT_TOOLS:
  245. raise ValueError("Keys in agent_mode.tools must be in the specified tool list")
  246. tool_item = tool[key]
  247. if "enabled" not in tool_item or not tool_item["enabled"]:
  248. tool_item["enabled"] = False
  249. if not isinstance(tool_item["enabled"], bool):
  250. raise ValueError("enabled in agent_mode.tools must be of boolean type")
  251. if key == "dataset":
  252. if 'id' not in tool_item:
  253. raise ValueError("id is required in dataset")
  254. try:
  255. uuid.UUID(tool_item["id"])
  256. except ValueError:
  257. raise ValueError("id in dataset must be of UUID type")
  258. if not AppModelConfigService.is_dataset_exists(account, tool_item["id"]):
  259. raise ValueError("Dataset ID does not exist, please check your permission.")
  260. # Filter out extra parameters
  261. filtered_config = {
  262. "opening_statement": config["opening_statement"],
  263. "suggested_questions": config["suggested_questions"],
  264. "suggested_questions_after_answer": config["suggested_questions_after_answer"],
  265. "speech_to_text": config["speech_to_text"],
  266. "more_like_this": config["more_like_this"],
  267. "sensitive_word_avoidance": config["sensitive_word_avoidance"],
  268. "model": {
  269. "provider": config["model"]["provider"],
  270. "name": config["model"]["name"],
  271. "completion_params": config["model"]["completion_params"]
  272. },
  273. "user_input_form": config["user_input_form"],
  274. "pre_prompt": config["pre_prompt"],
  275. "agent_mode": config["agent_mode"]
  276. }
  277. return filtered_config