app_runner.py

import time
from typing import Generator, List, Optional, Tuple, Union, cast

from core.application_queue_manager import ApplicationQueueManager, PublishFrom
from core.entities.application_entities import (
    ApplicationGenerateEntity,
    AppOrchestrationConfigEntity,
    ExternalDataVariableEntity,
    InvokeFrom,
    ModelConfigEntity,
    PromptTemplateEntity,
)
from core.features.annotation_reply import AnnotationReplyFeature
from core.features.external_data_fetch import ExternalDataFetchFeature
from core.features.hosting_moderation import HostingModerationFeature
from core.features.moderation import ModerationFeature
from core.file.file_obj import FileObj
from core.memory.token_buffer_memory import TokenBufferMemory
from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
from core.model_runtime.entities.message_entities import AssistantPromptMessage, PromptMessage
from core.model_runtime.entities.model_entities import ModelPropertyKey
from core.model_runtime.errors.invoke import InvokeBadRequestError
from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
from core.prompt.prompt_transform import PromptTransform
from models.model import App, Message, MessageAnnotation


class AppRunner:
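    """Shared helpers for app runners: token budget pre-calculation, prompt
    organization, input/hosting moderation, and publishing LLM invoke results
    to the application queue."""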

    def get_pre_calculate_rest_tokens(self, app_record: App,
                                      model_config: ModelConfigEntity,
                                      prompt_template_entity: PromptTemplateEntity,
                                      inputs: dict[str, str],
                                      files: list[FileObj],
                                      query: Optional[str] = None) -> int:
        """
        Pre-calculate the rest tokens available for context.
        :param app_record: app record
        :param model_config: model config entity
        :param prompt_template_entity: prompt template entity
        :param inputs: inputs
        :param files: files
        :param query: query
        :return: rest tokens, or -1 if the model declares no context size
        """
        model_type_instance = model_config.provider_model_bundle.model_type_instance
        model_type_instance = cast(LargeLanguageModel, model_type_instance)

        model_context_tokens = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE)

        max_tokens = 0
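        # resolve the configured max_tokens: the parameter rule may be named
        # 'max_tokens' directly or mapped onto it via a use_template alias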
        for parameter_rule in model_config.model_schema.parameter_rules:
            if (parameter_rule.name == 'max_tokens'
                    or (parameter_rule.use_template and parameter_rule.use_template == 'max_tokens')):
                max_tokens = (model_config.parameters.get(parameter_rule.name)
                              or model_config.parameters.get(parameter_rule.use_template)) or 0

        if model_context_tokens is None:
            return -1

        if max_tokens is None:
            max_tokens = 0
        # get prompt messages without memory and context
        prompt_messages, stop = self.organize_prompt_messages(
            app_record=app_record,
            model_config=model_config,
            prompt_template_entity=prompt_template_entity,
            inputs=inputs,
            files=files,
            query=query
        )

        prompt_tokens = model_type_instance.get_num_tokens(
            model_config.model,
            model_config.credentials,
            prompt_messages
        )
        rest_tokens = model_context_tokens - max_tokens - prompt_tokens
        if rest_tokens < 0:
            raise InvokeBadRequestError("Query or prefix prompt is too long; you can reduce the prefix prompt, "
                                        "shrink max_tokens, or switch to an LLM with a larger context size.")

        return rest_tokens

    def recale_llm_max_tokens(self, model_config: ModelConfigEntity,
                              prompt_messages: List[PromptMessage]):
        # recalculate max_tokens if prompt_tokens + max_tokens exceeds the model's context size
        model_type_instance = model_config.provider_model_bundle.model_type_instance
        model_type_instance = cast(LargeLanguageModel, model_type_instance)

        model_context_tokens = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE)

        max_tokens = 0
        for parameter_rule in model_config.model_schema.parameter_rules:
            if (parameter_rule.name == 'max_tokens'
                    or (parameter_rule.use_template and parameter_rule.use_template == 'max_tokens')):
                max_tokens = (model_config.parameters.get(parameter_rule.name)
                              or model_config.parameters.get(parameter_rule.use_template)) or 0

        if model_context_tokens is None:
            return -1

        if max_tokens is None:
            max_tokens = 0

        prompt_tokens = model_type_instance.get_num_tokens(
            model_config.model,
            model_config.credentials,
            prompt_messages
        )
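
        # cap max_tokens so the request fits the context window, keeping at
        # least 16 tokens of headroom for the completion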
        if prompt_tokens + max_tokens > model_context_tokens:
            max_tokens = max(model_context_tokens - prompt_tokens, 16)

            for parameter_rule in model_config.model_schema.parameter_rules:
                if (parameter_rule.name == 'max_tokens'
                        or (parameter_rule.use_template and parameter_rule.use_template == 'max_tokens')):
                    model_config.parameters[parameter_rule.name] = max_tokens

    def organize_prompt_messages(self, app_record: App,
                                 model_config: ModelConfigEntity,
                                 prompt_template_entity: PromptTemplateEntity,
                                 inputs: dict[str, str],
                                 files: list[FileObj],
                                 query: Optional[str] = None,
                                 context: Optional[str] = None,
                                 memory: Optional[TokenBufferMemory] = None) \
            -> Tuple[List[PromptMessage], Optional[List[str]]]:
        """
        Organize prompt messages
        :param app_record: app record
        :param model_config: model config entity
        :param prompt_template_entity: prompt template entity
        :param inputs: inputs
        :param files: files
        :param query: query
        :param context: context
        :param memory: memory
        :return: prompt messages and optional stop words
        """
        prompt_transform = PromptTransform()

        # get prompt without memory and context
        if prompt_template_entity.prompt_type == PromptTemplateEntity.PromptType.SIMPLE:
            prompt_messages, stop = prompt_transform.get_prompt(
                app_mode=app_record.mode,
                prompt_template_entity=prompt_template_entity,
                inputs=inputs,
                query=query if query else '',
                files=files,
                context=context,
                memory=memory,
                model_config=model_config
            )
        else:
            prompt_messages = prompt_transform.get_advanced_prompt(
                app_mode=app_record.mode,
                prompt_template_entity=prompt_template_entity,
                inputs=inputs,
                query=query,
                files=files,
                context=context,
                memory=memory,
                model_config=model_config
            )
            stop = model_config.stop

        return prompt_messages, stop

    def direct_output(self, queue_manager: ApplicationQueueManager,
                      app_orchestration_config: AppOrchestrationConfigEntity,
                      prompt_messages: list,
                      text: str,
                      stream: bool,
                      usage: Optional[LLMUsage] = None) -> None:
        """
        Direct output
        :param queue_manager: application queue manager
        :param app_orchestration_config: app orchestration config
        :param prompt_messages: prompt messages
        :param text: text
        :param stream: stream
        :param usage: usage
        :return:
        """
        if stream:
            index = 0
            for token in text:
                queue_manager.publish_chunk_message(LLMResultChunk(
                    model=app_orchestration_config.model_config.model,
                    prompt_messages=prompt_messages,
                    delta=LLMResultChunkDelta(
                        index=index,
                        message=AssistantPromptMessage(content=token)
                    )
                ), PublishFrom.APPLICATION_MANAGER)
                index += 1
                time.sleep(0.01)
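
        # always publish a message-end event so downstream consumers can finalize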
        queue_manager.publish_message_end(
            llm_result=LLMResult(
                model=app_orchestration_config.model_config.model,
                prompt_messages=prompt_messages,
                message=AssistantPromptMessage(content=text),
                usage=usage if usage else LLMUsage.empty_usage()
            ),
            pub_from=PublishFrom.APPLICATION_MANAGER
        )

    def _handle_invoke_result(self, invoke_result: Union[LLMResult, Generator],
                              queue_manager: ApplicationQueueManager,
                              stream: bool,
                              agent: bool = False) -> None:
        """
        Handle invoke result
        :param invoke_result: invoke result
        :param queue_manager: application queue manager
        :param stream: stream
        :param agent: whether the result comes from an agent run
        :return:
        """
        if not stream:
            self._handle_invoke_result_direct(
                invoke_result=invoke_result,
                queue_manager=queue_manager,
                agent=agent
            )
        else:
            self._handle_invoke_result_stream(
                invoke_result=invoke_result,
                queue_manager=queue_manager,
                agent=agent
            )

    def _handle_invoke_result_direct(self, invoke_result: LLMResult,
                                     queue_manager: ApplicationQueueManager,
                                     agent: bool) -> None:
        """
        Handle invoke result directly
        :param invoke_result: invoke result
        :param queue_manager: application queue manager
        :param agent: whether the result comes from an agent run
        :return:
        """
        queue_manager.publish_message_end(
            llm_result=invoke_result,
            pub_from=PublishFrom.APPLICATION_MANAGER
        )

    def _handle_invoke_result_stream(self, invoke_result: Generator,
                                     queue_manager: ApplicationQueueManager,
                                     agent: bool) -> None:
        """
        Handle invoke result as a stream
        :param invoke_result: invoke result
        :param queue_manager: application queue manager
        :param agent: whether the result comes from an agent run
        :return:
        """
        model = None
        prompt_messages = []
        text = ''
        usage = None
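        # forward each chunk to the queue while accumulating the model name,
        # prompt messages, text, and usage needed for the final result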
        for result in invoke_result:
            if not agent:
                queue_manager.publish_chunk_message(result, PublishFrom.APPLICATION_MANAGER)
            else:
                queue_manager.publish_agent_chunk_message(result, PublishFrom.APPLICATION_MANAGER)

            text += result.delta.message.content

            if not model:
                model = result.model

            if not prompt_messages:
                prompt_messages = result.prompt_messages

            if not usage and result.delta.usage:
                usage = result.delta.usage

        if not usage:
            usage = LLMUsage.empty_usage()
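
        # assemble the aggregated result and publish the message-end event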
        llm_result = LLMResult(
            model=model,
            prompt_messages=prompt_messages,
            message=AssistantPromptMessage(content=text),
            usage=usage
        )

        queue_manager.publish_message_end(
            llm_result=llm_result,
            pub_from=PublishFrom.APPLICATION_MANAGER
        )

    def moderation_for_inputs(self, app_id: str,
                              tenant_id: str,
                              app_orchestration_config_entity: AppOrchestrationConfigEntity,
                              inputs: dict,
                              query: str) -> Tuple[bool, dict, str]:
        """
        Process sensitive_word_avoidance.
        :param app_id: app id
        :param tenant_id: tenant id
        :param app_orchestration_config_entity: app orchestration config entity
        :param inputs: inputs
        :param query: query
        :return:
        """
        moderation_feature = ModerationFeature()
        return moderation_feature.check(
            app_id=app_id,
            tenant_id=tenant_id,
            app_orchestration_config_entity=app_orchestration_config_entity,
            inputs=inputs,
            query=query,
        )

    def check_hosting_moderation(self, application_generate_entity: ApplicationGenerateEntity,
                                 queue_manager: ApplicationQueueManager,
                                 prompt_messages: list[PromptMessage]) -> bool:
        """
        Check hosting moderation
        :param application_generate_entity: application generate entity
        :param queue_manager: queue manager
        :param prompt_messages: prompt messages
        :return: whether the content was flagged by hosting moderation
        """
        hosting_moderation_feature = HostingModerationFeature()
        moderation_result = hosting_moderation_feature.check(
            application_generate_entity=application_generate_entity,
            prompt_messages=prompt_messages
        )

        if moderation_result:
            self.direct_output(
                queue_manager=queue_manager,
                app_orchestration_config=application_generate_entity.app_orchestration_config_entity,
                prompt_messages=prompt_messages,
                text="I apologize for any confusion, "
                     "but I'm an AI assistant designed to be helpful, harmless, and honest.",
                stream=application_generate_entity.stream
            )

        return moderation_result

    def fill_in_inputs_from_external_data_tools(self, tenant_id: str,
                                                app_id: str,
                                                external_data_tools: list[ExternalDataVariableEntity],
                                                inputs: dict,
                                                query: str) -> dict:
        """
        Fill in variable inputs from external data tools, if any are configured.
        :param tenant_id: workspace id
        :param app_id: app id
        :param external_data_tools: external data tools configs
        :param inputs: the inputs
        :param query: the query
        :return: the filled inputs
        """
        external_data_fetch_feature = ExternalDataFetchFeature()
        return external_data_fetch_feature.fetch(
            tenant_id=tenant_id,
            app_id=app_id,
            external_data_tools=external_data_tools,
            inputs=inputs,
            query=query
        )

    def query_app_annotations_to_reply(self, app_record: App,
                                       message: Message,
                                       query: str,
                                       user_id: str,
                                       invoke_from: InvokeFrom) -> Optional[MessageAnnotation]:
        """
        Query app annotations to reply
        :param app_record: app record
        :param message: message
        :param query: query
        :param user_id: user id
        :param invoke_from: invoke from
        :return:
        """
        annotation_reply_feature = AnnotationReplyFeature()
        return annotation_reply_feature.query(
            app_record=app_record,
            message=message,
            query=query,
            user_id=user_id,
            invoke_from=invoke_from
        )
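
# A plausible call order for a single app run (a sketch only; the exact
# sequence lives in the concrete runners that build on AppRunner, not here):
#   1. moderation_for_inputs / fill_in_inputs_from_external_data_tools
#   2. query_app_annotations_to_reply (short-circuit via direct_output on a hit)
#   3. organize_prompt_messages, then recale_llm_max_tokens before invoking
#   4. check_hosting_moderation, then the model invoke itself
#   5. _handle_invoke_result to publish chunks and the final message-end event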