app_runner.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391
  1. import time
  2. from collections.abc import Generator
  3. from typing import Optional, Union, cast
  4. from core.application_queue_manager import ApplicationQueueManager, PublishFrom
  5. from core.entities.application_entities import (
  6. ApplicationGenerateEntity,
  7. AppOrchestrationConfigEntity,
  8. ExternalDataVariableEntity,
  9. InvokeFrom,
  10. ModelConfigEntity,
  11. PromptTemplateEntity,
  12. )
  13. from core.features.annotation_reply import AnnotationReplyFeature
  14. from core.features.external_data_fetch import ExternalDataFetchFeature
  15. from core.features.hosting_moderation import HostingModerationFeature
  16. from core.features.moderation import ModerationFeature
  17. from core.file.file_obj import FileObj
  18. from core.memory.token_buffer_memory import TokenBufferMemory
  19. from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta, LLMUsage
  20. from core.model_runtime.entities.message_entities import AssistantPromptMessage, PromptMessage
  21. from core.model_runtime.entities.model_entities import ModelPropertyKey
  22. from core.model_runtime.errors.invoke import InvokeBadRequestError
  23. from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
  24. from core.prompt.prompt_transform import PromptTransform
  25. from models.model import App, Message, MessageAnnotation
  26. class AppRunner:
  def get_pre_calculate_rest_tokens(self, app_record: App,
                                    model_config: ModelConfigEntity,
                                    prompt_template_entity: PromptTemplateEntity,
                                    inputs: dict[str, str],
                                    files: list[FileObj],
                                    query: Optional[str] = None) -> int:
      """
      Compute how many context tokens are left once the base prompt and the
      configured completion budget (max_tokens) are subtracted from the
      model's context size.

      :param app_record: app record
      :param model_config: model config entity
      :param prompt_template_entity: prompt template entity
      :param inputs: inputs
      :param files: files
      :param query: query
      :return: remaining token count, or -1 when the model schema declares no context size
      :raises InvokeBadRequestError: when prompt tokens plus max_tokens exceed the context size
      """
      llm = cast(LargeLanguageModel, model_config.provider_model_bundle.model_type_instance)
      context_size = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE)

      # Resolve the completion budget. A rule matches by its own name or by the
      # template it is based on; when several rules match, the last one wins.
      completion_budget = 0
      for rule in model_config.model_schema.parameter_rules:
          if rule.name != 'max_tokens' and rule.use_template != 'max_tokens':
              continue

          configured = model_config.parameters.get(rule.name)
          if not configured:
              configured = model_config.parameters.get(rule.use_template)
          completion_budget = configured or 0

      if context_size is None:
          return -1

      # Build the prompt without memory and context so only the fixed part is measured.
      base_prompt, _ = self.organize_prompt_messages(
          app_record=app_record,
          model_config=model_config,
          prompt_template_entity=prompt_template_entity,
          inputs=inputs,
          files=files,
          query=query
      )

      used_tokens = llm.get_num_tokens(
          model_config.model,
          model_config.credentials,
          base_prompt
      )

      remaining = context_size - completion_budget - used_tokens
      if remaining >= 0:
          return remaining

      raise InvokeBadRequestError("Query or prefix prompt is too long, you can reduce the prefix prompt, "
                                  "or shrink the max token, or switch to a llm with a larger token limit size.")
  def recalc_llm_max_tokens(self, model_config: ModelConfigEntity,
                            prompt_messages: list[PromptMessage]):
      """
      Shrink the configured max_tokens when prompt tokens plus max_tokens
      would exceed the model's context size.

      The adjusted value is written back into ``model_config.parameters``
      under the name of every matching parameter rule.

      :param model_config: model config entity (its parameters may be mutated)
      :param prompt_messages: prompt messages whose tokens are counted
      :return: -1 when the model schema declares no context size, otherwise None
      """
      llm = cast(LargeLanguageModel, model_config.provider_model_bundle.model_type_instance)
      context_size = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE)

      # Rules that control the completion budget, matched either by their own
      # name or by the template they are based on.
      budget_rules = [
          rule for rule in model_config.model_schema.parameter_rules
          if rule.name == 'max_tokens' or rule.use_template == 'max_tokens'
      ]

      # When several rules match, the value resolved from the last one wins.
      completion_budget = 0
      for rule in budget_rules:
          configured = model_config.parameters.get(rule.name)
          if not configured:
              configured = model_config.parameters.get(rule.use_template)
          completion_budget = configured or 0

      if context_size is None:
          return -1

      used_tokens = llm.get_num_tokens(
          model_config.model,
          model_config.credentials,
          prompt_messages
      )

      if used_tokens + completion_budget <= context_size:
          # Everything fits; leave the configured parameters untouched.
          return

      # Give the completion whatever is left, but never fewer than 16 tokens.
      completion_budget = max(context_size - used_tokens, 16)
      for rule in budget_rules:
          model_config.parameters[rule.name] = completion_budget
  def organize_prompt_messages(self, app_record: App,
                               model_config: ModelConfigEntity,
                               prompt_template_entity: PromptTemplateEntity,
                               inputs: dict[str, str],
                               files: list[FileObj],
                               query: Optional[str] = None,
                               context: Optional[str] = None,
                               memory: Optional[TokenBufferMemory] = None) \
          -> tuple[list[PromptMessage], Optional[list[str]]]:
      """
      Build the prompt messages and stop words for a request.

      Simple prompt templates go through ``PromptTransform.get_prompt``, which
      also supplies the stop words; every other prompt type goes through
      ``PromptTransform.get_advanced_prompt`` and takes the stop words from
      ``model_config.stop``.

      :param app_record: app record
      :param model_config: model config entity
      :param prompt_template_entity: prompt template entity
      :param inputs: inputs
      :param files: files
      :param query: query
      :param context: context
      :param memory: memory
      :return: tuple of (prompt messages, stop words)
      """
      transformer = PromptTransform()

      if prompt_template_entity.prompt_type != PromptTemplateEntity.PromptType.SIMPLE:
          advanced_messages = transformer.get_advanced_prompt(
              app_mode=app_record.mode,
              prompt_template_entity=prompt_template_entity,
              inputs=inputs,
              query=query,
              files=files,
              context=context,
              memory=memory,
              model_config=model_config
          )
          return advanced_messages, model_config.stop

      # The simple transform is always given a string query, never None.
      simple_messages, stop_words = transformer.get_prompt(
          app_mode=app_record.mode,
          prompt_template_entity=prompt_template_entity,
          inputs=inputs,
          query=query or '',
          files=files,
          context=context,
          memory=memory,
          model_config=model_config
      )
      return simple_messages, stop_words
  def direct_output(self, queue_manager: ApplicationQueueManager,
                    app_orchestration_config: AppOrchestrationConfigEntity,
                    prompt_messages: list,
                    text: str,
                    stream: bool,
                    usage: Optional[LLMUsage] = None) -> None:
      """
      Publish a fixed text as the answer without invoking the model.

      In streaming mode the text is first emitted one character per chunk,
      pausing 10 ms after each chunk; in both modes a final message-end
      event carrying the whole text is published.

      :param queue_manager: application queue manager
      :param app_orchestration_config: app orchestration config
      :param prompt_messages: prompt messages
      :param text: text
      :param stream: stream
      :param usage: usage, an empty usage is reported when omitted
      :return:
      """
      model_name = app_orchestration_config.model_config.model

      if stream:
          for position, character in enumerate(text):
              chunk = LLMResultChunk(
                  model=model_name,
                  prompt_messages=prompt_messages,
                  delta=LLMResultChunkDelta(
                      index=position,
                      message=AssistantPromptMessage(content=character)
                  )
              )
              queue_manager.publish_chunk_message(chunk, PublishFrom.APPLICATION_MANAGER)
              time.sleep(0.01)

      final_result = LLMResult(
          model=model_name,
          prompt_messages=prompt_messages,
          message=AssistantPromptMessage(content=text),
          usage=usage or LLMUsage.empty_usage()
      )
      queue_manager.publish_message_end(
          llm_result=final_result,
          pub_from=PublishFrom.APPLICATION_MANAGER
      )
  187. def _handle_invoke_result(self, invoke_result: Union[LLMResult, Generator],
  188. queue_manager: ApplicationQueueManager,
  189. stream: bool,
  190. agent: bool = False) -> None:
  191. """
  192. Handle invoke result
  193. :param invoke_result: invoke result
  194. :param queue_manager: application queue manager
  195. :param stream: stream
  196. :return:
  197. """
  198. if not stream:
  199. self._handle_invoke_result_direct(
  200. invoke_result=invoke_result,
  201. queue_manager=queue_manager,
  202. agent=agent
  203. )
  204. else:
  205. self._handle_invoke_result_stream(
  206. invoke_result=invoke_result,
  207. queue_manager=queue_manager,
  208. agent=agent
  209. )
  def _handle_invoke_result_direct(self, invoke_result: LLMResult,
                                   queue_manager: ApplicationQueueManager,
                                   agent: bool) -> None:
      """
      Publish a complete (non-streamed) invoke result as the message end.

      :param invoke_result: invoke result
      :param queue_manager: application queue manager
      :param agent: accepted for signature parity with the streaming handler; not used here
      :return:
      """
      publisher = PublishFrom.APPLICATION_MANAGER
      queue_manager.publish_message_end(llm_result=invoke_result, pub_from=publisher)
  def _handle_invoke_result_stream(self, invoke_result: Generator,
                                   queue_manager: ApplicationQueueManager,
                                   agent: bool) -> None:
      """
      Forward every streamed chunk to the queue and publish the aggregated
      result as the message end once the generator is exhausted.

      :param invoke_result: generator yielding result chunks
      :param queue_manager: application queue manager
      :param agent: when true chunks are published as agent chunk messages,
          otherwise as regular chunk messages
      :return:
      """
      model = None
      prompt_messages = []
      text_parts = []
      usage = None

      for result in invoke_result:
          if agent:
              queue_manager.publish_agent_chunk_message(result, PublishFrom.APPLICATION_MANAGER)
          else:
              queue_manager.publish_chunk_message(result, PublishFrom.APPLICATION_MANAGER)

          # A chunk may carry no text (content is None); concatenating that onto
          # a str raised TypeError and aborted the stream before the message end
          # was published. Only string content contributes to the final text.
          # NOTE(review): non-string content is skipped for the aggregated text
          # (the chunk itself is still published above) - confirm whether
          # structured content should be flattened into the text instead.
          chunk_content = result.delta.message.content
          if isinstance(chunk_content, str):
              text_parts.append(chunk_content)

          # Model name and prompt messages are taken from the first chunk that has them.
          if not model:
              model = result.model

          if not prompt_messages:
              prompt_messages = result.prompt_messages

          # Keep the first usage reported by any chunk.
          if not usage and result.delta.usage:
              usage = result.delta.usage

      if not usage:
          usage = LLMUsage.empty_usage()

      llm_result = LLMResult(
          model=model,
          prompt_messages=prompt_messages,
          message=AssistantPromptMessage(content=''.join(text_parts)),
          usage=usage
      )

      queue_manager.publish_message_end(
          llm_result=llm_result,
          pub_from=PublishFrom.APPLICATION_MANAGER
      )
  def moderation_for_inputs(self, app_id: str,
                            tenant_id: str,
                            app_orchestration_config_entity: AppOrchestrationConfigEntity,
                            inputs: dict,
                            query: str) -> tuple[bool, dict, str]:
      """
      Run the sensitive_word_avoidance moderation over the inputs and query.

      Delegates to ``ModerationFeature.check`` and returns its result unchanged.

      :param app_id: app id
      :param tenant_id: tenant id
      :param app_orchestration_config_entity: app orchestration config entity
      :param inputs: inputs
      :param query: query
      :return: whatever ``ModerationFeature.check`` returns
      """
      return ModerationFeature().check(
          app_id=app_id,
          tenant_id=tenant_id,
          app_orchestration_config_entity=app_orchestration_config_entity,
          inputs=inputs,
          query=query,
      )
  def check_hosting_moderation(self, application_generate_entity: ApplicationGenerateEntity,
                               queue_manager: ApplicationQueueManager,
                               prompt_messages: list[PromptMessage]) -> bool:
      """
      Run hosting moderation on the prompt and, when it reports a hit,
      answer with a canned apology through ``direct_output``.

      :param application_generate_entity: application generate entity
      :param queue_manager: queue manager
      :param prompt_messages: prompt messages
      :return: the result of ``HostingModerationFeature.check``
      """
      flagged = HostingModerationFeature().check(
          application_generate_entity=application_generate_entity,
          prompt_messages=prompt_messages
      )

      if not flagged:
          return flagged

      # Moderation hit: publish the fixed reply in place of a model answer.
      self.direct_output(
          queue_manager=queue_manager,
          app_orchestration_config=application_generate_entity.app_orchestration_config_entity,
          prompt_messages=prompt_messages,
          text=("I apologize for any confusion, "
                "but I'm an AI assistant to be helpful, harmless, and honest."),
          stream=application_generate_entity.stream
      )
      return flagged
  def fill_in_inputs_from_external_data_tools(self, tenant_id: str,
                                              app_id: str,
                                              external_data_tools: list[ExternalDataVariableEntity],
                                              inputs: dict,
                                              query: str) -> dict:
      """
      Fill in variable inputs from external data tools, if any are configured.

      Delegates to ``ExternalDataFetchFeature.fetch`` and returns its result unchanged.

      :param tenant_id: workspace id
      :param app_id: app id
      :param external_data_tools: external data tools configs
      :param inputs: the inputs
      :param query: the query
      :return: the filled inputs
      """
      fetcher = ExternalDataFetchFeature()
      filled_inputs = fetcher.fetch(
          tenant_id=tenant_id,
          app_id=app_id,
          external_data_tools=external_data_tools,
          inputs=inputs,
          query=query
      )
      return filled_inputs
  def query_app_annotations_to_reply(self, app_record: App,
                                     message: Message,
                                     query: str,
                                     user_id: str,
                                     invoke_from: InvokeFrom) -> Optional[MessageAnnotation]:
      """
      Look up an app annotation that can be used as the reply to the query.

      Delegates to ``AnnotationReplyFeature.query`` and returns its result unchanged.

      :param app_record: app record
      :param message: message
      :param query: query
      :param user_id: user id
      :param invoke_from: invoke from
      :return: the result of ``AnnotationReplyFeature.query``, which may be None
      """
      return AnnotationReplyFeature().query(
          app_record=app_record,
          message=message,
          query=query,
          user_id=user_id,
          invoke_from=invoke_from
      )