test_llm.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366
  1. import os
  2. from collections.abc import Generator
  3. import pytest
  4. from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
  5. from core.model_runtime.entities.message_entities import (
  6. AssistantPromptMessage,
  7. PromptMessageTool,
  8. SystemPromptMessage,
  9. TextPromptMessageContent,
  10. UserPromptMessage,
  11. )
  12. from core.model_runtime.entities.model_entities import AIModelEntity
  13. from core.model_runtime.errors.validate import CredentialsValidateFailedError
  14. from core.model_runtime.model_providers.xinference.llm.llm import XinferenceAILargeLanguageModel
  15. """FOR MOCK FIXTURES, DO NOT REMOVE"""
  16. from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock
  17. from tests.integration_tests.model_runtime.__mock.xinference import setup_xinference_mock
  18. @pytest.mark.parametrize(("setup_openai_mock", "setup_xinference_mock"), [("chat", "none")], indirect=True)
  19. def test_validate_credentials_for_chat_model(setup_openai_mock, setup_xinference_mock):
  20. model = XinferenceAILargeLanguageModel()
  21. with pytest.raises(CredentialsValidateFailedError):
  22. model.validate_credentials(
  23. model="ChatGLM3",
  24. credentials={
  25. "server_url": os.environ.get("XINFERENCE_SERVER_URL"),
  26. "model_uid": "www " + os.environ.get("XINFERENCE_CHAT_MODEL_UID"),
  27. },
  28. )
  29. with pytest.raises(CredentialsValidateFailedError):
  30. model.validate_credentials(model="aaaaa", credentials={"server_url": "", "model_uid": ""})
  31. model.validate_credentials(
  32. model="ChatGLM3",
  33. credentials={
  34. "server_url": os.environ.get("XINFERENCE_SERVER_URL"),
  35. "model_uid": os.environ.get("XINFERENCE_CHAT_MODEL_UID"),
  36. },
  37. )
  38. @pytest.mark.parametrize(("setup_openai_mock", "setup_xinference_mock"), [("chat", "none")], indirect=True)
  39. def test_invoke_chat_model(setup_openai_mock, setup_xinference_mock):
  40. model = XinferenceAILargeLanguageModel()
  41. response = model.invoke(
  42. model="ChatGLM3",
  43. credentials={
  44. "server_url": os.environ.get("XINFERENCE_SERVER_URL"),
  45. "model_uid": os.environ.get("XINFERENCE_CHAT_MODEL_UID"),
  46. },
  47. prompt_messages=[
  48. SystemPromptMessage(
  49. content="You are a helpful AI assistant.",
  50. ),
  51. UserPromptMessage(content="Hello World!"),
  52. ],
  53. model_parameters={
  54. "temperature": 0.7,
  55. "top_p": 1.0,
  56. },
  57. stop=["you"],
  58. user="abc-123",
  59. stream=False,
  60. )
  61. assert isinstance(response, LLMResult)
  62. assert len(response.message.content) > 0
  63. assert response.usage.total_tokens > 0
  64. @pytest.mark.parametrize(("setup_openai_mock", "setup_xinference_mock"), [("chat", "none")], indirect=True)
  65. def test_invoke_stream_chat_model(setup_openai_mock, setup_xinference_mock):
  66. model = XinferenceAILargeLanguageModel()
  67. response = model.invoke(
  68. model="ChatGLM3",
  69. credentials={
  70. "server_url": os.environ.get("XINFERENCE_SERVER_URL"),
  71. "model_uid": os.environ.get("XINFERENCE_CHAT_MODEL_UID"),
  72. },
  73. prompt_messages=[
  74. SystemPromptMessage(
  75. content="You are a helpful AI assistant.",
  76. ),
  77. UserPromptMessage(content="Hello World!"),
  78. ],
  79. model_parameters={
  80. "temperature": 0.7,
  81. "top_p": 1.0,
  82. },
  83. stop=["you"],
  84. stream=True,
  85. user="abc-123",
  86. )
  87. assert isinstance(response, Generator)
  88. for chunk in response:
  89. assert isinstance(chunk, LLMResultChunk)
  90. assert isinstance(chunk.delta, LLMResultChunkDelta)
  91. assert isinstance(chunk.delta.message, AssistantPromptMessage)
  92. assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True
"""
Xinference function calling does not currently support stream mode.
"""
  96. # def test_invoke_stream_chat_model_with_functions():
  97. # model = XinferenceAILargeLanguageModel()
  98. # response = model.invoke(
  99. # model='ChatGLM3-6b',
  100. # credentials={
  101. # 'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
  102. # 'model_type': 'text-generation',
  103. # 'model_name': 'ChatGLM3',
  104. # 'model_uid': os.environ.get('XINFERENCE_CHAT_MODEL_UID')
  105. # },
  106. # prompt_messages=[
  107. # SystemPromptMessage(
  108. # content='你是一个天气机器人,可以通过调用函数来获取天气信息',
  109. # ),
  110. # UserPromptMessage(
  111. # content='波士顿天气如何?'
  112. # )
  113. # ],
  114. # model_parameters={
  115. # 'temperature': 0,
  116. # 'top_p': 1.0,
  117. # },
  118. # stop=['you'],
  119. # user='abc-123',
  120. # stream=True,
  121. # tools=[
  122. # PromptMessageTool(
  123. # name='get_current_weather',
  124. # description='Get the current weather in a given location',
  125. # parameters={
  126. # "type": "object",
  127. # "properties": {
  128. # "location": {
  129. # "type": "string",
  130. # "description": "The city and state e.g. San Francisco, CA"
  131. # },
  132. # "unit": {
  133. # "type": "string",
  134. # "enum": ["celsius", "fahrenheit"]
  135. # }
  136. # },
  137. # "required": [
  138. # "location"
  139. # ]
  140. # }
  141. # )
  142. # ]
  143. # )
  144. # assert isinstance(response, Generator)
  145. # call: LLMResultChunk = None
  146. # chunks = []
  147. # for chunk in response:
  148. # chunks.append(chunk)
  149. # assert isinstance(chunk, LLMResultChunk)
  150. # assert isinstance(chunk.delta, LLMResultChunkDelta)
  151. # assert isinstance(chunk.delta.message, AssistantPromptMessage)
  152. # assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True
  153. # if chunk.delta.message.tool_calls and len(chunk.delta.message.tool_calls) > 0:
  154. # call = chunk
  155. # break
  156. # assert call is not None
  157. # assert call.delta.message.tool_calls[0].function.name == 'get_current_weather'
  158. # def test_invoke_chat_model_with_functions():
  159. # model = XinferenceAILargeLanguageModel()
  160. # response = model.invoke(
  161. # model='ChatGLM3-6b',
  162. # credentials={
  163. # 'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
  164. # 'model_type': 'text-generation',
  165. # 'model_name': 'ChatGLM3',
  166. # 'model_uid': os.environ.get('XINFERENCE_CHAT_MODEL_UID')
  167. # },
  168. # prompt_messages=[
  169. # UserPromptMessage(
  170. # content='What is the weather like in San Francisco?'
  171. # )
  172. # ],
  173. # model_parameters={
  174. # 'temperature': 0.7,
  175. # 'top_p': 1.0,
  176. # },
  177. # stop=['you'],
  178. # user='abc-123',
  179. # stream=False,
  180. # tools=[
  181. # PromptMessageTool(
  182. # name='get_current_weather',
  183. # description='Get the current weather in a given location',
  184. # parameters={
  185. # "type": "object",
  186. # "properties": {
  187. # "location": {
  188. # "type": "string",
  189. # "description": "The city and state e.g. San Francisco, CA"
  190. # },
  191. # "unit": {
  192. # "type": "string",
  193. # "enum": [
  194. # "c",
  195. # "f"
  196. # ]
  197. # }
  198. # },
  199. # "required": [
  200. # "location"
  201. # ]
  202. # }
  203. # )
  204. # ]
  205. # )
  206. # assert isinstance(response, LLMResult)
  207. # assert len(response.message.content) > 0
  208. # assert response.usage.total_tokens > 0
  209. # assert response.message.tool_calls[0].function.name == 'get_current_weather'
  210. @pytest.mark.parametrize(("setup_openai_mock", "setup_xinference_mock"), [("completion", "none")], indirect=True)
  211. def test_validate_credentials_for_generation_model(setup_openai_mock, setup_xinference_mock):
  212. model = XinferenceAILargeLanguageModel()
  213. with pytest.raises(CredentialsValidateFailedError):
  214. model.validate_credentials(
  215. model="alapaca",
  216. credentials={
  217. "server_url": os.environ.get("XINFERENCE_SERVER_URL"),
  218. "model_uid": "www " + os.environ.get("XINFERENCE_GENERATION_MODEL_UID"),
  219. },
  220. )
  221. with pytest.raises(CredentialsValidateFailedError):
  222. model.validate_credentials(model="alapaca", credentials={"server_url": "", "model_uid": ""})
  223. model.validate_credentials(
  224. model="alapaca",
  225. credentials={
  226. "server_url": os.environ.get("XINFERENCE_SERVER_URL"),
  227. "model_uid": os.environ.get("XINFERENCE_GENERATION_MODEL_UID"),
  228. },
  229. )
  230. @pytest.mark.parametrize(("setup_openai_mock", "setup_xinference_mock"), [("completion", "none")], indirect=True)
  231. def test_invoke_generation_model(setup_openai_mock, setup_xinference_mock):
  232. model = XinferenceAILargeLanguageModel()
  233. response = model.invoke(
  234. model="alapaca",
  235. credentials={
  236. "server_url": os.environ.get("XINFERENCE_SERVER_URL"),
  237. "model_uid": os.environ.get("XINFERENCE_GENERATION_MODEL_UID"),
  238. },
  239. prompt_messages=[UserPromptMessage(content="the United States is")],
  240. model_parameters={
  241. "temperature": 0.7,
  242. "top_p": 1.0,
  243. },
  244. stop=["you"],
  245. user="abc-123",
  246. stream=False,
  247. )
  248. assert isinstance(response, LLMResult)
  249. assert len(response.message.content) > 0
  250. assert response.usage.total_tokens > 0
  251. @pytest.mark.parametrize(("setup_openai_mock", "setup_xinference_mock"), [("completion", "none")], indirect=True)
  252. def test_invoke_stream_generation_model(setup_openai_mock, setup_xinference_mock):
  253. model = XinferenceAILargeLanguageModel()
  254. response = model.invoke(
  255. model="alapaca",
  256. credentials={
  257. "server_url": os.environ.get("XINFERENCE_SERVER_URL"),
  258. "model_uid": os.environ.get("XINFERENCE_GENERATION_MODEL_UID"),
  259. },
  260. prompt_messages=[UserPromptMessage(content="the United States is")],
  261. model_parameters={
  262. "temperature": 0.7,
  263. "top_p": 1.0,
  264. },
  265. stop=["you"],
  266. stream=True,
  267. user="abc-123",
  268. )
  269. assert isinstance(response, Generator)
  270. for chunk in response:
  271. assert isinstance(chunk, LLMResultChunk)
  272. assert isinstance(chunk.delta, LLMResultChunkDelta)
  273. assert isinstance(chunk.delta.message, AssistantPromptMessage)
  274. assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True
  275. def test_get_num_tokens():
  276. model = XinferenceAILargeLanguageModel()
  277. num_tokens = model.get_num_tokens(
  278. model="ChatGLM3",
  279. credentials={
  280. "server_url": os.environ.get("XINFERENCE_SERVER_URL"),
  281. "model_uid": os.environ.get("XINFERENCE_GENERATION_MODEL_UID"),
  282. },
  283. prompt_messages=[
  284. SystemPromptMessage(
  285. content="You are a helpful AI assistant.",
  286. ),
  287. UserPromptMessage(content="Hello World!"),
  288. ],
  289. tools=[
  290. PromptMessageTool(
  291. name="get_current_weather",
  292. description="Get the current weather in a given location",
  293. parameters={
  294. "type": "object",
  295. "properties": {
  296. "location": {"type": "string", "description": "The city and state e.g. San Francisco, CA"},
  297. "unit": {"type": "string", "enum": ["c", "f"]},
  298. },
  299. "required": ["location"],
  300. },
  301. )
  302. ],
  303. )
  304. assert isinstance(num_tokens, int)
  305. assert num_tokens == 77
  306. num_tokens = model.get_num_tokens(
  307. model="ChatGLM3",
  308. credentials={
  309. "server_url": os.environ.get("XINFERENCE_SERVER_URL"),
  310. "model_uid": os.environ.get("XINFERENCE_GENERATION_MODEL_UID"),
  311. },
  312. prompt_messages=[
  313. SystemPromptMessage(
  314. content="You are a helpful AI assistant.",
  315. ),
  316. UserPromptMessage(content="Hello World!"),
  317. ],
  318. )
  319. assert isinstance(num_tokens, int)
  320. assert num_tokens == 21