test_llm.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364
  1. import os
  2. from collections.abc import Generator
  3. import pytest
  4. from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
  5. from core.model_runtime.entities.message_entities import (
  6. AssistantPromptMessage,
  7. PromptMessageTool,
  8. SystemPromptMessage,
  9. UserPromptMessage,
  10. )
  11. from core.model_runtime.errors.validate import CredentialsValidateFailedError
  12. from core.model_runtime.model_providers.xinference.llm.llm import XinferenceAILargeLanguageModel
  13. """FOR MOCK FIXTURES, DO NOT REMOVE"""
  14. from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock
  15. from tests.integration_tests.model_runtime.__mock.xinference import setup_xinference_mock
  16. @pytest.mark.parametrize(("setup_openai_mock", "setup_xinference_mock"), [("chat", "none")], indirect=True)
  17. def test_validate_credentials_for_chat_model(setup_openai_mock, setup_xinference_mock):
  18. model = XinferenceAILargeLanguageModel()
  19. with pytest.raises(CredentialsValidateFailedError):
  20. model.validate_credentials(
  21. model="ChatGLM3",
  22. credentials={
  23. "server_url": os.environ.get("XINFERENCE_SERVER_URL"),
  24. "model_uid": "www " + os.environ.get("XINFERENCE_CHAT_MODEL_UID"),
  25. },
  26. )
  27. with pytest.raises(CredentialsValidateFailedError):
  28. model.validate_credentials(model="aaaaa", credentials={"server_url": "", "model_uid": ""})
  29. model.validate_credentials(
  30. model="ChatGLM3",
  31. credentials={
  32. "server_url": os.environ.get("XINFERENCE_SERVER_URL"),
  33. "model_uid": os.environ.get("XINFERENCE_CHAT_MODEL_UID"),
  34. },
  35. )
  36. @pytest.mark.parametrize(("setup_openai_mock", "setup_xinference_mock"), [("chat", "none")], indirect=True)
  37. def test_invoke_chat_model(setup_openai_mock, setup_xinference_mock):
  38. model = XinferenceAILargeLanguageModel()
  39. response = model.invoke(
  40. model="ChatGLM3",
  41. credentials={
  42. "server_url": os.environ.get("XINFERENCE_SERVER_URL"),
  43. "model_uid": os.environ.get("XINFERENCE_CHAT_MODEL_UID"),
  44. },
  45. prompt_messages=[
  46. SystemPromptMessage(
  47. content="You are a helpful AI assistant.",
  48. ),
  49. UserPromptMessage(content="Hello World!"),
  50. ],
  51. model_parameters={
  52. "temperature": 0.7,
  53. "top_p": 1.0,
  54. },
  55. stop=["you"],
  56. user="abc-123",
  57. stream=False,
  58. )
  59. assert isinstance(response, LLMResult)
  60. assert len(response.message.content) > 0
  61. assert response.usage.total_tokens > 0
  62. @pytest.mark.parametrize(("setup_openai_mock", "setup_xinference_mock"), [("chat", "none")], indirect=True)
  63. def test_invoke_stream_chat_model(setup_openai_mock, setup_xinference_mock):
  64. model = XinferenceAILargeLanguageModel()
  65. response = model.invoke(
  66. model="ChatGLM3",
  67. credentials={
  68. "server_url": os.environ.get("XINFERENCE_SERVER_URL"),
  69. "model_uid": os.environ.get("XINFERENCE_CHAT_MODEL_UID"),
  70. },
  71. prompt_messages=[
  72. SystemPromptMessage(
  73. content="You are a helpful AI assistant.",
  74. ),
  75. UserPromptMessage(content="Hello World!"),
  76. ],
  77. model_parameters={
  78. "temperature": 0.7,
  79. "top_p": 1.0,
  80. },
  81. stop=["you"],
  82. stream=True,
  83. user="abc-123",
  84. )
  85. assert isinstance(response, Generator)
  86. for chunk in response:
  87. assert isinstance(chunk, LLMResultChunk)
  88. assert isinstance(chunk.delta, LLMResultChunkDelta)
  89. assert isinstance(chunk.delta.message, AssistantPromptMessage)
  90. assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True
  91. """
  92. Xinference function calling does not currently support stream mode
  93. """
  94. # def test_invoke_stream_chat_model_with_functions():
  95. # model = XinferenceAILargeLanguageModel()
  96. # response = model.invoke(
  97. # model='ChatGLM3-6b',
  98. # credentials={
  99. # 'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
  100. # 'model_type': 'text-generation',
  101. # 'model_name': 'ChatGLM3',
  102. # 'model_uid': os.environ.get('XINFERENCE_CHAT_MODEL_UID')
  103. # },
  104. # prompt_messages=[
  105. # SystemPromptMessage(
  106. # content='你是一个天气机器人,可以通过调用函数来获取天气信息',
  107. # ),
  108. # UserPromptMessage(
  109. # content='波士顿天气如何?'
  110. # )
  111. # ],
  112. # model_parameters={
  113. # 'temperature': 0,
  114. # 'top_p': 1.0,
  115. # },
  116. # stop=['you'],
  117. # user='abc-123',
  118. # stream=True,
  119. # tools=[
  120. # PromptMessageTool(
  121. # name='get_current_weather',
  122. # description='Get the current weather in a given location',
  123. # parameters={
  124. # "type": "object",
  125. # "properties": {
  126. # "location": {
  127. # "type": "string",
  128. # "description": "The city and state e.g. San Francisco, CA"
  129. # },
  130. # "unit": {
  131. # "type": "string",
  132. # "enum": ["celsius", "fahrenheit"]
  133. # }
  134. # },
  135. # "required": [
  136. # "location"
  137. # ]
  138. # }
  139. # )
  140. # ]
  141. # )
  142. # assert isinstance(response, Generator)
  143. # call: LLMResultChunk = None
  144. # chunks = []
  145. # for chunk in response:
  146. # chunks.append(chunk)
  147. # assert isinstance(chunk, LLMResultChunk)
  148. # assert isinstance(chunk.delta, LLMResultChunkDelta)
  149. # assert isinstance(chunk.delta.message, AssistantPromptMessage)
  150. # assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True
  151. # if chunk.delta.message.tool_calls and len(chunk.delta.message.tool_calls) > 0:
  152. # call = chunk
  153. # break
  154. # assert call is not None
  155. # assert call.delta.message.tool_calls[0].function.name == 'get_current_weather'
  156. # def test_invoke_chat_model_with_functions():
  157. # model = XinferenceAILargeLanguageModel()
  158. # response = model.invoke(
  159. # model='ChatGLM3-6b',
  160. # credentials={
  161. # 'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
  162. # 'model_type': 'text-generation',
  163. # 'model_name': 'ChatGLM3',
  164. # 'model_uid': os.environ.get('XINFERENCE_CHAT_MODEL_UID')
  165. # },
  166. # prompt_messages=[
  167. # UserPromptMessage(
  168. # content='What is the weather like in San Francisco?'
  169. # )
  170. # ],
  171. # model_parameters={
  172. # 'temperature': 0.7,
  173. # 'top_p': 1.0,
  174. # },
  175. # stop=['you'],
  176. # user='abc-123',
  177. # stream=False,
  178. # tools=[
  179. # PromptMessageTool(
  180. # name='get_current_weather',
  181. # description='Get the current weather in a given location',
  182. # parameters={
  183. # "type": "object",
  184. # "properties": {
  185. # "location": {
  186. # "type": "string",
  187. # "description": "The city and state e.g. San Francisco, CA"
  188. # },
  189. # "unit": {
  190. # "type": "string",
  191. # "enum": [
  192. # "c",
  193. # "f"
  194. # ]
  195. # }
  196. # },
  197. # "required": [
  198. # "location"
  199. # ]
  200. # }
  201. # )
  202. # ]
  203. # )
  204. # assert isinstance(response, LLMResult)
  205. # assert len(response.message.content) > 0
  206. # assert response.usage.total_tokens > 0
  207. # assert response.message.tool_calls[0].function.name == 'get_current_weather'
  208. @pytest.mark.parametrize(("setup_openai_mock", "setup_xinference_mock"), [("completion", "none")], indirect=True)
  209. def test_validate_credentials_for_generation_model(setup_openai_mock, setup_xinference_mock):
  210. model = XinferenceAILargeLanguageModel()
  211. with pytest.raises(CredentialsValidateFailedError):
  212. model.validate_credentials(
  213. model="alapaca",
  214. credentials={
  215. "server_url": os.environ.get("XINFERENCE_SERVER_URL"),
  216. "model_uid": "www " + os.environ.get("XINFERENCE_GENERATION_MODEL_UID"),
  217. },
  218. )
  219. with pytest.raises(CredentialsValidateFailedError):
  220. model.validate_credentials(model="alapaca", credentials={"server_url": "", "model_uid": ""})
  221. model.validate_credentials(
  222. model="alapaca",
  223. credentials={
  224. "server_url": os.environ.get("XINFERENCE_SERVER_URL"),
  225. "model_uid": os.environ.get("XINFERENCE_GENERATION_MODEL_UID"),
  226. },
  227. )
  228. @pytest.mark.parametrize(("setup_openai_mock", "setup_xinference_mock"), [("completion", "none")], indirect=True)
  229. def test_invoke_generation_model(setup_openai_mock, setup_xinference_mock):
  230. model = XinferenceAILargeLanguageModel()
  231. response = model.invoke(
  232. model="alapaca",
  233. credentials={
  234. "server_url": os.environ.get("XINFERENCE_SERVER_URL"),
  235. "model_uid": os.environ.get("XINFERENCE_GENERATION_MODEL_UID"),
  236. },
  237. prompt_messages=[UserPromptMessage(content="the United States is")],
  238. model_parameters={
  239. "temperature": 0.7,
  240. "top_p": 1.0,
  241. },
  242. stop=["you"],
  243. user="abc-123",
  244. stream=False,
  245. )
  246. assert isinstance(response, LLMResult)
  247. assert len(response.message.content) > 0
  248. assert response.usage.total_tokens > 0
  249. @pytest.mark.parametrize(("setup_openai_mock", "setup_xinference_mock"), [("completion", "none")], indirect=True)
  250. def test_invoke_stream_generation_model(setup_openai_mock, setup_xinference_mock):
  251. model = XinferenceAILargeLanguageModel()
  252. response = model.invoke(
  253. model="alapaca",
  254. credentials={
  255. "server_url": os.environ.get("XINFERENCE_SERVER_URL"),
  256. "model_uid": os.environ.get("XINFERENCE_GENERATION_MODEL_UID"),
  257. },
  258. prompt_messages=[UserPromptMessage(content="the United States is")],
  259. model_parameters={
  260. "temperature": 0.7,
  261. "top_p": 1.0,
  262. },
  263. stop=["you"],
  264. stream=True,
  265. user="abc-123",
  266. )
  267. assert isinstance(response, Generator)
  268. for chunk in response:
  269. assert isinstance(chunk, LLMResultChunk)
  270. assert isinstance(chunk.delta, LLMResultChunkDelta)
  271. assert isinstance(chunk.delta.message, AssistantPromptMessage)
  272. assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True
  273. def test_get_num_tokens():
  274. model = XinferenceAILargeLanguageModel()
  275. num_tokens = model.get_num_tokens(
  276. model="ChatGLM3",
  277. credentials={
  278. "server_url": os.environ.get("XINFERENCE_SERVER_URL"),
  279. "model_uid": os.environ.get("XINFERENCE_GENERATION_MODEL_UID"),
  280. },
  281. prompt_messages=[
  282. SystemPromptMessage(
  283. content="You are a helpful AI assistant.",
  284. ),
  285. UserPromptMessage(content="Hello World!"),
  286. ],
  287. tools=[
  288. PromptMessageTool(
  289. name="get_current_weather",
  290. description="Get the current weather in a given location",
  291. parameters={
  292. "type": "object",
  293. "properties": {
  294. "location": {"type": "string", "description": "The city and state e.g. San Francisco, CA"},
  295. "unit": {"type": "string", "enum": ["c", "f"]},
  296. },
  297. "required": ["location"],
  298. },
  299. )
  300. ],
  301. )
  302. assert isinstance(num_tokens, int)
  303. assert num_tokens == 77
  304. num_tokens = model.get_num_tokens(
  305. model="ChatGLM3",
  306. credentials={
  307. "server_url": os.environ.get("XINFERENCE_SERVER_URL"),
  308. "model_uid": os.environ.get("XINFERENCE_GENERATION_MODEL_UID"),
  309. },
  310. prompt_messages=[
  311. SystemPromptMessage(
  312. content="You are a helpful AI assistant.",
  313. ),
  314. UserPromptMessage(content="Hello World!"),
  315. ],
  316. )
  317. assert isinstance(num_tokens, int)
  318. assert num_tokens == 21