123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366 |
- import os
- from collections.abc import Generator
- import pytest
- from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
- from core.model_runtime.entities.message_entities import (
- AssistantPromptMessage,
- PromptMessageTool,
- SystemPromptMessage,
- TextPromptMessageContent,
- UserPromptMessage,
- )
- from core.model_runtime.entities.model_entities import AIModelEntity
- from core.model_runtime.errors.validate import CredentialsValidateFailedError
- from core.model_runtime.model_providers.xinference.llm.llm import XinferenceAILargeLanguageModel
- """FOR MOCK FIXTURES, DO NOT REMOVE"""
- from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock
- from tests.integration_tests.model_runtime.__mock.xinference import setup_xinference_mock
@pytest.mark.parametrize(("setup_openai_mock", "setup_xinference_mock"), [("chat", "none")], indirect=True)
def test_validate_credentials_for_chat_model(setup_openai_mock, setup_xinference_mock):
    """Check credential validation for a chat model.

    Two invalid credential sets (a model UID prefixed with ``www ``, then an
    empty server URL and UID) must raise ``CredentialsValidateFailedError``.
    The credentials read from the environment must validate without raising.
    """
    model = XinferenceAILargeLanguageModel()
    server_url = os.environ.get("XINFERENCE_SERVER_URL")
    model_uid = os.environ.get("XINFERENCE_CHAT_MODEL_UID")

    # The f-string keeps the bogus UID a plain string even when the environment
    # variable is unset; `"www " + None` would raise TypeError before
    # validate_credentials is ever reached.
    with pytest.raises(CredentialsValidateFailedError):
        model.validate_credentials(
            model="ChatGLM3",
            credentials={
                "server_url": server_url,
                "model_uid": f"www {model_uid}",
            },
        )

    # Empty server URL and model UID must be rejected as well.
    with pytest.raises(CredentialsValidateFailedError):
        model.validate_credentials(model="aaaaa", credentials={"server_url": "", "model_uid": ""})

    # Positive case: not wrapped in pytest.raises, so any exception fails the test.
    model.validate_credentials(
        model="ChatGLM3",
        credentials={
            "server_url": server_url,
            "model_uid": model_uid,
        },
    )
@pytest.mark.parametrize(("setup_openai_mock", "setup_xinference_mock"), [("chat", "none")], indirect=True)
def test_invoke_chat_model(setup_openai_mock, setup_xinference_mock):
    """Invoke the chat model with ``stream=False`` and sanity-check the result.

    The call must return an ``LLMResult`` whose message content is non-empty
    and whose usage reports a positive total token count.
    """
    llm = XinferenceAILargeLanguageModel()

    credentials = {
        "server_url": os.environ.get("XINFERENCE_SERVER_URL"),
        "model_uid": os.environ.get("XINFERENCE_CHAT_MODEL_UID"),
    }
    conversation = [
        SystemPromptMessage(content="You are a helpful AI assistant."),
        UserPromptMessage(content="Hello World!"),
    ]
    sampling = {"temperature": 0.7, "top_p": 1.0}

    result = llm.invoke(
        model="ChatGLM3",
        credentials=credentials,
        prompt_messages=conversation,
        model_parameters=sampling,
        stop=["you"],
        user="abc-123",
        stream=False,
    )

    assert isinstance(result, LLMResult)
    assert len(result.message.content) > 0
    assert result.usage.total_tokens > 0
@pytest.mark.parametrize(("setup_openai_mock", "setup_xinference_mock"), [("chat", "none")], indirect=True)
def test_invoke_stream_chat_model(setup_openai_mock, setup_xinference_mock):
    """Invoke the chat model with ``stream=True`` and check every chunk.

    The call must return a generator. Each yielded item must be an
    ``LLMResultChunk`` carrying an ``LLMResultChunkDelta`` with an
    ``AssistantPromptMessage``; chunks without a finish reason must have
    non-empty content.
    """
    llm = XinferenceAILargeLanguageModel()

    credentials = {
        "server_url": os.environ.get("XINFERENCE_SERVER_URL"),
        "model_uid": os.environ.get("XINFERENCE_CHAT_MODEL_UID"),
    }
    conversation = [
        SystemPromptMessage(content="You are a helpful AI assistant."),
        UserPromptMessage(content="Hello World!"),
    ]
    sampling = {"temperature": 0.7, "top_p": 1.0}

    stream = llm.invoke(
        model="ChatGLM3",
        credentials=credentials,
        prompt_messages=conversation,
        model_parameters=sampling,
        stop=["you"],
        stream=True,
        user="abc-123",
    )

    assert isinstance(stream, Generator)
    for piece in stream:
        assert isinstance(piece, LLMResultChunk)
        delta = piece.delta
        assert isinstance(delta, LLMResultChunkDelta)
        assert isinstance(delta.message, AssistantPromptMessage)
        # Content is only required while the stream has not finished.
        if delta.finish_reason is None:
            assert len(delta.message.content) > 0
- """
- Function calling in Xinference does not currently support stream mode.
- """
- # def test_invoke_stream_chat_model_with_functions():
- # model = XinferenceAILargeLanguageModel()
- # response = model.invoke(
- # model='ChatGLM3-6b',
- # credentials={
- # 'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
- # 'model_type': 'text-generation',
- # 'model_name': 'ChatGLM3',
- # 'model_uid': os.environ.get('XINFERENCE_CHAT_MODEL_UID')
- # },
- # prompt_messages=[
- # SystemPromptMessage(
- # content='你是一个天气机器人,可以通过调用函数来获取天气信息',
- # ),
- # UserPromptMessage(
- # content='波士顿天气如何?'
- # )
- # ],
- # model_parameters={
- # 'temperature': 0,
- # 'top_p': 1.0,
- # },
- # stop=['you'],
- # user='abc-123',
- # stream=True,
- # tools=[
- # PromptMessageTool(
- # name='get_current_weather',
- # description='Get the current weather in a given location',
- # parameters={
- # "type": "object",
- # "properties": {
- # "location": {
- # "type": "string",
- # "description": "The city and state e.g. San Francisco, CA"
- # },
- # "unit": {
- # "type": "string",
- # "enum": ["celsius", "fahrenheit"]
- # }
- # },
- # "required": [
- # "location"
- # ]
- # }
- # )
- # ]
- # )
- # assert isinstance(response, Generator)
- # call: LLMResultChunk = None
- # chunks = []
- # for chunk in response:
- # chunks.append(chunk)
- # assert isinstance(chunk, LLMResultChunk)
- # assert isinstance(chunk.delta, LLMResultChunkDelta)
- # assert isinstance(chunk.delta.message, AssistantPromptMessage)
- # assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True
- # if chunk.delta.message.tool_calls and len(chunk.delta.message.tool_calls) > 0:
- # call = chunk
- # break
- # assert call is not None
- # assert call.delta.message.tool_calls[0].function.name == 'get_current_weather'
- # def test_invoke_chat_model_with_functions():
- # model = XinferenceAILargeLanguageModel()
- # response = model.invoke(
- # model='ChatGLM3-6b',
- # credentials={
- # 'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
- # 'model_type': 'text-generation',
- # 'model_name': 'ChatGLM3',
- # 'model_uid': os.environ.get('XINFERENCE_CHAT_MODEL_UID')
- # },
- # prompt_messages=[
- # UserPromptMessage(
- # content='What is the weather like in San Francisco?'
- # )
- # ],
- # model_parameters={
- # 'temperature': 0.7,
- # 'top_p': 1.0,
- # },
- # stop=['you'],
- # user='abc-123',
- # stream=False,
- # tools=[
- # PromptMessageTool(
- # name='get_current_weather',
- # description='Get the current weather in a given location',
- # parameters={
- # "type": "object",
- # "properties": {
- # "location": {
- # "type": "string",
- # "description": "The city and state e.g. San Francisco, CA"
- # },
- # "unit": {
- # "type": "string",
- # "enum": [
- # "c",
- # "f"
- # ]
- # }
- # },
- # "required": [
- # "location"
- # ]
- # }
- # )
- # ]
- # )
- # assert isinstance(response, LLMResult)
- # assert len(response.message.content) > 0
- # assert response.usage.total_tokens > 0
- # assert response.message.tool_calls[0].function.name == 'get_current_weather'
@pytest.mark.parametrize(("setup_openai_mock", "setup_xinference_mock"), [("completion", "none")], indirect=True)
def test_validate_credentials_for_generation_model(setup_openai_mock, setup_xinference_mock):
    """Check credential validation for a text-generation model.

    Two invalid credential sets (a model UID prefixed with ``www ``, then an
    empty server URL and UID) must raise ``CredentialsValidateFailedError``.
    The credentials read from the environment must validate without raising.
    """
    model = XinferenceAILargeLanguageModel()
    server_url = os.environ.get("XINFERENCE_SERVER_URL")
    model_uid = os.environ.get("XINFERENCE_GENERATION_MODEL_UID")

    # The f-string keeps the bogus UID a plain string even when the environment
    # variable is unset; `"www " + None` would raise TypeError before
    # validate_credentials is ever reached.
    with pytest.raises(CredentialsValidateFailedError):
        model.validate_credentials(
            model="alapaca",
            credentials={
                "server_url": server_url,
                "model_uid": f"www {model_uid}",
            },
        )

    # Empty server URL and model UID must be rejected as well.
    with pytest.raises(CredentialsValidateFailedError):
        model.validate_credentials(model="alapaca", credentials={"server_url": "", "model_uid": ""})

    # Positive case: not wrapped in pytest.raises, so any exception fails the test.
    model.validate_credentials(
        model="alapaca",
        credentials={
            "server_url": server_url,
            "model_uid": model_uid,
        },
    )
@pytest.mark.parametrize(("setup_openai_mock", "setup_xinference_mock"), [("completion", "none")], indirect=True)
def test_invoke_generation_model(setup_openai_mock, setup_xinference_mock):
    """Invoke the generation model with ``stream=False`` and sanity-check the result.

    The call must return an ``LLMResult`` whose message content is non-empty
    and whose usage reports a positive total token count.
    """
    llm = XinferenceAILargeLanguageModel()

    credentials = {
        "server_url": os.environ.get("XINFERENCE_SERVER_URL"),
        "model_uid": os.environ.get("XINFERENCE_GENERATION_MODEL_UID"),
    }
    prompt = [UserPromptMessage(content="the United States is")]
    sampling = {"temperature": 0.7, "top_p": 1.0}

    result = llm.invoke(
        model="alapaca",
        credentials=credentials,
        prompt_messages=prompt,
        model_parameters=sampling,
        stop=["you"],
        user="abc-123",
        stream=False,
    )

    assert isinstance(result, LLMResult)
    assert len(result.message.content) > 0
    assert result.usage.total_tokens > 0
@pytest.mark.parametrize(("setup_openai_mock", "setup_xinference_mock"), [("completion", "none")], indirect=True)
def test_invoke_stream_generation_model(setup_openai_mock, setup_xinference_mock):
    """Invoke the generation model with ``stream=True`` and check every chunk.

    The call must return a generator. Each yielded item must be an
    ``LLMResultChunk`` carrying an ``LLMResultChunkDelta`` with an
    ``AssistantPromptMessage``; chunks without a finish reason must have
    non-empty content.
    """
    llm = XinferenceAILargeLanguageModel()

    credentials = {
        "server_url": os.environ.get("XINFERENCE_SERVER_URL"),
        "model_uid": os.environ.get("XINFERENCE_GENERATION_MODEL_UID"),
    }
    prompt = [UserPromptMessage(content="the United States is")]
    sampling = {"temperature": 0.7, "top_p": 1.0}

    stream = llm.invoke(
        model="alapaca",
        credentials=credentials,
        prompt_messages=prompt,
        model_parameters=sampling,
        stop=["you"],
        stream=True,
        user="abc-123",
    )

    assert isinstance(stream, Generator)
    for piece in stream:
        assert isinstance(piece, LLMResultChunk)
        delta = piece.delta
        assert isinstance(delta, LLMResultChunkDelta)
        assert isinstance(delta.message, AssistantPromptMessage)
        # Content is only required while the stream has not finished.
        if delta.finish_reason is None:
            assert len(delta.message.content) > 0
def test_get_num_tokens():
    """Check token counting for the same prompt with and without a tool.

    The prompt plus the ``get_current_weather`` tool definition must count as
    77 tokens; the prompt alone must count as 21. Both results must be ints.
    """
    llm = XinferenceAILargeLanguageModel()

    def shared_kwargs():
        # Built fresh on every call so the two invocations share no objects.
        return {
            "model": "ChatGLM3",
            "credentials": {
                "server_url": os.environ.get("XINFERENCE_SERVER_URL"),
                "model_uid": os.environ.get("XINFERENCE_GENERATION_MODEL_UID"),
            },
            "prompt_messages": [
                SystemPromptMessage(content="You are a helpful AI assistant."),
                UserPromptMessage(content="Hello World!"),
            ],
        }

    # First pass: prompt plus a single tool definition.
    count_with_tool = llm.get_num_tokens(
        **shared_kwargs(),
        tools=[
            PromptMessageTool(
                name="get_current_weather",
                description="Get the current weather in a given location",
                parameters={
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state e.g. San Francisco, CA",
                        },
                        "unit": {"type": "string", "enum": ["c", "f"]},
                    },
                    "required": ["location"],
                },
            )
        ],
    )
    assert isinstance(count_with_tool, int)
    assert count_with_tool == 77

    # Second pass: identical prompt, no tools.
    count_without_tool = llm.get_num_tokens(**shared_kwargs())
    assert isinstance(count_without_tool, int)
    assert count_without_tool == 21
|