import os
from collections.abc import Generator

import pytest

from core.model_runtime.entities.llm_entities import LLMResult, LLMResultChunk, LLMResultChunkDelta
from core.model_runtime.entities.message_entities import (
    AssistantPromptMessage,
    PromptMessageTool,
    SystemPromptMessage,
    TextPromptMessageContent,
    UserPromptMessage,
)
from core.model_runtime.entities.model_entities import AIModelEntity
from core.model_runtime.errors.validate import CredentialsValidateFailedError
from core.model_runtime.model_providers.xinference.llm.llm import XinferenceAILargeLanguageModel

# FOR MOCK FIXTURES, DO NOT REMOVE
# (the two names below are requested as fixtures by the parametrized tests)
from tests.integration_tests.model_runtime.__mock.openai import setup_openai_mock
from tests.integration_tests.model_runtime.__mock.xinference import setup_xinference_mock

# Names of the environment variables that hold the model UIDs used below.
_CHAT_UID_ENV = 'XINFERENCE_CHAT_MODEL_UID'
_GENERATION_UID_ENV = 'XINFERENCE_GENERATION_MODEL_UID'


def _credentials(model_uid_env):
    """Return the credentials dict shared by the tests in this module.

    Args:
        model_uid_env: name of the environment variable holding the model UID.

    Returns:
        A dict with ``server_url`` read from ``XINFERENCE_SERVER_URL`` and
        ``model_uid`` read from ``model_uid_env``. Either value is ``None``
        when the corresponding variable is not set.
    """
    return {
        'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
        'model_uid': os.environ.get(model_uid_env),
    }


def _assert_stream_response(response):
    """Assert that ``response`` is a generator of well-formed result chunks.

    Every chunk must be an ``LLMResultChunk`` whose delta carries an
    ``AssistantPromptMessage``. The message content must be non-empty for
    every chunk that has no ``finish_reason``; chunks with a finish reason
    are allowed to be empty.
    """
    assert isinstance(response, Generator)

    for chunk in response:
        assert isinstance(chunk, LLMResultChunk)
        assert isinstance(chunk.delta, LLMResultChunkDelta)
        assert isinstance(chunk.delta.message, AssistantPromptMessage)
        # Only chunks without a finish reason are required to carry content.
        if chunk.delta.finish_reason is None:
            assert len(chunk.delta.message.content) > 0


@pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['chat', 'none']], indirect=True)
def test_validate_credentials_for_chat_model(setup_openai_mock, setup_xinference_mock):
    """Reject a malformed UID and empty credentials; accept the configured chat model."""
    model = XinferenceAILargeLanguageModel()

    # A UID prefixed with 'www ' must be rejected.
    with pytest.raises(CredentialsValidateFailedError):
        model.validate_credentials(
            model='ChatGLM3',
            credentials={
                'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
                'model_uid': 'www ' + os.environ.get(_CHAT_UID_ENV),
            },
        )

    # Empty server URL and UID must be rejected.
    with pytest.raises(CredentialsValidateFailedError):
        model.validate_credentials(
            model='aaaaa',
            credentials={
                'server_url': '',
                'model_uid': '',
            },
        )

    # The configured credentials must validate without raising.
    model.validate_credentials(
        model='ChatGLM3',
        credentials=_credentials(_CHAT_UID_ENV),
    )


@pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['chat', 'none']], indirect=True)
def test_invoke_chat_model(setup_openai_mock, setup_xinference_mock):
    """A blocking chat invocation returns an ``LLMResult`` with content and token usage."""
    model = XinferenceAILargeLanguageModel()

    response = model.invoke(
        model='ChatGLM3',
        credentials=_credentials(_CHAT_UID_ENV),
        prompt_messages=[
            SystemPromptMessage(
                content='You are a helpful AI assistant.',
            ),
            UserPromptMessage(
                content='Hello World!',
            ),
        ],
        model_parameters={
            'temperature': 0.7,
            'top_p': 1.0,
        },
        stop=['you'],
        user="abc-123",
        stream=False,
    )

    assert isinstance(response, LLMResult)
    assert len(response.message.content) > 0
    assert response.usage.total_tokens > 0


@pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['chat', 'none']], indirect=True)
def test_invoke_stream_chat_model(setup_openai_mock, setup_xinference_mock):
    """A streaming chat invocation yields well-formed ``LLMResultChunk`` objects."""
    model = XinferenceAILargeLanguageModel()

    response = model.invoke(
        model='ChatGLM3',
        credentials=_credentials(_CHAT_UID_ENV),
        prompt_messages=[
            SystemPromptMessage(
                content='You are a helpful AI assistant.',
            ),
            UserPromptMessage(
                content='Hello World!',
            ),
        ],
        model_parameters={
            'temperature': 0.7,
            'top_p': 1.0,
        },
        stop=['you'],
        stream=True,
        user="abc-123",
    )

    _assert_stream_response(response)


# Function calling of xinference does not support stream mode currently.
# The two function-calling tests below are kept commented out.
# Prompts in the first one are in Chinese: the system prompt reads "You are a
# weather bot that can get weather information by calling functions" and the
# user prompt reads "How is the weather in Boston?".

# def test_invoke_stream_chat_model_with_functions():
#     model = XinferenceAILargeLanguageModel()
#
#     response = model.invoke(
#         model='ChatGLM3-6b',
#         credentials={
#             'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
#             'model_type': 'text-generation',
#             'model_name': 'ChatGLM3',
#             'model_uid': os.environ.get('XINFERENCE_CHAT_MODEL_UID')
#         },
#         prompt_messages=[
#             SystemPromptMessage(
#                 content='你是一个天气机器人,可以通过调用函数来获取天气信息',
#             ),
#             UserPromptMessage(
#                 content='波士顿天气如何?'
#             )
#         ],
#         model_parameters={
#             'temperature': 0,
#             'top_p': 1.0,
#         },
#         stop=['you'],
#         user='abc-123',
#         stream=True,
#         tools=[
#             PromptMessageTool(
#                 name='get_current_weather',
#                 description='Get the current weather in a given location',
#                 parameters={
#                     "type": "object",
#                     "properties": {
#                         "location": {
#                             "type": "string",
#                             "description": "The city and state e.g. San Francisco, CA"
#                         },
#                         "unit": {
#                             "type": "string",
#                             "enum": ["celsius", "fahrenheit"]
#                         }
#                     },
#                     "required": [
#                         "location"
#                     ]
#                 }
#             )
#         ]
#     )
#
#     assert isinstance(response, Generator)
#
#     call: LLMResultChunk = None
#     chunks = []
#
#     for chunk in response:
#         chunks.append(chunk)
#         assert isinstance(chunk, LLMResultChunk)
#         assert isinstance(chunk.delta, LLMResultChunkDelta)
#         assert isinstance(chunk.delta.message, AssistantPromptMessage)
#         assert len(chunk.delta.message.content) > 0 if chunk.delta.finish_reason is None else True
#
#         if chunk.delta.message.tool_calls and len(chunk.delta.message.tool_calls) > 0:
#             call = chunk
#             break
#
#     assert call is not None
#     assert call.delta.message.tool_calls[0].function.name == 'get_current_weather'

# def test_invoke_chat_model_with_functions():
#     model = XinferenceAILargeLanguageModel()
#
#     response = model.invoke(
#         model='ChatGLM3-6b',
#         credentials={
#             'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
#             'model_type': 'text-generation',
#             'model_name': 'ChatGLM3',
#             'model_uid': os.environ.get('XINFERENCE_CHAT_MODEL_UID')
#         },
#         prompt_messages=[
#             UserPromptMessage(
#                 content='What is the weather like in San Francisco?'
#             )
#         ],
#         model_parameters={
#             'temperature': 0.7,
#             'top_p': 1.0,
#         },
#         stop=['you'],
#         user='abc-123',
#         stream=False,
#         tools=[
#             PromptMessageTool(
#                 name='get_current_weather',
#                 description='Get the current weather in a given location',
#                 parameters={
#                     "type": "object",
#                     "properties": {
#                         "location": {
#                             "type": "string",
#                             "description": "The city and state e.g. San Francisco, CA"
#                         },
#                         "unit": {
#                             "type": "string",
#                             "enum": [
#                                 "c",
#                                 "f"
#                             ]
#                         }
#                     },
#                     "required": [
#                         "location"
#                     ]
#                 }
#             )
#         ]
#     )
#
#     assert isinstance(response, LLMResult)
#     assert len(response.message.content) > 0
#     assert response.usage.total_tokens > 0
#     assert response.message.tool_calls[0].function.name == 'get_current_weather'


@pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['completion', 'none']], indirect=True)
def test_validate_credentials_for_generation_model(setup_openai_mock, setup_xinference_mock):
    """Reject a malformed UID and empty credentials; accept the configured generation model."""
    model = XinferenceAILargeLanguageModel()

    # A UID prefixed with 'www ' must be rejected.
    with pytest.raises(CredentialsValidateFailedError):
        model.validate_credentials(
            model='alapaca',
            credentials={
                'server_url': os.environ.get('XINFERENCE_SERVER_URL'),
                'model_uid': 'www ' + os.environ.get(_GENERATION_UID_ENV),
            },
        )

    # Empty server URL and UID must be rejected.
    with pytest.raises(CredentialsValidateFailedError):
        model.validate_credentials(
            model='alapaca',
            credentials={
                'server_url': '',
                'model_uid': '',
            },
        )

    # The configured credentials must validate without raising.
    model.validate_credentials(
        model='alapaca',
        credentials=_credentials(_GENERATION_UID_ENV),
    )


@pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['completion', 'none']], indirect=True)
def test_invoke_generation_model(setup_openai_mock, setup_xinference_mock):
    """A blocking completion invocation returns an ``LLMResult`` with content and token usage."""
    model = XinferenceAILargeLanguageModel()

    response = model.invoke(
        model='alapaca',
        credentials=_credentials(_GENERATION_UID_ENV),
        prompt_messages=[
            UserPromptMessage(
                content='the United States is',
            ),
        ],
        model_parameters={
            'temperature': 0.7,
            'top_p': 1.0,
        },
        stop=['you'],
        user="abc-123",
        stream=False,
    )

    assert isinstance(response, LLMResult)
    assert len(response.message.content) > 0
    assert response.usage.total_tokens > 0


@pytest.mark.parametrize('setup_openai_mock, setup_xinference_mock', [['completion', 'none']], indirect=True)
def test_invoke_stream_generation_model(setup_openai_mock, setup_xinference_mock):
    """A streaming completion invocation yields well-formed ``LLMResultChunk`` objects."""
    model = XinferenceAILargeLanguageModel()

    response = model.invoke(
        model='alapaca',
        credentials=_credentials(_GENERATION_UID_ENV),
        prompt_messages=[
            UserPromptMessage(
                content='the United States is',
            ),
        ],
        model_parameters={
            'temperature': 0.7,
            'top_p': 1.0,
        },
        stop=['you'],
        stream=True,
        user="abc-123",
    )

    _assert_stream_response(response)


def test_get_num_tokens():
    """Token counting returns the expected totals with and without a tool definition."""
    model = XinferenceAILargeLanguageModel()

    # NOTE(review): model name is 'ChatGLM3' while the UID comes from the
    # generation-model variable; kept as in the original - confirm intent.
    num_tokens = model.get_num_tokens(
        model='ChatGLM3',
        credentials=_credentials(_GENERATION_UID_ENV),
        prompt_messages=[
            SystemPromptMessage(
                content='You are a helpful AI assistant.',
            ),
            UserPromptMessage(
                content='Hello World!',
            ),
        ],
        tools=[
            PromptMessageTool(
                name='get_current_weather',
                description='Get the current weather in a given location',
                parameters={
                    "type": "object",
                    "properties": {
                        "location": {
                            "type": "string",
                            "description": "The city and state e.g. San Francisco, CA",
                        },
                        "unit": {
                            "type": "string",
                            "enum": [
                                "c",
                                "f",
                            ],
                        },
                    },
                    "required": [
                        "location",
                    ],
                },
            ),
        ],
    )

    assert isinstance(num_tokens, int)
    assert num_tokens == 77

    # Same prompt without the tool definition.
    num_tokens = model.get_num_tokens(
        model='ChatGLM3',
        credentials=_credentials(_GENERATION_UID_ENV),
        prompt_messages=[
            SystemPromptMessage(
                content='You are a helpful AI assistant.',
            ),
            UserPromptMessage(
                content='Hello World!',
            ),
        ],
    )

    assert isinstance(num_tokens, int)
    assert num_tokens == 21