xinference_provider.py 8.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212
  1. import json
  2. from typing import Type
  3. import requests
  4. from xinference_client.client.restful.restful_client import Client
  5. from core.helper import encrypter
  6. from core.model_providers.models.embedding.xinference_embedding import XinferenceEmbedding
  7. from core.model_providers.models.entity.model_params import KwargRule, ModelKwargsRules, ModelType, ModelMode
  8. from core.model_providers.models.llm.xinference_model import XinferenceModel
  9. from core.model_providers.models.reranking.xinference_reranking import XinferenceReranking
  10. from core.model_providers.providers.base import BaseModelProvider, CredentialsValidateFailedError
  11. from core.model_providers.models.base import BaseProviderModel
  12. from core.third_party.langchain.embeddings.xinference_embedding import XinferenceEmbeddings
  13. from core.third_party.langchain.llms.xinference_llm import XinferenceLLM
  14. from models.provider import ProviderType
  15. class XinferenceProvider(BaseModelProvider):
  16. @property
  17. def provider_name(self):
  18. """
  19. Returns the name of a provider.
  20. """
  21. return 'xinference'
  22. def _get_fixed_model_list(self, model_type: ModelType) -> list[dict]:
  23. return []
  24. def _get_text_generation_model_mode(self, model_name) -> str:
  25. return ModelMode.COMPLETION.value
  26. def get_model_class(self, model_type: ModelType) -> Type[BaseProviderModel]:
  27. """
  28. Returns the model class.
  29. :param model_type:
  30. :return:
  31. """
  32. if model_type == ModelType.TEXT_GENERATION:
  33. model_class = XinferenceModel
  34. elif model_type == ModelType.EMBEDDINGS:
  35. model_class = XinferenceEmbedding
  36. elif model_type == ModelType.RERANKING:
  37. model_class = XinferenceReranking
  38. else:
  39. raise NotImplementedError
  40. return model_class
  41. def get_model_parameter_rules(self, model_name: str, model_type: ModelType) -> ModelKwargsRules:
  42. """
  43. get model parameter rules.
  44. :param model_name:
  45. :param model_type:
  46. :return:
  47. """
  48. credentials = self.get_model_credentials(model_name, model_type)
  49. if credentials['model_format'] == "ggmlv3" and credentials["model_handle_type"] == "chatglm":
  50. return ModelKwargsRules(
  51. temperature=KwargRule[float](min=0.01, max=2, default=1, precision=2),
  52. top_p=KwargRule[float](min=0, max=1, default=0.7, precision=2),
  53. presence_penalty=KwargRule[float](enabled=False),
  54. frequency_penalty=KwargRule[float](enabled=False),
  55. max_tokens=KwargRule[int](min=10, max=4000, default=256, precision=0),
  56. )
  57. elif credentials['model_format'] == "ggmlv3":
  58. return ModelKwargsRules(
  59. temperature=KwargRule[float](min=0.01, max=2, default=1, precision=2),
  60. top_p=KwargRule[float](min=0, max=1, default=0.7, precision=2),
  61. presence_penalty=KwargRule[float](min=-2, max=2, default=0, precision=2),
  62. frequency_penalty=KwargRule[float](min=-2, max=2, default=0, precision=2),
  63. max_tokens=KwargRule[int](min=10, max=4000, default=256, precision=0),
  64. )
  65. else:
  66. return ModelKwargsRules(
  67. temperature=KwargRule[float](min=0.01, max=2, default=1, precision=2),
  68. top_p=KwargRule[float](min=0, max=1, default=0.7, precision=2),
  69. presence_penalty=KwargRule[float](enabled=False),
  70. frequency_penalty=KwargRule[float](enabled=False),
  71. max_tokens=KwargRule[int](min=10, max=4000, default=256, precision=0),
  72. )
  73. @classmethod
  74. def is_model_credentials_valid_or_raise(cls, model_name: str, model_type: ModelType, credentials: dict):
  75. """
  76. check model credentials valid.
  77. :param model_name:
  78. :param model_type:
  79. :param credentials:
  80. """
  81. if 'server_url' not in credentials:
  82. raise CredentialsValidateFailedError('Xinference Server URL must be provided.')
  83. if 'model_uid' not in credentials:
  84. raise CredentialsValidateFailedError('Xinference Model UID must be provided.')
  85. try:
  86. credential_kwargs = {
  87. 'server_url': credentials['server_url'],
  88. 'model_uid': credentials['model_uid'],
  89. }
  90. if model_type == ModelType.TEXT_GENERATION:
  91. llm = XinferenceLLM(
  92. **credential_kwargs
  93. )
  94. llm("ping")
  95. elif model_type == ModelType.EMBEDDINGS:
  96. embedding = XinferenceEmbeddings(
  97. **credential_kwargs
  98. )
  99. embedding.embed_query("ping")
  100. elif model_type == ModelType.RERANKING:
  101. rerank_client = Client(credential_kwargs['server_url'])
  102. model = rerank_client.get_model(credential_kwargs['model_uid'])
  103. model.rerank(query="ping", documents=["ping", "pong"], top_n=2)
  104. except Exception as ex:
  105. raise CredentialsValidateFailedError(str(ex))
  106. @classmethod
  107. def encrypt_model_credentials(cls, tenant_id: str, model_name: str, model_type: ModelType,
  108. credentials: dict) -> dict:
  109. """
  110. encrypt model credentials for save.
  111. :param tenant_id:
  112. :param model_name:
  113. :param model_type:
  114. :param credentials:
  115. :return:
  116. """
  117. if model_type == ModelType.TEXT_GENERATION:
  118. extra_credentials = cls._get_extra_credentials(credentials)
  119. credentials.update(extra_credentials)
  120. credentials['server_url'] = encrypter.encrypt_token(tenant_id, credentials['server_url'])
  121. return credentials
  122. def get_model_credentials(self, model_name: str, model_type: ModelType, obfuscated: bool = False) -> dict:
  123. """
  124. get credentials for llm use.
  125. :param model_name:
  126. :param model_type:
  127. :param obfuscated:
  128. :return:
  129. """
  130. if self.provider.provider_type != ProviderType.CUSTOM.value:
  131. raise NotImplementedError
  132. provider_model = self._get_provider_model(model_name, model_type)
  133. if not provider_model.encrypted_config:
  134. return {
  135. 'server_url': None,
  136. 'model_uid': None,
  137. }
  138. credentials = json.loads(provider_model.encrypted_config)
  139. if credentials['server_url']:
  140. credentials['server_url'] = encrypter.decrypt_token(
  141. self.provider.tenant_id,
  142. credentials['server_url']
  143. )
  144. if obfuscated:
  145. credentials['server_url'] = encrypter.obfuscated_token(credentials['server_url'])
  146. return credentials
  147. @classmethod
  148. def _get_extra_credentials(self, credentials: dict) -> dict:
  149. url = f"{credentials['server_url']}/v1/models/{credentials['model_uid']}"
  150. response = requests.get(url)
  151. if response.status_code != 200:
  152. raise RuntimeError(
  153. f"Failed to get the model description, detail: {response.json()['detail']}"
  154. )
  155. desc = response.json()
  156. extra_credentials = {
  157. 'model_format': desc['model_format'],
  158. }
  159. if desc["model_format"] == "ggmlv3" and "chatglm" in desc["model_name"]:
  160. extra_credentials['model_handle_type'] = 'chatglm'
  161. elif "generate" in desc["model_ability"]:
  162. extra_credentials['model_handle_type'] = 'generate'
  163. elif "chat" in desc["model_ability"]:
  164. extra_credentials['model_handle_type'] = 'chat'
  165. else:
  166. raise NotImplementedError(f"Model handle type not supported.")
  167. return extra_credentials
  168. @classmethod
  169. def is_provider_credentials_valid_or_raise(cls, credentials: dict):
  170. return
  171. @classmethod
  172. def encrypt_provider_credentials(cls, tenant_id: str, credentials: dict) -> dict:
  173. return {}
  174. def get_provider_credentials(self, obfuscated: bool = False) -> dict:
  175. return {}