xinference_provider.py

import json
from typing import Type

import requests

from core.helper import encrypter
from core.model_providers.models.embedding.xinference_embedding import XinferenceEmbedding
from core.model_providers.models.entity.model_params import KwargRule, ModelKwargsRules, ModelType, ModelMode
from core.model_providers.models.llm.xinference_model import XinferenceModel
from core.model_providers.providers.base import BaseModelProvider, CredentialsValidateFailedError
from core.model_providers.models.base import BaseProviderModel
from core.third_party.langchain.embeddings.xinference_embedding import XinferenceEmbeddings
from core.third_party.langchain.llms.xinference_llm import XinferenceLLM
from models.provider import ProviderType


class XinferenceProvider(BaseModelProvider):
    @property
    def provider_name(self):
        """
        Returns the name of a provider.
        """
        return 'xinference'

    def _get_fixed_model_list(self, model_type: ModelType) -> list[dict]:
        return []

    def _get_text_generation_model_mode(self, model_name) -> str:
        return ModelMode.COMPLETION.value

    def get_model_class(self, model_type: ModelType) -> Type[BaseProviderModel]:
        """
        Returns the model class.

        :param model_type:
        :return:
        """
        if model_type == ModelType.TEXT_GENERATION:
            model_class = XinferenceModel
        elif model_type == ModelType.EMBEDDINGS:
            model_class = XinferenceEmbedding
        else:
            raise NotImplementedError

        return model_class
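
    # Parameter rules are derived from the credentials stored for the deployed model:
    # ggmlv3 chatglm handles and non-ggmlv3 models disable the presence/frequency
    # penalties, while other ggmlv3 models expose the full set of sampling parameters.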
    def get_model_parameter_rules(self, model_name: str, model_type: ModelType) -> ModelKwargsRules:
        """
        get model parameter rules.

        :param model_name:
        :param model_type:
        :return:
        """
        credentials = self.get_model_credentials(model_name, model_type)

        if credentials['model_format'] == "ggmlv3" and credentials["model_handle_type"] == "chatglm":
            return ModelKwargsRules(
                temperature=KwargRule[float](min=0.01, max=2, default=1, precision=2),
                top_p=KwargRule[float](min=0, max=1, default=0.7, precision=2),
                presence_penalty=KwargRule[float](enabled=False),
                frequency_penalty=KwargRule[float](enabled=False),
                max_tokens=KwargRule[int](min=10, max=4000, default=256, precision=0),
            )
        elif credentials['model_format'] == "ggmlv3":
            return ModelKwargsRules(
                temperature=KwargRule[float](min=0.01, max=2, default=1, precision=2),
                top_p=KwargRule[float](min=0, max=1, default=0.7, precision=2),
                presence_penalty=KwargRule[float](min=-2, max=2, default=0, precision=2),
                frequency_penalty=KwargRule[float](min=-2, max=2, default=0, precision=2),
                max_tokens=KwargRule[int](min=10, max=4000, default=256, precision=0),
            )
        else:
            return ModelKwargsRules(
                temperature=KwargRule[float](min=0.01, max=2, default=1, precision=2),
                top_p=KwargRule[float](min=0, max=1, default=0.7, precision=2),
                presence_penalty=KwargRule[float](enabled=False),
                frequency_penalty=KwargRule[float](enabled=False),
                max_tokens=KwargRule[int](min=10, max=4000, default=256, precision=0),
            )
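
    # Credential validation performs a live round trip against the Xinference server:
    # a "ping" completion for text-generation models or a "ping" embedding query for
    # embedding models; any failure is surfaced as CredentialsValidateFailedError.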
    @classmethod
    def is_model_credentials_valid_or_raise(cls, model_name: str, model_type: ModelType, credentials: dict):
        """
        Check whether the model credentials are valid, raising on failure.

        :param model_name:
        :param model_type:
        :param credentials:
        """
        if 'server_url' not in credentials:
            raise CredentialsValidateFailedError('Xinference Server URL must be provided.')

        if 'model_uid' not in credentials:
            raise CredentialsValidateFailedError('Xinference Model UID must be provided.')

        try:
            credential_kwargs = {
                'server_url': credentials['server_url'],
                'model_uid': credentials['model_uid'],
            }

            if model_type == ModelType.TEXT_GENERATION:
                llm = XinferenceLLM(
                    **credential_kwargs
                )

                llm("ping")
            elif model_type == ModelType.EMBEDDINGS:
                embedding = XinferenceEmbeddings(
                    **credential_kwargs
                )

                embedding.embed_query("ping")
        except Exception as ex:
            raise CredentialsValidateFailedError(str(ex))
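
    # For text-generation models, extra metadata (model_format, model_handle_type) is
    # fetched from the server and stored alongside the credentials; the server URL is
    # encrypted per tenant before persisting.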
    @classmethod
    def encrypt_model_credentials(cls, tenant_id: str, model_name: str, model_type: ModelType,
                                  credentials: dict) -> dict:
        """
        Encrypt model credentials for saving.

        :param tenant_id:
        :param model_name:
        :param model_type:
        :param credentials:
        :return:
        """
        if model_type == ModelType.TEXT_GENERATION:
            extra_credentials = cls._get_extra_credentials(credentials)
            credentials.update(extra_credentials)

        credentials['server_url'] = encrypter.encrypt_token(tenant_id, credentials['server_url'])
        return credentials
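
    # Credentials are stored encrypted per provider model; the server URL is decrypted
    # here, or additionally obfuscated for display when `obfuscated=True`.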
    def get_model_credentials(self, model_name: str, model_type: ModelType, obfuscated: bool = False) -> dict:
        """
        get credentials for llm use.

        :param model_name:
        :param model_type:
        :param obfuscated:
        :return:
        """
        if self.provider.provider_type != ProviderType.CUSTOM.value:
            raise NotImplementedError

        provider_model = self._get_provider_model(model_name, model_type)

        if not provider_model.encrypted_config:
            return {
                'server_url': None,
                'model_uid': None,
            }

        credentials = json.loads(provider_model.encrypted_config)

        if credentials['server_url']:
            credentials['server_url'] = encrypter.decrypt_token(
                self.provider.tenant_id,
                credentials['server_url']
            )

            if obfuscated:
                credentials['server_url'] = encrypter.obfuscated_token(credentials['server_url'])

        return credentials
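
    # Queries the Xinference REST API (GET /v1/models/{model_uid}) to discover the
    # model format and which handle type (chatglm / generate / chat) should be used.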
    @classmethod
    def _get_extra_credentials(cls, credentials: dict) -> dict:
        url = f"{credentials['server_url']}/v1/models/{credentials['model_uid']}"
        response = requests.get(url)
        if response.status_code != 200:
            raise RuntimeError(
                f"Failed to get the model description, detail: {response.json()['detail']}"
            )

        desc = response.json()
        extra_credentials = {
            'model_format': desc['model_format'],
        }

        if desc["model_format"] == "ggmlv3" and "chatglm" in desc["model_name"]:
            extra_credentials['model_handle_type'] = 'chatglm'
        elif "generate" in desc["model_ability"]:
            extra_credentials['model_handle_type'] = 'generate'
        elif "chat" in desc["model_ability"]:
            extra_credentials['model_handle_type'] = 'chat'
        else:
            raise NotImplementedError("Model handle type not supported.")

        return extra_credentials
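
    # Xinference is configured per model (server URL + model UID), so there are no
    # provider-level credentials to validate, encrypt, or return.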
    @classmethod
    def is_provider_credentials_valid_or_raise(cls, credentials: dict):
        return

    @classmethod
    def encrypt_provider_credentials(cls, tenant_id: str, credentials: dict) -> dict:
        return {}

    def get_provider_credentials(self, obfuscated: bool = False) -> dict:
        return {}