# calc_token_mixin.py
  1. from typing import cast
  2. from core.entities.application_entities import ModelConfigEntity
  3. from core.model_runtime.entities.message_entities import PromptMessage
  4. from core.model_runtime.entities.model_entities import ModelPropertyKey
  5. from core.model_runtime.model_providers.__base.large_language_model import LargeLanguageModel
  6. class CalcTokenMixin:
  7. def get_message_rest_tokens(self, model_config: ModelConfigEntity, messages: list[PromptMessage], **kwargs) -> int:
  8. """
  9. Got the rest tokens available for the model after excluding messages tokens and completion max tokens
  10. :param model_config:
  11. :param messages:
  12. :return:
  13. """
  14. model_type_instance = model_config.provider_model_bundle.model_type_instance
  15. model_type_instance = cast(LargeLanguageModel, model_type_instance)
  16. model_context_tokens = model_config.model_schema.model_properties.get(ModelPropertyKey.CONTEXT_SIZE)
  17. max_tokens = 0
  18. for parameter_rule in model_config.model_schema.parameter_rules:
  19. if (parameter_rule.name == 'max_tokens'
  20. or (parameter_rule.use_template and parameter_rule.use_template == 'max_tokens')):
  21. max_tokens = (model_config.parameters.get(parameter_rule.name)
  22. or model_config.parameters.get(parameter_rule.use_template)) or 0
  23. if model_context_tokens is None:
  24. return 0
  25. if max_tokens is None:
  26. max_tokens = 0
  27. prompt_tokens = model_type_instance.get_num_tokens(
  28. model_config.model,
  29. model_config.credentials,
  30. messages
  31. )
  32. rest_tokens = model_context_tokens - max_tokens - prompt_tokens
  33. return rest_tokens
  34. class ExceededLLMTokensLimitError(Exception):
  35. pass