|  | @@ -173,7 +173,7 @@ class CohereLargeLanguageModel(LargeLanguageModel):
 | 
	
		
			
				|  |  |          :return: full response or stream response chunk generator result
 | 
	
		
			
				|  |  |          """
 | 
	
		
			
				|  |  |          # initialize client
 | 
	
		
			
				|  |  | -        client = cohere.Client(credentials.get('api_key'))
 | 
	
		
			
				|  |  | +        client = cohere.Client(credentials.get('api_key'), base_url=credentials.get('base_url'))
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |          if stop:
 | 
	
		
			
				|  |  |              model_parameters['end_sequences'] = stop
 | 
	
	
		
			
				|  | @@ -233,7 +233,8 @@ class CohereLargeLanguageModel(LargeLanguageModel):
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |          return response
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    def _handle_generate_stream_response(self, model: str, credentials: dict, response: Iterator[GenerateStreamedResponse],
 | 
	
		
			
				|  |  | +    def _handle_generate_stream_response(self, model: str, credentials: dict,
 | 
	
		
			
				|  |  | +                                         response: Iterator[GenerateStreamedResponse],
 | 
	
		
			
				|  |  |                                           prompt_messages: list[PromptMessage]) -> Generator:
 | 
	
		
			
				|  |  |          """
 | 
	
		
			
				|  |  |          Handle llm stream response
 | 
	
	
		
			
				|  | @@ -317,7 +318,7 @@ class CohereLargeLanguageModel(LargeLanguageModel):
 | 
	
		
			
				|  |  |          :return: full response or stream response chunk generator result
 | 
	
		
			
				|  |  |          """
 | 
	
		
			
				|  |  |          # initialize client
 | 
	
		
			
				|  |  | -        client = cohere.Client(credentials.get('api_key'))
 | 
	
		
			
				|  |  | +        client = cohere.Client(credentials.get('api_key'), base_url=credentials.get('base_url'))
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |          if stop:
 | 
	
		
			
				|  |  |              model_parameters['stop_sequences'] = stop
 | 
	
	
		
			
				|  | @@ -636,7 +637,7 @@ class CohereLargeLanguageModel(LargeLanguageModel):
 | 
	
		
			
				|  |  |          :return: number of tokens
 | 
	
		
			
				|  |  |          """
 | 
	
		
			
				|  |  |          # initialize client
 | 
	
		
			
				|  |  | -        client = cohere.Client(credentials.get('api_key'))
 | 
	
		
			
				|  |  | +        client = cohere.Client(credentials.get('api_key'), base_url=credentials.get('base_url'))
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |          response = client.tokenize(
 | 
	
		
			
				|  |  |              text=text,
 |