@@ -9,11 +9,11 @@ from xinference.client import RESTfulChatglmCppChatModelHandle, \
 
 class XinferenceLLM(Xinference):
     def _call(
-        self,
-        prompt: str,
-        stop: Optional[List[str]] = None,
-        run_manager: Optional[CallbackManagerForLLMRun] = None,
-        **kwargs: Any,
+            self,
+            prompt: str,
+            stop: Optional[List[str]] = None,
+            run_manager: Optional[CallbackManagerForLLMRun] = None,
+            **kwargs: Any,
     ) -> str:
         """Call the xinference model and return the output.
 
@@ -56,10 +56,10 @@ class XinferenceLLM(Xinference):
             if generate_config and generate_config.get("stream"):
                 combined_text_output = ""
                 for token in self._stream_generate(
-                    model=model,
-                    prompt=prompt,
-                    run_manager=run_manager,
-                    generate_config=generate_config,
+                        model=model,
+                        prompt=prompt,
+                        run_manager=run_manager,
+                        generate_config=generate_config,
                 ):
                     combined_text_output += token
                 return combined_text_output
@@ -73,10 +73,10 @@ class XinferenceLLM(Xinference):
             if generate_config and generate_config.get("stream"):
                 combined_text_output = ""
                 for token in self._stream_generate(
-                    model=model,
-                    prompt=prompt,
-                    run_manager=run_manager,
-                    generate_config=generate_config,
+                        model=model,
+                        prompt=prompt,
+                        run_manager=run_manager,
+                        generate_config=generate_config,
                 ):
                     combined_text_output += token
                 completion = combined_text_output
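
For orientation, here is a minimal sketch of exercising the streaming branch patched above. It assumes a running Xinference server with a launched model, and that generate_config is forwarded through **kwargs into _call as the body above suggests; the import path, server URL, and model UID are placeholders, not part of this diff:

    # Placeholder import: adjust to wherever XinferenceLLM is defined.
    from my_project.xinference_llm import XinferenceLLM

    llm = XinferenceLLM(server_url="http://127.0.0.1:9997", model_uid="<model-uid>")
    # With stream=True, _call routes through _stream_generate and accumulates
    # the yielded tokens into a single string before returning it.
    completion = llm("Q: Name the largest planet. A:", generate_config={"stream": True})
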
@@ -89,13 +89,13 @@ class XinferenceLLM(Xinference):
 
             return completion
 
-
     def _stream_generate(
-        self,
-        model: Union["RESTfulGenerateModelHandle", "RESTfulChatModelHandle", "RESTfulChatglmCppChatModelHandle"],
-        prompt: str,
-        run_manager: Optional[CallbackManagerForLLMRun] = None,
-        generate_config: Optional[Union["LlamaCppGenerateConfig", "PytorchGenerateConfig", "ChatglmCppGenerateConfig"]] = None,
+            self,
+            model: Union["RESTfulGenerateModelHandle", "RESTfulChatModelHandle", "RESTfulChatglmCppChatModelHandle"],
+            prompt: str,
+            run_manager: Optional[CallbackManagerForLLMRun] = None,
+            generate_config: Optional[
+                Union["LlamaCppGenerateConfig", "PytorchGenerateConfig", "ChatglmCppGenerateConfig"]] = None,
     ) -> Generator[str, None, None]:
         """
         Args:
@@ -123,6 +123,10 @@ class XinferenceLLM(Xinference):
                 if choices:
                     choice = choices[0]
                     if isinstance(choice, dict):
+                        if 'finish_reason' in choice and choice['finish_reason'] \
+                                and choice['finish_reason'] in ['stop', 'length']:
+                            break
+
                         if 'text' in choice:
                             token = choice.get("text", "")
                         elif 'delta' in choice and 'content' in choice['delta']:
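
The four lines added in this last hunk make the generator stop once the server reports a terminal finish_reason, instead of polling for further chunks after 'stop' (natural end of generation) or 'length' (token limit reached). The same pattern in isolation, where chunks stands in for the iterator returned by the streaming generate call (a hypothetical helper, not part of this diff):

    def collect_stream(chunks):
        """Accumulate streamed text, stopping on a terminal finish_reason."""
        text = ""
        for chunk in chunks:
            choices = chunk.get("choices") or [{}]
            choice = choices[0]
            # 'stop' and 'length' are the terminal states the patch checks for.
            if choice.get("finish_reason") in ("stop", "length"):
                break
            text += choice.get("text", "")
        return text

Note the design choice: the loop breaks before reading the chunk's text, which assumes the terminal chunk carries no new content.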