| 
					
				 | 
			
			
				@@ -213,18 +213,21 @@ class SparkLargeLanguageModel(LargeLanguageModel): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         :param prompt_messages: prompt messages 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         :return: llm response chunk generator result 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        completion = "" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         for index, content in enumerate(client.subscribe()): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             if isinstance(content, dict): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 delta = content["data"] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 delta = content 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            completion += delta 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             assistant_prompt_message = AssistantPromptMessage( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 content=delta or "", 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				- 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            temp_assistant_prompt_message = AssistantPromptMessage( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                content=completion, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            ) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             prompt_tokens = self.get_num_tokens(model, credentials, prompt_messages) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            completion_tokens = self.get_num_tokens(model, credentials, [assistant_prompt_message]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            completion_tokens = self.get_num_tokens(model, credentials, [temp_assistant_prompt_message]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             # transform usage 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             usage = self._calc_response_usage(model, credentials, prompt_tokens, completion_tokens) 
			 |