Browse Source

Feat/sdk vision support (#1531)

Co-authored-by: Joel <iamjoel007@gmail.com>
Garfield Dai 1 year ago
parent
commit
5b7071e4b0

+ 3 - 0
api/controllers/service_api/app/file.py

@@ -26,6 +26,9 @@ class FileApi(AppApiResource):
         if 'file' not in request.files:
             raise NoFileUploadedError()
 
+        if not file.mimetype:
+            raise UnsupportedFileTypeError()
+
         if len(request.files) > 1:
             raise TooManyFilesError()
 

+ 13 - 7
sdks/nodejs-client/README.md

@@ -14,27 +14,33 @@ import { DifyClient, ChatClient, CompletionClient } from 'dify-client'
 
 const API_KEY = 'your-api-key-here'
 const user = `random-user-id`
-const inputs = {
-  name: 'test name a'
-}
-const query = "Please tell me a short story in 10 words or less."
+const query = 'Please tell me a short story in 10 words or less.'
+const remote_url_files = [{
+    type: 'image',
+    transfer_method: 'remote_url',
+    url: 'your_url_address'
+}]
 
 // Create a completion client
 const completionClient = new CompletionClient(API_KEY)
 // Create a completion message
-completionClient.createCompletionMessage(inputs, query, responseMode, user)
+completionClient.createCompletionMessage({'query': query}, user)
+// Create a completion message with vision model
+completionClient.createCompletionMessage({'query': 'Describe the picture.'}, user, false, remote_url_files)
 
 // Create a chat client
 const chatClient = new ChatClient(API_KEY)
 // Create a chat message in stream mode
-const response = await chatClient.createChatMessage(inputs, query, user, true, null)
+const response = await chatClient.createChatMessage({}, query, user, true, null)
 const stream = response.data;
 stream.on('data', data => {
     console.log(data);
 });
 stream.on('end', () => {
-    console.log("stream done");
+    console.log('stream done');
 });
+// Create a chat message with vision model
+chatClient.createChatMessage({}, 'Describe the picture.', user, false, null, remote_url_files)
 // Fetch conversations
 chatClient.getConversations(user)
 // Fetch conversation messages

+ 29 - 6
sdks/nodejs-client/index.js

@@ -34,6 +34,10 @@ export const routes = {
     method: "DELETE",
     url: (conversation_id) => `/conversations/${conversation_id}`,
   },
+  fileUpload: {
+    method: "POST",
+    url: () => `/files/upload`,
+  }
 };
 
 export class DifyClient {
@@ -51,11 +55,15 @@ export class DifyClient {
     endpoint,
     data = null,
     params = null,
-    stream = false
+    stream = false,
+    headerParams = {}
   ) {
     const headers = {
-      Authorization: `Bearer ${this.apiKey}`,
-      "Content-Type": "application/json",
+      ...{
+        Authorization: `Bearer ${this.apiKey}`,
+        "Content-Type": "application/json",
+      },
+      ...headerParams
     };
 
     const url = `${this.baseUrl}${endpoint}`;
@@ -104,15 +112,28 @@ export class DifyClient {
       params
     );
   }
+
+  fileUpload(data) {
+    return this.sendRequest(
+      routes.fileUpload.method,
+      routes.fileUpload.url(),
+      data,
+      null,
+      false,
+      {
+        "Content-Type": 'multipart/form-data'
+      }
+    );
+  }
 }
 
 export class CompletionClient extends DifyClient {
-  createCompletionMessage(inputs, query, user, stream = false) {
+  createCompletionMessage(inputs, user, stream = false, files = null) {
     const data = {
       inputs,
-      query,
       user,
       response_mode: stream ? "streaming" : "blocking",
+      files,
     };
     return this.sendRequest(
       routes.createCompletionMessage.method,
@@ -130,13 +151,15 @@ export class ChatClient extends DifyClient {
     query,
     user,
     stream = false,
-    conversation_id = null
+    conversation_id = null,
+    files = null
   ) {
     const data = {
       inputs,
       query,
       user,
       response_mode: stream ? "streaming" : "blocking",
+      files,
     };
     if (conversation_id) data.conversation_id = conversation_id;
 

+ 1 - 1
sdks/nodejs-client/package.json

@@ -1,6 +1,6 @@
 {
   "name": "dify-client",
-  "version": "2.0.0",
+  "version": "2.1.0",
   "description": "This is the Node.js SDK for the Dify.AI API, which allows you to easily integrate Dify.AI into your Node.js applications.",
   "main": "index.js",
   "type": "module",

+ 38 - 5
sdks/php-client/README.md

@@ -11,7 +11,7 @@ This is the PHP SDK for the Dify API, which allows you to easily integrate Dify
 
 After installing the SDK, you can use it in your project like this:
 
-```
+```php
 <?php
 
 require 'vendor/autoload.php';
@@ -26,17 +26,50 @@ $difyClient = new DifyClient($apiKey);
 
 // Create a completion client
 $completionClient = new CompletionClient($apiKey);
-$response = $completionClient->create_completion_message($inputs, $query, $response_mode, $user);
+$response = $completionClient->create_completion_message(array("query" => "Who are you?"), "blocking", "user_id");
 
 // Create a chat client
 $chatClient = new ChatClient($apiKey);
-$response = $chatClient->create_chat_message($inputs, $query, $user, $response_mode, $conversation_id);
+$response = $chatClient->create_chat_message(array(), "Who are you?", "user_id", "blocking", $conversation_id);
+
+$fileForVision = [
+    [
+        "type" => "image",
+        "transfer_method" => "remote_url",
+        "url" => "your_image_url"
+    ]
+];
+
+// $fileForVision = [
+//     [
+//         "type" => "image",
+//         "transfer_method" => "local_file",
+//         "upload_file_id" => "your_file_id"
+//     ]
+// ];
+
+// Create a completion message with a vision model such as gpt-4-vision
+$response = $completionClient->create_completion_message(array("query" => "Describe this image."), "blocking", "user_id", $fileForVision);
+
+// Create a chat message with a vision model such as gpt-4-vision
+$response = $chatClient->create_chat_message(array(), "Describe this image.", "user_id", "blocking", $conversation_id, $fileForVision);
+
+// File Upload
+$fileForUpload = [
+    [
+        'tmp_name' => '/path/to/file/filename.jpg',
+        'name' => 'filename.jpg'
+    ]
+];
+$response = $difyClient->file_upload("user_id", $fileForUpload);
+$result = json_decode($response->getBody(), true);
+echo 'upload_file_id: ' . $result['id'];
 
 // Fetch application parameters
-$response = $difyClient->get_application_parameters($user);
+$response = $difyClient->get_application_parameters("user_id");
 
 // Provide feedback for a message
-$response = $difyClient->message_feedback($message_id, $rating, $user);
+$response = $difyClient->message_feedback($message_id, $rating, "user_id");
 
 // Other available methods:
 // - get_conversation_messages()

+ 40 - 3
sdks/php-client/dify-client.php

@@ -19,6 +19,13 @@ class DifyClient {
                 'Content-Type' => 'application/json',
             ],
         ]);
+        $this->file_client = new Client([
+            'base_uri' => $this->base_url,
+            'headers' => [
+                'Authorization' => 'Bearer ' . $this->api_key,
+                'Content-Type' => 'multipart/form-data',
+            ],
+        ]);
     }
 
     protected function send_request($method, $endpoint, $data = null, $params = null, $stream = false) {
@@ -44,27 +51,57 @@ class DifyClient {
         $params = ['user' => $user];
         return $this->send_request('GET', 'parameters', null, $params);
     }
+
+    public function file_upload($user, $files) {
+        $data = ['user' => $user];
+        $options = [
+            'multipart' => $this->prepareMultipart($data, $files)
+        ];
+
+        return $this->file_client->request('POST', 'files/upload', $options);
+    }
+
+    protected function prepareMultipart($data, $files) {
+        $multipart = [];
+        foreach ($data as $key => $value) {
+            $multipart[] = [
+                'name' => $key,
+                'contents' => $value
+            ];
+        }
+
+        foreach ($files as $file) {
+            $multipart[] = [
+                'name' => 'file',
+                'contents' => fopen($file['tmp_name'], 'r'),
+                'filename' => $file['name']
+            ];
+        }
+
+        return $multipart;
+    }
 }
 
 class CompletionClient extends DifyClient {
-    public function create_completion_message($inputs, $query, $response_mode, $user) {
+    public function create_completion_message($inputs, $response_mode, $user, $files = null) {
         $data = [
             'inputs' => $inputs,
-            'query' => $query,
             'response_mode' => $response_mode,
             'user' => $user,
+            'files' => $files,
         ];
         return $this->send_request('POST', 'completion-messages', $data, null, $response_mode === 'streaming');
     }
 }
 
 class ChatClient extends DifyClient {
-    public function create_chat_message($inputs, $query, $user, $response_mode = 'blocking', $conversation_id = null) {
+    public function create_chat_message($inputs, $query, $user, $response_mode = 'blocking', $conversation_id = null, $files = null) {
         $data = [
             'inputs' => $inputs,
             'query' => $query,
             'user' => $user,
             'response_mode' => $response_mode,
+            'files' => $files,
         ];
         if ($conversation_id) {
             $data['conversation_id'] = $conversation_id;

+ 101 - 16
sdks/python-client/README.md

@@ -14,8 +14,27 @@ Write your code with sdk:
 
 - completion generate with `blocking` response_mode
 
+```python
+from dify_client import CompletionClient
+
+api_key = "your_api_key"
+
+# Initialize CompletionClient
+completion_client = CompletionClient(api_key)
+
+# Create Completion Message using CompletionClient
+completion_response = completion_client.create_completion_message(inputs={"query": "What's the weather like today?"},
+                                                                  response_mode="blocking", user="user_id")
+completion_response.raise_for_status()
+
+result = completion_response.json()
+
+print(result.get('answer'))
 ```
-import json
+
+- completion using vision model, like gpt-4-vision
+
+```python
 from dify_client import CompletionClient
 
 api_key = "your_api_key"
@@ -23,19 +42,31 @@ api_key = "your_api_key"
 # Initialize CompletionClient
 completion_client = CompletionClient(api_key)
 
+files = [{
+    "type": "image",
+    "transfer_method": "remote_url",
+    "url": "your_image_url"
+}]
+
+# files = [{
+#     "type": "image",
+#     "transfer_method": "local_file",
+#     "upload_file_id": "your_file_id"
+# }]
+
 # Create Completion Message using CompletionClient
-completion_response = completion_client.create_completion_message(inputs={}, query="Hello", response_mode="blocking", user="user_id")
+completion_response = completion_client.create_completion_message(inputs={"query": "Describe the picture."},
+                                                                  response_mode="blocking", user="user_id", files=files)
 completion_response.raise_for_status()
 
-result = completion_response.text
-result = json.loads(result)
+result = completion_response.json()
 
 print(result.get('answer'))
 ```
 
 - chat generate with `streaming` response_mode
 
-```
+```python
 import json
 from dify_client import ChatClient
 
@@ -55,10 +86,67 @@ for line in chat_response.iter_lines(decode_unicode=True):
         print(line.get('answer'))
 ```
 
-- Others
+- chat using vision model, like gpt-4-vision
+
+```python
+from dify_client import ChatClient
 
+api_key = "your_api_key"
+
+# Initialize ChatClient
+chat_client = ChatClient(api_key)
+
+files = [{
+    "type": "image",
+    "transfer_method": "remote_url",
+    "url": "your_image_url"
+}]
+
+# files = [{
+#     "type": "image",
+#     "transfer_method": "local_file",
+#     "upload_file_id": "your_file_id"
+# }]
+
+# Create Chat Message using ChatClient
+chat_response = chat_client.create_chat_message(inputs={}, query="Describe the picture.", user="user_id",
+                                                response_mode="blocking", files=files)
+chat_response.raise_for_status()
+
+result = chat_response.json()
+
+print(result.get("answer"))
 ```
-import json
+
+- upload file when using vision model
+
+```python
+from dify_client import DifyClient
+
+api_key = "your_api_key"
+
+# Initialize Client
+dify_client = DifyClient(api_key)
+
+file_path = "your_image_file_path"
+file_name = "panda.jpeg"
+mime_type = "image/jpeg"
+
+with open(file_path, "rb") as file:
+    files = {
+        "file": (file_name, file, mime_type)
+    }
+    response = dify_client.file_upload("user_id", files)
+
+    result = response.json()
+    print(f'upload_file_id: {result.get("id")}')
+```
+  
+
+
+- Others
+
+```python
 from dify_client import ChatClient
 
 api_key = "your_api_key"
@@ -69,32 +157,29 @@ client = ChatClient(api_key)
 # Get App parameters
 parameters = client.get_application_parameters(user="user_id")
 parameters.raise_for_status()
-parameters = json.loads(parameters.text)
 
 print('[parameters]')
-print(parameters)
+print(parameters.json())
 
 # Get Conversation List (only for chat)
 conversations = client.get_conversations(user="user_id")
 conversations.raise_for_status()
-conversations = json.loads(conversations.text)
 
 print('[conversations]')
-print(conversations)
+print(conversations.json())
 
 # Get Message List (only for chat)
 messages = client.get_conversation_messages(user="user_id", conversation_id="conversation_id")
 messages.raise_for_status()
-messages = json.loads(messages.text)
 
 print('[messages]')
-print(messages)
+print(messages.json())
 
 # Rename Conversation (only for chat)
-rename_conversation_response = client.rename_conversation(conversation_id="conversation_id", name="new_name", user="user_id")
+rename_conversation_response = client.rename_conversation(conversation_id="conversation_id",
+                                                          name="new_name", user="user_id")
 rename_conversation_response.raise_for_status()
-rename_conversation_result = json.loads(rename_conversation_response.text)
 
 print('[rename result]')
-print(rename_conversation_result)
+print(rename_conversation_response.json())
 ```

+ 1 - 1
sdks/python-client/dify_client/__init__.py

@@ -1 +1 @@
-from dify_client.client import ChatClient, CompletionClient
+from dify_client.client import ChatClient, CompletionClient, DifyClient

+ 28 - 9
sdks/python-client/dify_client/client.py

@@ -6,14 +6,24 @@ class DifyClient:
         self.api_key = api_key
         self.base_url = "https://api.dify.ai/v1"
 
-    def _send_request(self, method, endpoint, data=None, params=None, stream=False):
+    def _send_request(self, method, endpoint, json=None, params=None, stream=False):
         headers = {
             "Authorization": f"Bearer {self.api_key}",
             "Content-Type": "application/json"
         }
 
         url = f"{self.base_url}{endpoint}"
-        response = requests.request(method, url, json=data, params=params, headers=headers, stream=stream)
+        response = requests.request(method, url, json=json, params=params, headers=headers, stream=stream)
+
+        return response
+
+    def _send_request_with_files(self, method, endpoint, data, files):
+        headers = {
+            "Authorization": f"Bearer {self.api_key}"
+        }
+
+        url = f"{self.base_url}{endpoint}"
+        response = requests.request(method, url, data=data, headers=headers, files=files)
 
         return response
 
@@ -28,30 +38,39 @@ class DifyClient:
         params = {"user": user}
         return self._send_request("GET", "/parameters", params=params)
 
+    def file_upload(self, user, files):
+        data = {
+            "user": user
+        }
+        return self._send_request_with_files("POST", "/files/upload", data=data, files=files)
+
 
 class CompletionClient(DifyClient):
-    def create_completion_message(self, inputs, query, response_mode, user):
+    def create_completion_message(self, inputs, response_mode, user, files=None):
         data = {
             "inputs": inputs,
-            "query": query,
             "response_mode": response_mode,
-            "user": user
+            "user": user,
+            "files": files
         }
-        return self._send_request("POST", "/completion-messages", data, stream=True if response_mode == "streaming" else False)
+        return self._send_request("POST", "/completion-messages", data,
+                                  stream=True if response_mode == "streaming" else False)
 
 
 class ChatClient(DifyClient):
-    def create_chat_message(self, inputs, query, user, response_mode="blocking", conversation_id=None):
+    def create_chat_message(self, inputs, query, user, response_mode="blocking", conversation_id=None, files=None):
         data = {
             "inputs": inputs,
             "query": query,
             "user": user,
-            "response_mode": response_mode
+            "response_mode": response_mode,
+            "files": files
         }
         if conversation_id:
             data["conversation_id"] = conversation_id
 
-        return self._send_request("POST", "/chat-messages", data, stream=True if response_mode == "streaming" else False)
+        return self._send_request("POST", "/chat-messages", data,
+                                  stream=True if response_mode == "streaming" else False)
 
     def get_conversation_messages(self, user, conversation_id=None, first_id=None, limit=None):
         params = {"user": user}

+ 1 - 1
sdks/python-client/setup.py

@@ -5,7 +5,7 @@ with open("README.md", "r", encoding="utf-8") as fh:
 
 setup(
     name="dify-client",
-    version="0.1.8",
+    version="0.1.10",
     author="Dify",
     author_email="hello@dify.ai",
     description="A package for interacting with the Dify Service-API",

+ 60 - 9
sdks/python-client/tests/test_client.py

@@ -12,15 +12,33 @@ class TestChatClient(unittest.TestCase):
 
     def test_create_chat_message(self):
         response = self.chat_client.create_chat_message({}, "Hello, World!", "test_user")
-        self.assertIn("message_id", response)
+        self.assertIn("answer", response.text)
+
+    def test_create_chat_message_with_vision_model_by_remote_url(self):
+        files = [{
+            "type": "image",
+            "transfer_method": "remote_url",
+            "url": "your_image_url"
+        }]
+        response = self.chat_client.create_chat_message({}, "Describe the picture.", "test_user", files=files)
+        self.assertIn("answer", response.text)
+
+    def test_create_chat_message_with_vision_model_by_local_file(self):
+        files = [{
+            "type": "image",
+            "transfer_method": "local_file",
+            "upload_file_id": "your_file_id"
+        }]
+        response = self.chat_client.create_chat_message({}, "Describe the picture.", "test_user", files=files)
+        self.assertIn("answer", response.text)
 
     def test_get_conversation_messages(self):
-        response = self.chat_client.get_conversation_messages("test_user")
-        self.assertIsInstance(response, list)
+        response = self.chat_client.get_conversation_messages("test_user", "your_conversation_id")
+        self.assertIn("answer", response.text)
 
     def test_get_conversations(self):
         response = self.chat_client.get_conversations("test_user")
-        self.assertIsInstance(response, list)
+        self.assertIn("data", response.text)
 
 
 class TestCompletionClient(unittest.TestCase):
@@ -28,8 +46,29 @@ class TestCompletionClient(unittest.TestCase):
         self.completion_client = CompletionClient(API_KEY)
 
     def test_create_completion_message(self):
-        response = self.completion_client.create_completion_message({}, "What's the weather like today?", "blocking", "test_user")
-        self.assertIn("message_id", response)
+        response = self.completion_client.create_completion_message({"query": "What's the weather like today?"},
+                                                                    "blocking", "test_user")
+        self.assertIn("answer", response.text)
+
+    def test_create_completion_message_with_vision_model_by_remote_url(self):
+        files = [{
+            "type": "image",
+            "transfer_method": "remote_url",
+            "url": "your_image_url"
+        }]
+        response = self.completion_client.create_completion_message(
+            {"query": "Describe the picture."}, "blocking", "test_user", files)
+        self.assertIn("answer", response.text)
+
+    def test_create_completion_message_with_vision_model_by_local_file(self):
+        files = [{
+            "type": "image",
+            "transfer_method": "local_file",
+            "upload_file_id": "your_file_id"
+        }]
+        response = self.completion_client.create_completion_message(
+            {"query": "Describe the picture."}, "blocking", "test_user", files)
+        self.assertIn("answer", response.text)
 
 
 class TestDifyClient(unittest.TestCase):
@@ -37,12 +76,24 @@ class TestDifyClient(unittest.TestCase):
         self.dify_client = DifyClient(API_KEY)
 
     def test_message_feedback(self):
-        response = self.dify_client.message_feedback("test_message_id", 5, "test_user")
-        self.assertIn("success", response)
+        response = self.dify_client.message_feedback("your_message_id", 'like', "test_user")
+        self.assertIn("success", response.text)
 
     def test_get_application_parameters(self):
         response = self.dify_client.get_application_parameters("test_user")
-        self.assertIsInstance(response, dict)
+        self.assertIn("user_input_form", response.text)
+
+    def test_file_upload(self):
+        file_path = "your_image_file_path"
+        file_name = "panda.jpeg"
+        mime_type = "image/jpeg"
+
+        with open(file_path, "rb") as file:
+            files = {
+                "file": (file_name, file, mime_type)
+            }
+            response = self.dify_client.file_upload("test_user", files)
+            self.assertIn("name", response.text)
 
 
 if __name__ == "__main__":