Jelajahi Sumber

Fix: Workflow "start" paste URL does not support S3 pre-signed URLs (#6855)

Co-authored-by: Yuanbo Li <ybalbert@amazon.com>
ybalbert001 1 tahun lalu
induk
melakukan
f2cb1fb09f

+ 26 - 0
api/core/file/message_file_parser.py

@@ -1,5 +1,7 @@
+import re
 from collections.abc import Mapping, Sequence
 from typing import Any, Union
+from urllib.parse import parse_qs, urlparse
 
 import requests
 
@@ -186,6 +188,30 @@ class MessageFileParser:
                 "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
             }
 
+            def is_s3_presigned_url(url):
+                try:
+                    parsed_url = urlparse(url)
+                    if 'amazonaws.com' not in parsed_url.netloc:
+                        return False
+                    query_params = parse_qs(parsed_url.query)
+                    required_params = ['Signature', 'Expires']
+                    for param in required_params:
+                        if param not in query_params:
+                            return False
+                    if not query_params['Expires'][0].isdigit():
+                        return False
+                    signature = query_params['Signature'][0]
+                    if not re.match(r'^[A-Za-z0-9+/]+={0,2}$', signature):
+                        return False
+                    return True
+                except Exception:
+                    return False
+
+            if is_s3_presigned_url(url):
+                response = requests.get(url, headers=headers, allow_redirects=True)
+                if response.status_code in {200, 304}:
+                    return True, ""
+
             response = requests.head(url, headers=headers, allow_redirects=True)
             if response.status_code in {200, 304}:
                 return True, ""

+ 6 - 2
api/core/model_runtime/model_providers/bedrock/llm/llm.py

@@ -379,8 +379,12 @@ class BedrockLargeLanguageModel(LargeLanguageModel):
                         if not message_content.data.startswith("data:"):
                             # fetch image data from url
                             try:
-                                image_content = requests.get(message_content.data).content
-                                mime_type, _ = mimetypes.guess_type(message_content.data)
+                                url = message_content.data
+                                image_content = requests.get(url).content
+                                if '?' in url:
+                                    url = url.split('?')[0]
+                                mime_type, _ = mimetypes.guess_type(url)
+                                base64_data = base64.b64encode(image_content).decode('utf-8')
                             except Exception as ex:
                                 raise ValueError(f"Failed to fetch image data from url {message_content.data}, {ex}")
                         else:

File diff ditekan karena terlalu besar
+ 376 - 420
api/poetry.lock


+ 1 - 1
api/pyproject.toml

@@ -110,7 +110,7 @@ authlib = "1.3.1"
 azure-identity = "1.16.1"
 azure-storage-blob = "12.13.0"
 beautifulsoup4 = "4.12.2"
-boto3 = "1.34.136"
+boto3 = "1.34.148"
 bs4 = "~0.0.1"
 cachetools = "~5.3.0"
 celery = "~5.3.6"

Beberapa file tidak ditampilkan karena terlalu banyak file yang berubah dalam diff ini