feat(sandbox): implement sandbox archive upload/download endpoints and security enhancements

- Added sandbox archive upload and download proxy endpoints with signed URL verification.
- Introduced security helpers for generating and verifying signed URLs.
- Updated file-related API routes to include sandbox archive functionality.
- Refactored app asset storage methods to streamline download/upload URL generation.
This commit is contained in:
Harry
2026-01-26 01:11:53 +08:00
parent a471caf787
commit af17e20f99
17 changed files with 322 additions and 31 deletions

View File

@@ -1,6 +1,6 @@
Summary:
- Registers file-related API namespaces and routes for files service.
- Includes app-assets download/upload proxy controllers.
- Includes app-assets and sandbox archive proxy controllers.
Invariants:
- files_ns must include all file controller modules to register routes.

View File

@@ -0,0 +1,14 @@
Summary:
- Sandbox archive upload/download proxy endpoints (signed URL verification, stream to storage).
Invariants:
- Validates tenant_id and sandbox_id UUIDs.
- Verifies tenant-scoped signature and expiration before storage access.
- URL uses expires_at/nonce/sign query params.
Edge Cases:
- Missing archive returns NotFound.
- Invalid signature or expired link returns Forbidden.
Tests:
- Add unit tests for signature validation if needed.

View File

@@ -0,0 +1,9 @@
Summary:
- Sandbox lifecycle wrapper (ready/cancel/fail signals, mount/unmount, release).
Invariants:
- wait_ready raises with the original initialization error as the cause.
- release always attempts unmount and environment release, logging failures.
Tests:
- Covered by sandbox lifecycle/unit tests and workflow execution error handling.

View File

@@ -0,0 +1,2 @@
Summary:
- Sandbox security helper modules.

View File

@@ -0,0 +1,13 @@
Summary:
- Generates and verifies signed URLs for sandbox archive upload/download.
Invariants:
- tenant_id and sandbox_id must be UUIDs.
- Signatures are tenant-scoped and include operation, expiry, and nonce.
Edge Cases:
- Missing tenant private key raises ValueError.
- Expired or tampered signatures are rejected.
Tests:
- Add unit tests if sandbox archive signature behavior expands.

View File

@@ -0,0 +1,12 @@
Summary:
- Manages sandbox archive uploads/downloads for workspace persistence.
Invariants:
- Archive storage key is sandbox/<tenant_id>/<sandbox_id>.tar.gz.
- Signed URLs are tenant-scoped and use external files URL.
Edge Cases:
- Missing archive skips mount.
Tests:
- Covered indirectly via sandbox integration tests.

View File

@@ -14,7 +14,15 @@ api = ExternalApi(
files_ns = Namespace("files", description="File operations", path="/")
from . import app_assets_download, app_assets_upload, image_preview, storage_download, tool_files, upload
from . import (
app_assets_download,
app_assets_upload,
image_preview,
sandbox_archive,
storage_download,
tool_files,
upload,
)
api.add_namespace(files_ns)
@@ -25,6 +33,7 @@ __all__ = [
"bp",
"files_ns",
"image_preview",
"sandbox_archive",
"storage_download",
"tool_files",
"upload",

View File

@@ -0,0 +1,76 @@
from uuid import UUID
from flask import Response, request
from flask_restx import Resource
from pydantic import BaseModel, Field
from werkzeug.exceptions import Forbidden, NotFound
from controllers.files import files_ns
from core.sandbox.security.archive_signer import SandboxArchivePath, SandboxArchiveSigner
from extensions.ext_storage import storage
# Swagger 2.0 $ref template used when registering pydantic schemas with flask-restx.
DEFAULT_REF_TEMPLATE_SWAGGER_2_0 = "#/definitions/{model}"
class SandboxArchiveQuery(BaseModel):
    """Query parameters carried by a signed sandbox-archive proxy URL."""
    expires_at: int = Field(..., description="Unix timestamp when the link expires")
    nonce: str = Field(..., description="Random string for signature")
    sign: str = Field(..., description="HMAC signature")
# Register the schema with the namespace so it shows up in the generated API docs.
files_ns.schema_model(
    SandboxArchiveQuery.__name__,
    SandboxArchiveQuery.model_json_schema(ref_template=DEFAULT_REF_TEMPLATE_SWAGGER_2_0),
)
@files_ns.route("/sandbox-archives/<string:tenant_id>/<string:sandbox_id>/download")
class SandboxArchiveDownloadApi(Resource):
    """Signed-URL proxy endpoint that streams a sandbox archive from storage."""

    def get(self, tenant_id: str, sandbox_id: str):
        """Stream the archive for ``tenant_id``/``sandbox_id`` as application/gzip.

        Raises:
            Forbidden: malformed UUIDs or query params, an invalid or expired
                signature, or a tenant whose signing key cannot be resolved.
            NotFound: the archive does not exist in storage.
        """
        # Pydantic v2 ValidationError subclasses ValueError, so malformed or
        # missing query params are rejected as 403 instead of surfacing as 500.
        try:
            args = SandboxArchiveQuery.model_validate(request.args.to_dict(flat=True))
        except ValueError as exc:
            raise Forbidden("Invalid or expired download link") from exc
        try:
            archive_path = SandboxArchivePath(tenant_id=UUID(tenant_id), sandbox_id=UUID(sandbox_id))
        except ValueError as exc:
            raise Forbidden(str(exc)) from exc
        # The signer raises ValueError when the tenant's private key is missing
        # (e.g. an unknown tenant id); treat that the same as a bad signature
        # rather than letting it bubble up as a 500.
        try:
            valid = SandboxArchiveSigner.verify_download_signature(
                archive_path=archive_path,
                expires_at=args.expires_at,
                nonce=args.nonce,
                sign=args.sign,
            )
        except ValueError as exc:
            raise Forbidden("Invalid or expired download link") from exc
        if not valid:
            raise Forbidden("Invalid or expired download link")
        try:
            generator = storage.load_stream(archive_path.get_storage_key())
        except FileNotFoundError as exc:
            raise NotFound("Archive not found") from exc
        return Response(
            generator,
            mimetype="application/gzip",
            direct_passthrough=True,
        )
@files_ns.route("/sandbox-archives/<string:tenant_id>/<string:sandbox_id>/upload")
class SandboxArchiveUploadApi(Resource):
    """Signed-URL proxy endpoint that stores an uploaded sandbox archive."""

    def put(self, tenant_id: str, sandbox_id: str):
        """Persist the request body as the archive for ``tenant_id``/``sandbox_id``.

        Returns an empty 204 response on success.

        Raises:
            Forbidden: malformed UUIDs or query params, an invalid or expired
                signature, or a tenant whose signing key cannot be resolved.
        """
        # Pydantic v2 ValidationError subclasses ValueError, so malformed or
        # missing query params are rejected as 403 instead of surfacing as 500.
        try:
            args = SandboxArchiveQuery.model_validate(request.args.to_dict(flat=True))
        except ValueError as exc:
            raise Forbidden("Invalid or expired upload link") from exc
        try:
            archive_path = SandboxArchivePath(tenant_id=UUID(tenant_id), sandbox_id=UUID(sandbox_id))
        except ValueError as exc:
            raise Forbidden(str(exc)) from exc
        # The signer raises ValueError when the tenant's private key is missing
        # (e.g. an unknown tenant id); treat that the same as a bad signature.
        try:
            valid = SandboxArchiveSigner.verify_upload_signature(
                archive_path=archive_path,
                expires_at=args.expires_at,
                nonce=args.nonce,
                sign=args.sign,
            )
        except ValueError as exc:
            raise Forbidden("Invalid or expired upload link") from exc
        if not valid:
            raise Forbidden("Invalid or expired upload link")
        # NOTE(review): request.get_data() buffers the whole archive in memory;
        # fine for typical archives, but consider streaming for very large ones.
        storage.save(archive_path.get_storage_key(), request.get_data())
        return Response(status=204)

View File

@@ -301,21 +301,19 @@ class AppAssetStorage:
def get_storage_key(self, asset_path: AssetPathBase) -> str:
return asset_path.get_storage_key()
def get_download_url(self, asset_path: AssetPathBase, expires_in: int = 3600, *, for_external: bool = True) -> str:
def get_download_url(self, asset_path: AssetPathBase, expires_in: int = 3600) -> str:
storage_key = self.get_storage_key(asset_path)
try:
return self._storage.get_download_url(storage_key, expires_in)
except NotImplementedError:
pass
return self._generate_signed_proxy_download_url(asset_path, expires_in, for_external=for_external)
return self._generate_signed_proxy_download_url(asset_path, expires_in)
def get_download_urls(
self,
asset_paths: Iterable[AssetPathBase],
expires_in: int = 3600,
*,
for_external: bool = True,
) -> list[str]:
asset_paths_list = list(asset_paths)
storage_keys = [self.get_storage_key(asset_path) for asset_path in asset_paths_list]
@@ -325,17 +323,12 @@ class AppAssetStorage:
except NotImplementedError:
pass
return [
self._generate_signed_proxy_download_url(asset_path, expires_in, for_external=for_external)
for asset_path in asset_paths_list
]
return [self._generate_signed_proxy_download_url(asset_path, expires_in) for asset_path in asset_paths_list]
def get_upload_url(
self,
asset_path: AssetPathBase,
expires_in: int = 3600,
*,
for_external: bool = True,
) -> str:
storage_key = self.get_storage_key(asset_path)
try:
@@ -343,30 +336,26 @@ class AppAssetStorage:
except NotImplementedError:
pass
return self._generate_signed_proxy_upload_url(asset_path, expires_in, for_external=for_external)
return self._generate_signed_proxy_upload_url(asset_path, expires_in)
def _generate_signed_proxy_download_url(
self, asset_path: AssetPathBase, expires_in: int, *, for_external: bool
) -> str:
def _generate_signed_proxy_download_url(self, asset_path: AssetPathBase, expires_in: int) -> str:
expires_in = min(expires_in, dify_config.FILES_ACCESS_TIMEOUT)
expires_at = int(time.time()) + max(expires_in, 1)
nonce = os.urandom(16).hex()
sign = AppAssetSigner.create_download_signature(asset_path=asset_path, expires_at=expires_at, nonce=nonce)
base_url = dify_config.FILES_URL if for_external else (dify_config.INTERNAL_FILES_URL or dify_config.FILES_URL)
base_url = dify_config.FILES_URL
url = self._build_proxy_url(base_url=base_url, asset_path=asset_path, action="download")
query = urllib.parse.urlencode({"expires_at": expires_at, "nonce": nonce, "sign": sign})
return f"{url}?{query}"
def _generate_signed_proxy_upload_url(
self, asset_path: AssetPathBase, expires_in: int, *, for_external: bool
) -> str:
def _generate_signed_proxy_upload_url(self, asset_path: AssetPathBase, expires_in: int) -> str:
expires_in = min(expires_in, dify_config.FILES_ACCESS_TIMEOUT)
expires_at = int(time.time()) + max(expires_in, 1)
nonce = os.urandom(16).hex()
sign = AppAssetSigner.create_upload_signature(asset_path=asset_path, expires_at=expires_at, nonce=nonce)
base_url = dify_config.FILES_URL if for_external else (dify_config.INTERNAL_FILES_URL or dify_config.FILES_URL)
base_url = dify_config.FILES_URL
url = self._build_proxy_url(base_url=base_url, asset_path=asset_path, action="upload")
query = urllib.parse.urlencode({"expires_at": expires_at, "nonce": nonce, "sign": sign})
return f"{url}?{query}"

View File

@@ -155,7 +155,7 @@ class SandboxBuilder:
@classmethod
def draft_id(cls, user_id: str) -> str:
return f"sandbox_draft_{user_id}"
return user_id
class VMConfig:

View File

@@ -28,7 +28,7 @@ class AppAssetsInitializer(AsyncSandboxInitializer):
vm = sandbox.vm
asset_storage = app_asset_storage
zip_ref = AssetPath.build_zip(self._tenant_id, self._app_id, self._assets_id)
download_url = asset_storage.get_download_url(zip_ref, for_external=False)
download_url = asset_storage.get_download_url(zip_ref)
(
pipeline(vm)

View File

@@ -43,7 +43,7 @@ class DraftAppAssetsInitializer(AsyncSandboxInitializer):
else AssetPath.draft(self._tenant_id, self._app_id, node.id)
for node in nodes
]
urls = storage.get_download_urls(refs, DRAFT_ASSETS_EXPIRES_IN, for_external=False)
urls = storage.get_download_urls(refs, DRAFT_ASSETS_EXPIRES_IN)
items = [AssetDownloadItem(path=tree.get_path(node.id).lstrip("/"), url=url) for node, url in zip(nodes, urls)]
script = AssetDownloadService.build_download_script(items, AppAssets.PATH)
pipeline(vm).add(

View File

@@ -87,7 +87,10 @@ class Sandbox:
if self._cancel_event.is_set():
raise RuntimeError("Sandbox initialization was cancelled")
if self._init_error is not None:
raise RuntimeError("Sandbox initialization failed") from self._init_error
if isinstance(self._init_error, ValueError):
raise RuntimeError(f"Sandbox initialization failed: {self._init_error}") from self._init_error
else:
raise RuntimeError("Sandbox initialization failed") from self._init_error
def mount(self) -> bool:
return self._storage.mount(self._vm)

View File

@@ -0,0 +1 @@
"""Sandbox security helpers."""

View File

@@ -0,0 +1,152 @@
from __future__ import annotations
import base64
import hashlib
import hmac
import os
import time
import urllib.parse
from dataclasses import dataclass
from uuid import UUID
from configs import dify_config
from libs import rsa
@dataclass(frozen=True)
class SandboxArchivePath:
    """Identifies a sandbox workspace archive by tenant and sandbox UUID."""

    tenant_id: UUID
    sandbox_id: UUID

    def get_storage_key(self) -> str:
        """Object-storage key under which the archive tarball is kept."""
        return "sandbox/{}/{}.tar.gz".format(self.tenant_id, self.sandbox_id)

    def proxy_path(self) -> str:
        """`<tenant_id>/<sandbox_id>` fragment used in the proxy URL."""
        return "{}/{}".format(self.tenant_id, self.sandbox_id)
class SandboxArchiveSigner:
    """Creates and verifies tenant-scoped HMAC signatures for sandbox
    archive upload/download proxy URLs.

    The HMAC key is derived (via SHA-256) from the tenant's RSA private
    key, so a signature minted for one tenant never validates for another.
    The signed message binds the operation, archive path, expiry, and nonce.
    """

    SIGNATURE_PREFIX = "sandbox-archive"
    SIGNATURE_VERSION = "v1"
    OPERATION_DOWNLOAD = "download"
    OPERATION_UPLOAD = "upload"

    @classmethod
    def create_download_signature(cls, archive_path: SandboxArchivePath, expires_at: int, nonce: str) -> str:
        """Sign a download link for *archive_path*."""
        return cls._create_signature(
            archive_path=archive_path,
            operation=cls.OPERATION_DOWNLOAD,
            expires_at=expires_at,
            nonce=nonce,
        )

    @classmethod
    def create_upload_signature(cls, archive_path: SandboxArchivePath, expires_at: int, nonce: str) -> str:
        """Sign an upload link for *archive_path*."""
        return cls._create_signature(
            archive_path=archive_path,
            operation=cls.OPERATION_UPLOAD,
            expires_at=expires_at,
            nonce=nonce,
        )

    @classmethod
    def verify_download_signature(
        cls, archive_path: SandboxArchivePath, expires_at: int, nonce: str, sign: str
    ) -> bool:
        """Return True if *sign* is a valid, unexpired download signature."""
        return cls._verify_signature(
            archive_path=archive_path,
            operation=cls.OPERATION_DOWNLOAD,
            expires_at=expires_at,
            nonce=nonce,
            sign=sign,
        )

    @classmethod
    def verify_upload_signature(cls, archive_path: SandboxArchivePath, expires_at: int, nonce: str, sign: str) -> bool:
        """Return True if *sign* is a valid, unexpired upload signature."""
        return cls._verify_signature(
            archive_path=archive_path,
            operation=cls.OPERATION_UPLOAD,
            expires_at=expires_at,
            nonce=nonce,
            sign=sign,
        )

    @classmethod
    def _verify_signature(
        cls,
        *,
        archive_path: SandboxArchivePath,
        operation: str,
        expires_at: int,
        nonce: str,
        sign: str,
    ) -> bool:
        """Recompute the expected signature and compare in constant time.

        Rejects signatures that are expired, or whose expiry lies further in
        the future than FILES_ACCESS_TIMEOUT allows (an expiry that could not
        have been produced by build_signed_url).
        """
        if expires_at <= 0:
            return False
        expected_sign = cls._create_signature(
            archive_path=archive_path,
            operation=operation,
            expires_at=expires_at,
            nonce=nonce,
        )
        # Compare as bytes: hmac.compare_digest raises TypeError for non-ASCII
        # str input, and `sign` is attacker-controlled query-string data.
        if not hmac.compare_digest(sign.encode("utf-8"), expected_sign.encode("utf-8")):
            return False
        current_time = int(time.time())
        if expires_at < current_time:
            return False
        if expires_at - current_time > dify_config.FILES_ACCESS_TIMEOUT:
            return False
        return True

    @classmethod
    def build_signed_url(
        cls,
        *,
        archive_path: SandboxArchivePath,
        expires_in: int,
        action: str,
    ) -> str:
        """Build a fully-qualified signed proxy URL for *action* on *archive_path*.

        *expires_in* is clamped to FILES_ACCESS_TIMEOUT and to at least one
        second; the URL carries expires_at/nonce/sign query parameters.
        """
        expires_in = min(expires_in, dify_config.FILES_ACCESS_TIMEOUT)
        expires_at = int(time.time()) + max(expires_in, 1)
        nonce = os.urandom(16).hex()
        sign = cls._create_signature(
            archive_path=archive_path,
            operation=action,
            expires_at=expires_at,
            nonce=nonce,
        )
        base_url = dify_config.FILES_URL
        url = f"{base_url}/files/sandbox-archives/{archive_path.proxy_path()}/{action}"
        query = urllib.parse.urlencode({"expires_at": expires_at, "nonce": nonce, "sign": sign})
        return f"{url}?{query}"

    @classmethod
    def _create_signature(
        cls,
        *,
        archive_path: SandboxArchivePath,
        operation: str,
        expires_at: int,
        nonce: str,
    ) -> str:
        """HMAC-SHA256 over the versioned message, urlsafe-base64 encoded."""
        key = cls._tenant_key(str(archive_path.tenant_id))
        message = (
            f"{cls.SIGNATURE_PREFIX}|{cls.SIGNATURE_VERSION}|{operation}|"
            f"{archive_path.tenant_id}|{archive_path.sandbox_id}|{expires_at}|{nonce}"
        )
        sign = hmac.new(key, message.encode(), hashlib.sha256).digest()
        return base64.urlsafe_b64encode(sign).decode()

    @classmethod
    def _tenant_key(cls, tenant_id: str) -> bytes:
        """Derive the tenant-scoped HMAC key from the tenant's RSA private key.

        Raises:
            ValueError: if no private key exists for *tenant_id*.
        """
        try:
            rsa_key, _ = rsa.get_decrypt_decoding(tenant_id)
        except rsa.PrivkeyNotFoundError as exc:
            raise ValueError(f"Tenant private key missing for tenant_id={tenant_id}") from exc
        private_key = rsa_key.export_key()
        return hashlib.sha256(private_key).digest()

View File

@@ -1,10 +1,11 @@
import logging
from uuid import UUID
from core.sandbox.security.archive_signer import SandboxArchivePath, SandboxArchiveSigner
from core.virtual_environment.__base.exec import PipelineExecutionError
from core.virtual_environment.__base.helpers import pipeline
from core.virtual_environment.__base.virtual_environment import VirtualEnvironment
from extensions.ext_storage import storage
from extensions.storage.file_presign_storage import FilePresignStorage
from .sandbox_storage import SandboxStorage
@@ -34,14 +35,19 @@ class ArchiveSandboxStorage(SandboxStorage):
@property
def _storage_key(self) -> str:
return f"sandbox/{self._tenant_id}/{self._sandbox_id}.tar.gz"
return SandboxArchivePath(UUID(self._tenant_id), UUID(self._sandbox_id)).get_storage_key()
def mount(self, sandbox: VirtualEnvironment) -> bool:
if not self.exists():
logger.debug("No archive found for sandbox %s, skipping mount", self._sandbox_id)
return False
download_url = FilePresignStorage(storage.storage_runner).get_download_url(self._storage_key)
archive_path = SandboxArchivePath(UUID(self._tenant_id), UUID(self._sandbox_id))
download_url = SandboxArchiveSigner.build_signed_url(
archive_path=archive_path,
expires_in=ARCHIVE_DOWNLOAD_TIMEOUT,
action=SandboxArchiveSigner.OPERATION_DOWNLOAD,
)
try:
(
pipeline(sandbox)
@@ -58,7 +64,12 @@ class ArchiveSandboxStorage(SandboxStorage):
return True
def unmount(self, sandbox: VirtualEnvironment) -> bool:
upload_url = FilePresignStorage(storage.storage_runner).get_upload_url(self._storage_key)
archive_path = SandboxArchivePath(UUID(self._tenant_id), UUID(self._sandbox_id))
upload_url = SandboxArchiveSigner.build_signed_url(
archive_path=archive_path,
expires_in=ARCHIVE_UPLOAD_TIMEOUT,
action=SandboxArchiveSigner.OPERATION_UPLOAD,
)
(
pipeline(sandbox)
.add(

View File

@@ -57,7 +57,7 @@ class CachedPresignStorage(StorageWrapper):
if cached:
return cached
url = super().get_download_url(filename, expires_in)
url = self._storage.get_download_url(filename, expires_in)
self._set_cached(cache_key, url, expires_in)
return url
@@ -94,7 +94,7 @@ class CachedPresignStorage(StorageWrapper):
# Batch fetch uncached URLs from storage
if uncached_filenames:
uncached_urls = [super().get_download_url(f, expires_in) for f in uncached_filenames]
uncached_urls = [self._storage.get_download_url(f, expires_in) for f in uncached_filenames]
# Fill results at correct positions
for idx, url in zip(uncached_indices, uncached_urls):