Merge remote-tracking branch 'origin/main' into feature/task-quadrant-view

fix(web): quadrant matrix i18n
chore: improve accessibility for learn more link (#31120)
2026-02-14 17:40:13 -05:00 · 2026-01-16 18:20:29 +08:00 · 2026-01-16 18:17:28 +08:00 · 2026-01-16 18:12:07 +08:00 · 2026-01-16 17:56:55 +08:00 · 2026-01-16 17:49:52 +08:00
142 changed files with 16975 additions and 4683 deletions
--- a/.agent/skills
+++ b/.agent/skills
@@ -0,0 +1 @@
+../.claude/skills
--- a/.claude/skills/orpc-contract-first/SKILL.md
+++ b/.claude/skills/orpc-contract-first/SKILL.md
@@ -0,0 +1,46 @@
+---
+name: orpc-contract-first
+description: Guide for implementing oRPC contract-first API patterns in Dify frontend. Triggers when creating new API contracts, adding service endpoints, integrating TanStack Query with typed contracts, or migrating legacy service calls to oRPC. Use for all API layer work in web/contract and web/service directories.
+---
+
+# oRPC Contract-First Development
+
+## Project Structure
+
+```
+web/contract/
+├── base.ts           # Base contract (inputStructure: 'detailed')
+├── router.ts         # Router composition & type exports
+├── marketplace.ts    # Marketplace contracts
+└── console/          # Console contracts by domain
+    ├── system.ts
+    └── billing.ts
+```
+
+## Workflow
+
+1. **Create contract** in `web/contract/console/{domain}.ts`
+   - Import `base` from `../base` and `type` from `@orpc/contract`
+   - Define route with `path`, `method`, `input`, `output`
+
+2. **Register in router** at `web/contract/router.ts`
+   - Import directly from domain file (no barrel files)
+   - Nest by API prefix: `billing: { invoices, bindPartnerStack }`
+
+3. **Create hooks** in `web/service/use-{domain}.ts`
+   - Use `consoleQuery.{group}.{contract}.queryKey()` for query keys
+   - Use `consoleClient.{group}.{contract}()` for API calls
+
+## Key Rules
+
+- **Input structure**: Always use `{ params, query?, body? }` format
+- **Path params**: Use `{paramName}` in path, match in `params` object
+- **Router nesting**: Group by API prefix (e.g., `/billing/*` → `billing: {}`)
+- **No barrel files**: Import directly from specific files
+- **Types**: Import from `@/types/`, use `type<T>()` helper
+
+## Type Export
+
+```typescript
+export type ConsoleInputs = InferContractRouterInputs<typeof consoleRouterContract>
+```
--- a/.github/workflows/autofix.yml
+++ b/.github/workflows/autofix.yml
@@ -16,14 +16,14 @@ jobs:

      - name: Check Docker Compose inputs
        id: docker-compose-changes
-        uses: tj-actions/changed-files@v46
+        uses: tj-actions/changed-files@v47
        with:
          files: |
            docker/generate_docker_compose
            docker/.env.example
            docker/docker-compose-template.yaml
            docker/docker-compose.yaml
-      - uses: actions/setup-python@v5
+      - uses: actions/setup-python@v6
        with:
          python-version: "3.11"

--- a/.github/workflows/build-push.yml
+++ b/.github/workflows/build-push.yml
@@ -112,7 +112,7 @@ jobs:
            context: "web"
    steps:
      - name: Download digests
-        uses: actions/download-artifact@v4
+        uses: actions/download-artifact@v7
        with:
          path: /tmp/digests
          pattern: digests-${{ matrix.context }}-*
--- a/.github/workflows/deploy-agent-dev.yml
+++ b/.github/workflows/deploy-agent-dev.yml
@@ -19,7 +19,7 @@ jobs:
      github.event.workflow_run.head_branch == 'deploy/agent-dev'
    steps:
      - name: Deploy to server
-        uses: appleboy/ssh-action@v0.1.8
+        uses: appleboy/ssh-action@v1
        with:
          host: ${{ secrets.AGENT_DEV_SSH_HOST }}
          username: ${{ secrets.SSH_USER }}
--- a/.github/workflows/deploy-dev.yml
+++ b/.github/workflows/deploy-dev.yml
@@ -16,7 +16,7 @@ jobs:
      github.event.workflow_run.head_branch == 'deploy/dev'
    steps:
      - name: Deploy to server
-        uses: appleboy/ssh-action@v0.1.8
+        uses: appleboy/ssh-action@v1
        with:
          host: ${{ secrets.SSH_HOST }}
          username: ${{ secrets.SSH_USER }}
--- a/.github/workflows/deploy-hitl.yml
+++ b/.github/workflows/deploy-hitl.yml
@@ -20,7 +20,7 @@ jobs:
      )
    steps:
      - name: Deploy to server
-        uses: appleboy/ssh-action@v0.1.8
+        uses: appleboy/ssh-action@v1
        with:
          host: ${{ secrets.HITL_SSH_HOST }}
          username: ${{ secrets.SSH_USER }}
--- a/.github/workflows/stale.yml
+++ b/.github/workflows/stale.yml
@@ -18,7 +18,7 @@ jobs:
      pull-requests: write

    steps:
-      - uses: actions/stale@v5
+      - uses: actions/stale@v10
        with:
          days-before-issue-stale: 15
          days-before-issue-close: 3
--- a/.github/workflows/style.yml
+++ b/.github/workflows/style.yml
@@ -65,6 +65,9 @@ jobs:
    defaults:
      run:
        working-directory: ./web
+    permissions:
+      checks: write
+      pull-requests: read

    steps:
      - name: Checkout code
@@ -90,7 +93,7 @@ jobs:
        uses: actions/setup-node@v6
        if: steps.changed-files.outputs.any_changed == 'true'
        with:
-          node-version: 22
+          node-version: 24
          cache: pnpm
          cache-dependency-path: ./web/pnpm-lock.yaml

@@ -103,7 +106,15 @@ jobs:
        if: steps.changed-files.outputs.any_changed == 'true'
        working-directory: ./web
        run: |
-          pnpm run lint
+          pnpm run lint:report
+        continue-on-error: true
+
+      # - name: Annotate Code
+      #   if: steps.changed-files.outputs.any_changed == 'true' && github.event_name == 'pull_request'
+      #   uses: DerLev/eslint-annotations@51347b3a0abfb503fc8734d5ae31c4b151297fae
+      #   with:
+      #     eslint-report: web/eslint_report.json
+      #     github-token: ${{ secrets.GITHUB_TOKEN }}

      - name: Web type check
        if: steps.changed-files.outputs.any_changed == 'true'
--- a/.github/workflows/tool-test-sdks.yaml
+++ b/.github/workflows/tool-test-sdks.yaml
@@ -16,10 +16,6 @@ jobs:
    name: unit test for Node.js SDK
    runs-on: ubuntu-latest

-    strategy:
-      matrix:
-        node-version: [16, 18, 20, 22]
-
    defaults:
      run:
        working-directory: sdks/nodejs-client
@@ -29,10 +25,10 @@ jobs:
        with:
          persist-credentials: false

-      - name: Use Node.js ${{ matrix.node-version }}
+      - name: Use Node.js
        uses: actions/setup-node@v6
        with:
-          node-version: ${{ matrix.node-version }}
+          node-version: 24
          cache: ''
          cache-dependency-path: 'pnpm-lock.yaml'

--- a/.github/workflows/translate-i18n-claude.yml
+++ b/.github/workflows/translate-i18n-claude.yml
@@ -57,7 +57,7 @@ jobs:
      - name: Set up Node.js
        uses: actions/setup-node@v6
        with:
-          node-version: 'lts/*'
+          node-version: 24
          cache: pnpm
          cache-dependency-path: ./web/pnpm-lock.yaml

--- a/.github/workflows/trigger-i18n-sync.yml
+++ b/.github/workflows/trigger-i18n-sync.yml
@@ -21,7 +21,7 @@ jobs:

    steps:
      - name: Checkout repository
-        uses: actions/checkout@v4
+        uses: actions/checkout@v6
        with:
          fetch-depth: 0

--- a/.github/workflows/web-tests.yml
+++ b/.github/workflows/web-tests.yml
@@ -31,7 +31,7 @@ jobs:
      - name: Setup Node.js
        uses: actions/setup-node@v6
        with:
-          node-version: 22
+          node-version: 24
          cache: pnpm
          cache-dependency-path: ./web/pnpm-lock.yaml

--- a/AGENTS.md
+++ b/AGENTS.md
@@ -12,12 +12,8 @@ The codebase is split into:

 ## Backend Workflow

+- Read `api/AGENTS.md` for details
 - Run backend CLI commands through `uv run --project api <command>`.
-
- Before submission, all backend modifications must pass local checks: `make lint`, `make type-check`, and `uv run --project api --dev dev/pytest/pytest_unit_tests.sh`.
-
- Use Makefile targets for linting and formatting; `make lint` and `make type-check` cover the required checks.
-
 - Integration tests are CI-only and are not expected to run in the local environment.

 ## Frontend Workflow
--- a/Makefile
+++ b/Makefile
@@ -61,7 +61,8 @@ check:

 lint:
 	@echo "🔧 Running ruff format, check with fixes, import linter, and dotenv-linter..."
-	@uv run --project api --dev sh -c 'ruff format ./api && ruff check --fix ./api'
+	@uv run --project api --dev ruff format ./api
+	@uv run --project api --dev ruff check --fix ./api
 	@uv run --directory api --dev lint-imports
 	@uv run --project api --dev dotenv-linter ./api/.env.example ./web/.env.example
 	@echo "✅ Linting complete"
@@ -73,7 +74,12 @@ type-check:

 test:
 	@echo "🧪 Running backend unit tests..."
-	@uv run --project api --dev dev/pytest/pytest_unit_tests.sh
+	@if [ -n "$(TARGET_TESTS)" ]; then \
+		echo "Target: $(TARGET_TESTS)"; \
+		uv run --project api --dev pytest $(TARGET_TESTS); \
+	else \
+		uv run --project api --dev dev/pytest/pytest_unit_tests.sh; \
+	fi
 	@echo "✅ Tests complete"

 # Build Docker images
@@ -125,7 +131,7 @@ help:
 	@echo "  make check          - Check code with ruff"
 	@echo "  make lint           - Format, fix, and lint code (ruff, imports, dotenv)"
 	@echo "  make type-check     - Run type checking with basedpyright"
-	@echo "  make test           - Run backend unit tests"
+	@echo "  make test           - Run backend unit tests (or TARGET_TESTS=./api/tests/<target_tests>)"
 	@echo ""
 	@echo "Docker Build Targets:"
 	@echo "  make build-web      - Build web Docker image"
--- a/agent-notes/.gitkeep
+++ b/agent-notes/.gitkeep
--- a/api/.env.example
+++ b/api/.env.example
@@ -417,6 +417,8 @@ SMTP_USERNAME=123
 SMTP_PASSWORD=abc
 SMTP_USE_TLS=true
 SMTP_OPPORTUNISTIC_TLS=false
+# Optional: override the local hostname used for SMTP HELO/EHLO
+SMTP_LOCAL_HOSTNAME=
 # Sendgid configuration
 SENDGRID_API_KEY=
 # Sentry configuration
@@ -713,3 +715,4 @@ ANNOTATION_IMPORT_MAX_CONCURRENT=5
 SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD=21
 SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_SIZE=1000
 SANDBOX_EXPIRED_RECORDS_RETENTION_DAYS=30
+
--- a/api/AGENTS.md
+++ b/api/AGENTS.md
@@ -1,62 +1,236 @@
-# Agent Skill Index
+# API Agent Guide
+
+## Agent Notes (must-check)
+
+Before you start work on any backend file under `api/`, you MUST check whether a related note exists under:
+
+- `agent-notes/<same-relative-path-as-target-file>.md`
+
+Rules:
+
+- **Path mapping**: for a target file `<path>/<name>.py`, the note must be `agent-notes/<path>/<name>.py.md` (same folder structure, same filename, plus `.md`).
+- **Before working**:
+  - If the note exists, read it first and follow any constraints/decisions recorded there.
+  - If the note conflicts with the current code, or references an "origin" file/path that has been deleted, renamed, or migrated, treat the **code as the single source of truth** and update the note to match reality.
+  - If the note does not exist, create it with a short architecture/intent summary and any relevant invariants/edge cases.
+- **During working**:
+  - Keep the note in sync as you discover constraints, make decisions, or change approach.
+  - If you move/rename a file, migrate its note to the new mapped path (and fix any outdated references inside the note).
+  - Record non-obvious edge cases, trade-offs, and the test/verification plan as you go (not just at the end).
+  - Keep notes **coherent**: integrate new findings into the relevant sections and rewrite for clarity; avoid append-only “recent fix” / changelog-style additions unless the note is explicitly intended to be a changelog.
+- **When finishing work**:
+  - Update the related note(s) to reflect what changed, why, and any new edge cases/tests.
+  - If a file is deleted, remove or clearly deprecate the corresponding note so it cannot be mistaken as current guidance.
+  - Keep notes concise and accurate; they are meant to prevent repeated rediscovery.
+
+## Skill Index

 Start with the section that best matches your need. Each entry lists the problems it solves plus key files/concepts so you know what to expect before opening it.

-______________________________________________________________________
+### Platform Foundations

-## Platform Foundations
-
- **[Infrastructure Overview](agent_skills/infra.md)**\
-  When to read this:
+#### [Infrastructure Overview](agent_skills/infra.md)

+- **When to read this**
  - You need to understand where a feature belongs in the architecture.
  - You’re wiring storage, Redis, vector stores, or OTEL.
-  - You’re about to add CLI commands or async jobs.\
-    What it covers: configuration stack (`configs/app_config.py`, remote settings), storage entry points (`extensions/ext_storage.py`, `core/file/file_manager.py`), Redis conventions (`extensions/ext_redis.py`), plugin runtime topology, vector-store factory (`core/rag/datasource/vdb/*`), observability hooks, SSRF proxy usage, and core CLI commands.
+  - You’re about to add CLI commands or async jobs.
+- **What it covers**
+  - Configuration stack (`configs/app_config.py`, remote settings)
+  - Storage entry points (`extensions/ext_storage.py`, `core/file/file_manager.py`)
+  - Redis conventions (`extensions/ext_redis.py`)
+  - Plugin runtime topology
+  - Vector-store factory (`core/rag/datasource/vdb/*`)
+  - Observability hooks
+  - SSRF proxy usage
+  - Core CLI commands

- **[Coding Style](agent_skills/coding_style.md)**\
-  When to read this:
+### Plugin & Extension Development

-  - You’re writing or reviewing backend code and need the authoritative checklist.
-  - You’re unsure about Pydantic validators, SQLAlchemy session usage, or logging patterns.
-  - You want the exact lint/type/test commands used in PRs.\
-    Includes: Ruff & BasedPyright commands, no-annotation policy, session examples (`with Session(db.engine, ...)`), `@field_validator` usage, logging expectations, and the rule set for file size, helpers, and package management.
-
-______________________________________________________________________
-
-## Plugin & Extension Development
-
- **[Plugin Systems](agent_skills/plugin.md)**\
-  When to read this:
+#### [Plugin Systems](agent_skills/plugin.md)

+- **When to read this**
  - You’re building or debugging a marketplace plugin.
-  - You need to know how manifests, providers, daemons, and migrations fit together.\
-    What it covers: plugin manifests (`core/plugin/entities/plugin.py`), installation/upgrade flows (`services/plugin/plugin_service.py`, CLI commands), runtime adapters (`core/plugin/impl/*` for tool/model/datasource/trigger/endpoint/agent), daemon coordination (`core/plugin/entities/plugin_daemon.py`), and how provider registries surface capabilities to the rest of the platform.
+  - You need to know how manifests, providers, daemons, and migrations fit together.
+- **What it covers**
+  - Plugin manifests (`core/plugin/entities/plugin.py`)
+  - Installation/upgrade flows (`services/plugin/plugin_service.py`, CLI commands)
+  - Runtime adapters (`core/plugin/impl/*` for tool/model/datasource/trigger/endpoint/agent)
+  - Daemon coordination (`core/plugin/entities/plugin_daemon.py`)
+  - How provider registries surface capabilities to the rest of the platform

- **[Plugin OAuth](agent_skills/plugin_oauth.md)**\
-  When to read this:
+#### [Plugin OAuth](agent_skills/plugin_oauth.md)

+- **When to read this**
  - You must integrate OAuth for a plugin or datasource.
-  - You’re handling credential encryption or refresh flows.\
-    Topics: credential storage, encryption helpers (`core/helper/provider_encryption.py`), OAuth client bootstrap (`services/plugin/oauth_service.py`, `services/plugin/plugin_parameter_service.py`), and how console/API layers expose the flows.
+  - You’re handling credential encryption or refresh flows.
+- **Topics**
+  - Credential storage
+  - Encryption helpers (`core/helper/provider_encryption.py`)
+  - OAuth client bootstrap (`services/plugin/oauth_service.py`, `services/plugin/plugin_parameter_service.py`)
+  - How console/API layers expose the flows

-______________________________________________________________________
+### Workflow Entry & Execution

-## Workflow Entry & Execution
+#### [Trigger Concepts](agent_skills/trigger.md)

- **[Trigger Concepts](agent_skills/trigger.md)**\
-  When to read this:
+- **When to read this**
  - You’re debugging why a workflow didn’t start.
  - You’re adding a new trigger type or hook.
-  - You need to trace async execution, draft debugging, or webhook/schedule pipelines.\
-    Details: Start-node taxonomy, webhook & schedule internals (`core/workflow/nodes/trigger_*`, `services/trigger/*`), async orchestration (`services/async_workflow_service.py`, Celery queues), debug event bus, and storage/logging interactions.
+  - You need to trace async execution, draft debugging, or webhook/schedule pipelines.
+- **Details**
+  - Start-node taxonomy
+  - Webhook & schedule internals (`core/workflow/nodes/trigger_*`, `services/trigger/*`)
+  - Async orchestration (`services/async_workflow_service.py`, Celery queues)
+  - Debug event bus
+  - Storage/logging interactions

-______________________________________________________________________
+## General Reminders

-## Additional Notes for Agents
-
- All skill docs assume you follow the coding style guide—run Ruff/BasedPyright/tests listed there before submitting changes.
+- All skill docs assume you follow the coding style rules below—run the lint/type/test commands before submitting changes.
 - When you cannot find an answer in these briefs, search the codebase using the paths referenced (e.g., `core/plugin/impl/tool.py`, `services/dataset_service.py`).
 - If you run into cross-cutting concerns (tenancy, configuration, storage), check the infrastructure guide first; it links to most supporting modules.
 - Keep multi-tenancy and configuration central: everything flows through `configs.dify_config` and `tenant_id`.
 - When touching plugins or triggers, consult both the system overview and the specialised doc to ensure you adjust lifecycle, storage, and observability consistently.
+
+## Coding Style
+
+This is the default standard for backend code in this repo. Follow it for new code and use it as the checklist when reviewing changes.
+
+### Linting & Formatting
+
+- Use Ruff for formatting and linting (follow `.ruff.toml`).
+- Keep each line under 120 characters (including spaces).
+
+### Naming Conventions
+
+- Use `snake_case` for variables and functions.
+- Use `PascalCase` for classes.
+- Use `UPPER_CASE` for constants.
+
+### Typing & Class Layout
+
+- Code should usually include type annotations that match the repo’s current Python version (avoid untyped public APIs and “mystery” values).
+- Prefer modern typing forms (e.g. `list[str]`, `dict[str, int]`) and avoid `Any` unless there’s a strong reason.
+- For classes, declare member variables at the top of the class body (before `__init__`) so the class shape is obvious at a glance:
+
+```python
+from datetime import datetime
+
+
+class Example:
+    user_id: str
+    created_at: datetime
+
+    def __init__(self, user_id: str, created_at: datetime) -> None:
+        self.user_id = user_id
+        self.created_at = created_at
+```
+
+### General Rules
+
+- Use Pydantic v2 conventions.
+- Use `uv` for Python package management in this repo (usually with `--project api`).
+- Prefer simple functions over small “utility classes” for lightweight helpers.
+- Avoid implementing dunder methods unless it’s clearly needed and matches existing patterns.
+- Never start long-running services as part of agent work (`uv run app.py`, `flask run`, etc.); running tests is allowed.
+- Keep files below ~800 lines; split when necessary.
+- Keep code readable and explicit—avoid clever hacks.
+
+### Architecture & Boundaries
+
+- Mirror the layered architecture: controller → service → core/domain.
+- Reuse existing helpers in `core/`, `services/`, and `libs/` before creating new abstractions.
+- Optimise for observability: deterministic control flow, clear logging, actionable errors.
+
+### Logging & Errors
+
+- Never use `print`; use a module-level logger:
+  - `logger = logging.getLogger(__name__)`
+- Include tenant/app/workflow identifiers in log context when relevant.
+- Raise domain-specific exceptions (`services/errors`, `core/errors`) and translate them into HTTP responses in controllers.
+- Log retryable events at `warning`, terminal failures at `error`.
+
+### SQLAlchemy Patterns
+
+- Models inherit from `models.base.TypeBase`; do not create ad-hoc metadata or engines.
+- Open sessions with context managers:
+
+```python
+from sqlalchemy.orm import Session
+
+with Session(db.engine, expire_on_commit=False) as session:
+    stmt = select(Workflow).where(
+        Workflow.id == workflow_id,
+        Workflow.tenant_id == tenant_id,
+    )
+    workflow = session.execute(stmt).scalar_one_or_none()
+```
+
+- Prefer SQLAlchemy expressions; avoid raw SQL unless necessary.
+- Always scope queries by `tenant_id` and protect write paths with safeguards (`FOR UPDATE`, row counts, etc.).
+- Introduce repository abstractions only for very large tables (e.g., workflow executions) or when alternative storage strategies are required.
+
+### Storage & External I/O
+
+- Access storage via `extensions.ext_storage.storage`.
+- Use `core.helper.ssrf_proxy` for outbound HTTP fetches.
+- Background tasks that touch storage must be idempotent, and should log relevant object identifiers.
+
+### Pydantic Usage
+
+- Define DTOs with Pydantic v2 models and forbid extras by default.
+- Use `@field_validator` / `@model_validator` for domain rules.
+
+Example:
+
+```python
+from pydantic import BaseModel, ConfigDict, HttpUrl, field_validator
+
+
+class TriggerConfig(BaseModel):
+    endpoint: HttpUrl
+    secret: str
+
+    model_config = ConfigDict(extra="forbid")
+
+    @field_validator("secret")
+    def ensure_secret_prefix(cls, value: str) -> str:
+        if not value.startswith("dify_"):
+            raise ValueError("secret must start with dify_")
+        return value
+```
+
+### Generics & Protocols
+
+- Use `typing.Protocol` to define behavioural contracts (e.g., cache interfaces).
+- Apply generics (`TypeVar`, `Generic`) for reusable utilities like caches or providers.
+- Validate dynamic inputs at runtime when generics cannot enforce safety alone.
+
+### Tooling & Checks
+
+Quick checks while iterating:
+
+- Format: `make format`
+- Lint (includes auto-fix): `make lint`
+- Type check: `make type-check`
+- Targeted tests: `make test TARGET_TESTS=./api/tests/<target_tests>`
+
+Before opening a PR / submitting:
+
+- `make lint`
+- `make type-check`
+- `make test`
+
+### Controllers & Services
+
+- Controllers: parse input via Pydantic, invoke services, return serialised responses; no business logic.
+- Services: coordinate repositories, providers, background tasks; keep side effects explicit.
+- Document non-obvious behaviour with concise comments.
+
+### Miscellaneous
+
+- Use `configs.dify_config` for configuration—never read environment variables directly.
+- Maintain tenant awareness end-to-end; `tenant_id` must flow through every layer touching shared resources.
+- Queue async work through `services/async_workflow_service`; implement tasks under `tasks/` with explicit queue selection.
+- Keep experimental scripts under `dev/`; do not ship them in production builds.
--- a/api/agent_skills/coding_style.md
+++ b/api/agent_skills/coding_style.md
@@ -1,115 +0,0 @@
-## Linter
-
- Always follow `.ruff.toml`.
- Run `uv run ruff check --fix --unsafe-fixes`.
- Keep each line under 100 characters (including spaces).
-
-## Code Style
-
- `snake_case` for variables and functions.
- `PascalCase` for classes.
- `UPPER_CASE` for constants.
-
-## Rules
-
- Use Pydantic v2 standard.
- Use `uv` for package management.
- Do not override dunder methods like `__init__`, `__iadd__`, etc.
- Never launch services (`uv run app.py`, `flask run`, etc.); running tests under `tests/` is allowed.
- Prefer simple functions over classes for lightweight helpers.
- Keep files below 800 lines; split when necessary.
- Keep code readable—no clever hacks.
- Never use `print`; log with `logger = logging.getLogger(__name__)`.
-
-## Guiding Principles
-
- Mirror the project’s layered architecture: controller → service → core/domain.
- Reuse existing helpers in `core/`, `services/`, and `libs/` before creating new abstractions.
- Optimise for observability: deterministic control flow, clear logging, actionable errors.
-
-## SQLAlchemy Patterns
-
- Models inherit from `models.base.Base`; never create ad-hoc metadata or engines.
-
- Open sessions with context managers:
-
-  ```python
-  from sqlalchemy.orm import Session
-
-  with Session(db.engine, expire_on_commit=False) as session:
-      stmt = select(Workflow).where(
-          Workflow.id == workflow_id,
-          Workflow.tenant_id == tenant_id,
-      )
-      workflow = session.execute(stmt).scalar_one_or_none()
-  ```
-
- Use SQLAlchemy expressions; avoid raw SQL unless necessary.
-
- Introduce repository abstractions only for very large tables (e.g., workflow executions) to support alternative storage strategies.
-
- Always scope queries by `tenant_id` and protect write paths with safeguards (`FOR UPDATE`, row counts, etc.).
-
-## Storage & External IO
-
- Access storage via `extensions.ext_storage.storage`.
- Use `core.helper.ssrf_proxy` for outbound HTTP fetches.
- Background tasks that touch storage must be idempotent and log the relevant object identifiers.
-
-## Pydantic Usage
-
- Define DTOs with Pydantic v2 models and forbid extras by default.
-
- Use `@field_validator` / `@model_validator` for domain rules.
-
- Example:
-
-  ```python
-  from pydantic import BaseModel, ConfigDict, HttpUrl, field_validator
-
-  class TriggerConfig(BaseModel):
-      endpoint: HttpUrl
-      secret: str
-
-      model_config = ConfigDict(extra="forbid")
-
-      @field_validator("secret")
-      def ensure_secret_prefix(cls, value: str) -> str:
-          if not value.startswith("dify_"):
-              raise ValueError("secret must start with dify_")
-          return value
-  ```
-
-## Generics & Protocols
-
- Use `typing.Protocol` to define behavioural contracts (e.g., cache interfaces).
- Apply generics (`TypeVar`, `Generic`) for reusable utilities like caches or providers.
- Validate dynamic inputs at runtime when generics cannot enforce safety alone.
-
-## Error Handling & Logging
-
- Raise domain-specific exceptions (`services/errors`, `core/errors`) and translate to HTTP responses in controllers.
- Declare `logger = logging.getLogger(__name__)` at module top.
- Include tenant/app/workflow identifiers in log context.
- Log retryable events at `warning`, terminal failures at `error`.
-
-## Tooling & Checks
-
- Format/lint: `uv run --project api --dev ruff format ./api` and `uv run --project api --dev ruff check --fix --unsafe-fixes ./api`.
- Type checks: `uv run --directory api --dev basedpyright`.
- Tests: `uv run --project api --dev dev/pytest/pytest_unit_tests.sh`.
- Run all of the above before submitting your work.
-
-## Controllers & Services
-
- Controllers: parse input via Pydantic, invoke services, return serialised responses; no business logic.
- Services: coordinate repositories, providers, background tasks; keep side effects explicit.
- Avoid repositories unless necessary; direct SQLAlchemy usage is preferred for typical tables.
- Document non-obvious behaviour with concise comments.
-
-## Miscellaneous
-
- Use `configs.dify_config` for configuration—never read environment variables directly.
- Maintain tenant awareness end-to-end; `tenant_id` must flow through every layer touching shared resources.
- Queue async work through `services/async_workflow_service`; implement tasks under `tasks/` with explicit queue selection.
- Keep experimental scripts under `dev/`; do not ship them in production builds.
--- a/api/commands.py
+++ b/api/commands.py
@@ -3,6 +3,7 @@ import datetime
 import json
 import logging
 import secrets
+import time
 from typing import Any

 import click
@@ -46,6 +47,8 @@ from services.clear_free_plan_tenant_expired_logs import ClearFreePlanTenantExpi
 from services.plugin.data_migration import PluginDataMigration
 from services.plugin.plugin_migration import PluginMigration
 from services.plugin.plugin_service import PluginService
+from services.retention.conversation.messages_clean_policy import create_message_clean_policy
+from services.retention.conversation.messages_clean_service import MessagesCleanService
 from services.retention.workflow_run.clear_free_plan_expired_workflow_run_logs import WorkflowRunCleanup
 from tasks.remove_app_and_related_data_task import delete_draft_variables_batch

@@ -2172,3 +2175,79 @@ def migrate_oss(
            except Exception as e:
                db.session.rollback()
                click.echo(click.style(f"Failed to update DB storage_type: {str(e)}", fg="red"))
+
+
+@click.command("clean-expired-messages", help="Clean expired messages.")
+@click.option(
+    "--start-from",
+    type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]),
+    required=True,
+    help="Lower bound (inclusive) for created_at.",
+)
+@click.option(
+    "--end-before",
+    type=click.DateTime(formats=["%Y-%m-%d", "%Y-%m-%dT%H:%M:%S"]),
+    required=True,
+    help="Upper bound (exclusive) for created_at.",
+)
+@click.option("--batch-size", default=1000, show_default=True, help="Batch size for selecting messages.")
+@click.option(
+    "--graceful-period",
+    default=21,
+    show_default=True,
+    help="Graceful period in days after subscription expiration, will be ignored when billing is disabled.",
+)
+@click.option("--dry-run", is_flag=True, default=False, help="Show messages logs would be cleaned without deleting")
+def clean_expired_messages(
+    batch_size: int,
+    graceful_period: int,
+    start_from: datetime.datetime,
+    end_before: datetime.datetime,
+    dry_run: bool,
+):
+    """
+    Clean expired messages and related data for tenants based on clean policy.
+    """
+    click.echo(click.style("clean_messages: start clean messages.", fg="green"))
+
+    start_at = time.perf_counter()
+
+    try:
+        # Create policy based on billing configuration
+        # NOTE: graceful_period will be ignored when billing is disabled.
+        policy = create_message_clean_policy(graceful_period_days=graceful_period)
+
+        # Create and run the cleanup service
+        service = MessagesCleanService.from_time_range(
+            policy=policy,
+            start_from=start_from,
+            end_before=end_before,
+            batch_size=batch_size,
+            dry_run=dry_run,
+        )
+        stats = service.run()
+
+        end_at = time.perf_counter()
+        click.echo(
+            click.style(
+                f"clean_messages: completed successfully\n"
+                f"  - Latency: {end_at - start_at:.2f}s\n"
+                f"  - Batches processed: {stats['batches']}\n"
+                f"  - Total messages scanned: {stats['total_messages']}\n"
+                f"  - Messages filtered: {stats['filtered_messages']}\n"
+                f"  - Messages deleted: {stats['total_deleted']}",
+                fg="green",
+            )
+        )
+    except Exception as e:
+        end_at = time.perf_counter()
+        logger.exception("clean_messages failed")
+        click.echo(
+            click.style(
+                f"clean_messages: failed after {end_at - start_at:.2f}s - {str(e)}",
+                fg="red",
+            )
+        )
+        raise
+
+    click.echo(click.style("messages cleanup completed.", fg="green"))
--- a/api/configs/feature/__init__.py
+++ b/api/configs/feature/__init__.py
@@ -949,6 +949,12 @@ class MailConfig(BaseSettings):
        default=False,
    )

+    SMTP_LOCAL_HOSTNAME: str | None = Field(
+        description="Override the local hostname used in SMTP HELO/EHLO. "
+        "Useful behind NAT or when the default hostname causes rejections.",
+        default=None,
+    )
+
    EMAIL_SEND_IP_LIMIT_PER_MINUTE: PositiveInt = Field(
        description="Maximum number of emails allowed to be sent from the same IP address in a minute",
        default=50,
--- a/api/controllers/console/app/conversation.py
+++ b/api/controllers/console/app/conversation.py
@@ -592,9 +592,12 @@ def _get_conversation(app_model, conversation_id):
    if not conversation:
        raise NotFound("Conversation Not Exists.")

-    if not conversation.read_at:
-        conversation.read_at = naive_utc_now()
-        conversation.read_account_id = current_user.id
-        db.session.commit()
+    db.session.execute(
+        sa.update(Conversation)
+        .where(Conversation.id == conversation_id, Conversation.read_at.is_(None))
+        .values(read_at=naive_utc_now(), read_account_id=current_user.id)
+    )
+    db.session.commit()
+    db.session.refresh(conversation)

    return conversation
--- a/api/controllers/console/datasets/external.py
+++ b/api/controllers/console/datasets/external.py
@@ -81,7 +81,7 @@ class ExternalKnowledgeApiPayload(BaseModel):
 class ExternalDatasetCreatePayload(BaseModel):
    external_knowledge_api_id: str
    external_knowledge_id: str
-    name: str = Field(..., min_length=1, max_length=40)
+    name: str = Field(..., min_length=1, max_length=100)
    description: str | None = Field(None, max_length=400)
    external_retrieval_model: dict[str, object] | None = None

--- a/api/controllers/console/tag/tags.py
+++ b/api/controllers/console/tag/tags.py
@@ -30,6 +30,11 @@ class TagBindingRemovePayload(BaseModel):
    type: Literal["knowledge", "app"] | None = Field(default=None, description="Tag type")


+class TagListQueryParam(BaseModel):
+    type: Literal["knowledge", "app", ""] = Field("", description="Tag type filter")
+    keyword: str | None = Field(None, description="Search keyword")
+
+
 register_schema_models(
    console_ns,
    TagBasePayload,
@@ -43,12 +48,15 @@ class TagListApi(Resource):
    @setup_required
    @login_required
    @account_initialization_required
+    @console_ns.doc(
+        params={"type": 'Tag type filter. Can be "knowledge" or "app".', "keyword": "Search keyword for tag name."}
+    )
    @marshal_with(dataset_tag_fields)
    def get(self):
        _, current_tenant_id = current_account_with_tenant()
-        tag_type = request.args.get("type", type=str, default="")
-        keyword = request.args.get("keyword", default=None, type=str)
-        tags = TagService.get_tags(tag_type, current_tenant_id, keyword)
+        raw_args = request.args.to_dict()
+        param = TagListQueryParam.model_validate(raw_args)
+        tags = TagService.get_tags(param.type, current_tenant_id, param.keyword)

        return tags, 200

--- a/api/core/llm_generator/llm_generator.py
+++ b/api/core/llm_generator/llm_generator.py
@@ -71,8 +71,8 @@ class LLMGenerator:
            response: LLMResult = model_instance.invoke_llm(
                prompt_messages=list(prompts), model_parameters={"max_tokens": 500, "temperature": 1}, stream=False
            )
-        answer = cast(str, response.message.content)
-        if answer is None:
+        answer = response.message.get_text_content()
+        if answer == "":
            return ""
        try:
            result_dict = json.loads(answer)
@@ -184,7 +184,7 @@ class LLMGenerator:
                    prompt_messages=list(prompt_messages), model_parameters=model_parameters, stream=False
                )

-                rule_config["prompt"] = cast(str, response.message.content)
+                rule_config["prompt"] = response.message.get_text_content()

            except InvokeError as e:
                error = str(e)
@@ -237,13 +237,11 @@ class LLMGenerator:

                return rule_config

-            rule_config["prompt"] = cast(str, prompt_content.message.content)
+            rule_config["prompt"] = prompt_content.message.get_text_content()

-            if not isinstance(prompt_content.message.content, str):
-                raise NotImplementedError("prompt content is not a string")
            parameter_generate_prompt = parameter_template.format(
                inputs={
-                    "INPUT_TEXT": prompt_content.message.content,
+                    "INPUT_TEXT": prompt_content.message.get_text_content(),
                },
                remove_template_variables=False,
            )
@@ -253,7 +251,7 @@ class LLMGenerator:
            statement_generate_prompt = statement_template.format(
                inputs={
                    "TASK_DESCRIPTION": instruction,
-                    "INPUT_TEXT": prompt_content.message.content,
+                    "INPUT_TEXT": prompt_content.message.get_text_content(),
                },
                remove_template_variables=False,
            )
@@ -263,7 +261,7 @@ class LLMGenerator:
                parameter_content: LLMResult = model_instance.invoke_llm(
                    prompt_messages=list(parameter_messages), model_parameters=model_parameters, stream=False
                )
-                rule_config["variables"] = re.findall(r'"\s*([^"]+)\s*"', cast(str, parameter_content.message.content))
+                rule_config["variables"] = re.findall(r'"\s*([^"]+)\s*"', parameter_content.message.get_text_content())
            except InvokeError as e:
                error = str(e)
                error_step = "generate variables"
@@ -272,7 +270,7 @@ class LLMGenerator:
                statement_content: LLMResult = model_instance.invoke_llm(
                    prompt_messages=list(statement_messages), model_parameters=model_parameters, stream=False
                )
-                rule_config["opening_statement"] = cast(str, statement_content.message.content)
+                rule_config["opening_statement"] = statement_content.message.get_text_content()
            except InvokeError as e:
                error = str(e)
                error_step = "generate conversation opener"
@@ -315,7 +313,7 @@ class LLMGenerator:
                prompt_messages=list(prompt_messages), model_parameters=model_parameters, stream=False
            )

-            generated_code = cast(str, response.message.content)
+            generated_code = response.message.get_text_content()
            return {"code": generated_code, "language": code_language, "error": ""}

        except InvokeError as e:
@@ -351,7 +349,7 @@ class LLMGenerator:
            raise TypeError("Expected LLMResult when stream=False")
        response = result

-        answer = cast(str, response.message.content)
+        answer = response.message.get_text_content()
        return answer.strip()

    @classmethod
@@ -375,10 +373,7 @@ class LLMGenerator:
                prompt_messages=list(prompt_messages), model_parameters=model_parameters, stream=False
            )

-            raw_content = response.message.content
-
-            if not isinstance(raw_content, str):
-                raise ValueError(f"LLM response content must be a string, got: {type(raw_content)}")
+            raw_content = response.message.get_text_content()

            try:
                parsed_content = json.loads(raw_content)
--- a/api/core/plugin/impl/endpoint.py
+++ b/api/core/plugin/impl/endpoint.py
@@ -1,5 +1,6 @@
 from core.plugin.entities.endpoint import EndpointEntityWithInstance
 from core.plugin.impl.base import BasePluginClient
+from core.plugin.impl.exc import PluginDaemonInternalServerError


 class PluginEndpointClient(BasePluginClient):
@@ -70,18 +71,27 @@ class PluginEndpointClient(BasePluginClient):
    def delete_endpoint(self, tenant_id: str, user_id: str, endpoint_id: str):
        """
        Delete the given endpoint.
+
+        This operation is idempotent: if the endpoint is already deleted (record not found),
+        it will return True instead of raising an error.
        """
-        return self._request_with_plugin_daemon_response(
-            "POST",
-            f"plugin/{tenant_id}/endpoint/remove",
-            bool,
-            data={
-                "endpoint_id": endpoint_id,
-            },
-            headers={
-                "Content-Type": "application/json",
-            },
-        )
+        try:
+            return self._request_with_plugin_daemon_response(
+                "POST",
+                f"plugin/{tenant_id}/endpoint/remove",
+                bool,
+                data={
+                    "endpoint_id": endpoint_id,
+                },
+                headers={
+                    "Content-Type": "application/json",
+                },
+            )
+        except PluginDaemonInternalServerError as e:
+            # Make delete idempotent: if record is not found, consider it a success
+            if "record not found" in str(e.description).lower():
+                return True
+            raise

    def enable_endpoint(self, tenant_id: str, user_id: str, endpoint_id: str):
        """
--- a/api/core/workflow/workflow_entry.py
+++ b/api/core/workflow/workflow_entry.py
@@ -189,8 +189,7 @@ class WorkflowEntry:
            )

        try:
-            # run node
-            generator = node.run()
+            generator = cls._traced_node_run(node)
        except Exception as e:
            logger.exception(
                "error while running node, workflow_id=%s, node_id=%s, node_type=%s, node_version=%s",
@@ -323,8 +322,7 @@ class WorkflowEntry:
                tenant_id=tenant_id,
            )

-            # run node
-            generator = node.run()
+            generator = cls._traced_node_run(node)

            return node, generator
        except Exception as e:
@@ -430,3 +428,26 @@ class WorkflowEntry:
                        input_value = current_variable.value | input_value

                variable_pool.add([variable_node_id] + variable_key_list, input_value)
+
+    @staticmethod
+    def _traced_node_run(node: Node) -> Generator[GraphNodeEventBase, None, None]:
+        """
+        Wraps a node's run method with OpenTelemetry tracing and returns a generator.
+        """
+        # Wrap node.run() with ObservabilityLayer hooks to produce node-level spans
+        layer = ObservabilityLayer()
+        layer.on_graph_start()
+        node.ensure_execution_id()
+
+        def _gen():
+            error: Exception | None = None
+            layer.on_node_run_start(node)
+            try:
+                yield from node.run()
+            except Exception as exc:
+                error = exc
+                raise
+            finally:
+                layer.on_node_run_end(node, error)
+
+        return _gen()
--- a/api/events/event_handlers/init.py
+++ b/api/events/event_handlers/init.py
@@ -6,6 +6,7 @@ from .create_site_record_when_app_created import handle as handle_create_site_re
 from .delete_tool_parameters_cache_when_sync_draft_workflow import (
    handle as handle_delete_tool_parameters_cache_when_sync_draft_workflow,
 )
+from .queue_credential_sync_when_tenant_created import handle as handle_queue_credential_sync_when_tenant_created
 from .sync_plugin_trigger_when_app_created import handle as handle_sync_plugin_trigger_when_app_created
 from .sync_webhook_when_app_created import handle as handle_sync_webhook_when_app_created
 from .sync_workflow_schedule_when_app_published import handle as handle_sync_workflow_schedule_when_app_published
@@ -30,6 +31,7 @@ __all__ = [
    "handle_create_installed_app_when_app_created",
    "handle_create_site_record_when_app_created",
    "handle_delete_tool_parameters_cache_when_sync_draft_workflow",
+    "handle_queue_credential_sync_when_tenant_created",
    "handle_sync_plugin_trigger_when_app_created",
    "handle_sync_webhook_when_app_created",
    "handle_sync_workflow_schedule_when_app_published",
--- a/api/events/event_handlers/queue_credential_sync_when_tenant_created.py
+++ b/api/events/event_handlers/queue_credential_sync_when_tenant_created.py
@@ -0,0 +1,19 @@
+from configs import dify_config
+from events.tenant_event import tenant_was_created
+from services.enterprise.workspace_sync import WorkspaceSyncService
+
+
+@tenant_was_created.connect
+def handle(sender, **kwargs):
+    """Queue credential sync when a tenant/workspace is created."""
+    # Only queue sync tasks when enterprise mode is enabled (the plugin manager is an enterprise feature)
+    if not dify_config.ENTERPRISE_ENABLED:
+        return
+
+    tenant = sender
+
+    # Use the source passed in kwargs if available; otherwise fall back to the generic "tenant_created"
+    source = kwargs.get("source", "tenant_created")
+
+    # Queue credential sync task to Redis for enterprise backend to process
+    WorkspaceSyncService.queue_credential_sync(tenant.id, source=source)
--- a/api/extensions/ext_commands.py
+++ b/api/extensions/ext_commands.py
@@ -4,6 +4,7 @@ from dify_app import DifyApp
 def init_app(app: DifyApp):
    from commands import (
        add_qdrant_index,
+        clean_expired_messages,
        clean_workflow_runs,
        cleanup_orphaned_draft_variables,
        clear_free_plan_tenant_expired_logs,
@@ -58,6 +59,7 @@ def init_app(app: DifyApp):
        transform_datasource_credentials,
        install_rag_pipeline_plugins,
        clean_workflow_runs,
+        clean_expired_messages,
    ]
    for cmd in cmds_to_register:
        app.cli.add_command(cmd)
--- a/api/factories/file_factory.py
+++ b/api/factories/file_factory.py
@@ -115,7 +115,18 @@ def build_from_mappings(
    # TODO(QuantumGhost): Performance concern - each mapping triggers a separate database query.
    # Implement batch processing to reduce database load when handling multiple files.
    # Filter out None/empty mappings to avoid errors
-    valid_mappings = [m for m in mappings if m and m.get("transfer_method")]
+    def is_valid_mapping(m: Mapping[str, Any]) -> bool:
+        if not m or not m.get("transfer_method"):
+            return False
+        # For REMOTE_URL transfer method, ensure url or remote_url is provided and non-empty
+        transfer_method = m.get("transfer_method")
+        if transfer_method == FileTransferMethod.REMOTE_URL:
+            url = m.get("url") or m.get("remote_url")
+            if not url:
+                return False
+        return True
+
+    valid_mappings = [m for m in mappings if is_valid_mapping(m)]
    files = [
        build_from_mapping(
            mapping=mapping,
--- a/api/fields/message_fields.py
+++ b/api/fields/message_fields.py
@@ -2,6 +2,7 @@ from __future__ import annotations

 from datetime import datetime
 from typing import TypeAlias
+from uuid import uuid4

 from pydantic import BaseModel, ConfigDict, Field, field_validator

@@ -20,8 +21,8 @@ class SimpleFeedback(ResponseModel):


 class RetrieverResource(ResponseModel):
-    id: str
-    message_id: str
+    id: str = Field(default_factory=lambda: str(uuid4()))
+    message_id: str = Field(default_factory=lambda: str(uuid4()))
    position: int
    dataset_id: str | None = None
    dataset_name: str | None = None
--- a/api/libs/smtp.py
+++ b/api/libs/smtp.py
@@ -3,6 +3,8 @@ import smtplib
 from email.mime.multipart import MIMEMultipart
 from email.mime.text import MIMEText

+from configs import dify_config
+
 logger = logging.getLogger(__name__)


@@ -19,20 +21,21 @@ class SMTPClient:
        self.opportunistic_tls = opportunistic_tls

    def send(self, mail: dict):
-        smtp = None
+        smtp: smtplib.SMTP | None = None
+        local_host = dify_config.SMTP_LOCAL_HOSTNAME
        try:
-            if self.use_tls:
-                if self.opportunistic_tls:
-                    smtp = smtplib.SMTP(self.server, self.port, timeout=10)
-                    # Send EHLO command with the HELO domain name as the server address
-                    smtp.ehlo(self.server)
-                    smtp.starttls()
-                    # Resend EHLO command to identify the TLS session
-                    smtp.ehlo(self.server)
-                else:
-                    smtp = smtplib.SMTP_SSL(self.server, self.port, timeout=10)
+            if self.use_tls and not self.opportunistic_tls:
+                # SMTP with SSL (implicit TLS)
+                smtp = smtplib.SMTP_SSL(self.server, self.port, timeout=10, local_hostname=local_host)
            else:
-                smtp = smtplib.SMTP(self.server, self.port, timeout=10)
+                # Plain SMTP or SMTP with STARTTLS (explicit TLS)
+                smtp = smtplib.SMTP(self.server, self.port, timeout=10, local_hostname=local_host)
+
+            assert smtp is not None
+            if self.use_tls and self.opportunistic_tls:
+                smtp.ehlo(self.server)
+                smtp.starttls()
+                smtp.ehlo(self.server)

            # Only authenticate if both username and password are non-empty
            if self.username and self.password and self.username.strip() and self.password.strip():
--- a/api/migrations/versions/2026_01_12_1729-3334862ee907_feat_add_created_at_id_index_to_messages.py
+++ b/api/migrations/versions/2026_01_12_1729-3334862ee907_feat_add_created_at_id_index_to_messages.py
@@ -0,0 +1,33 @@
+"""feat: add created_at id index to messages
+
+Revision ID: 3334862ee907
+Revises: 905527cc8fd3
+Create Date: 2026-01-12 17:29:44.846544
+
+"""
+from alembic import op
+import models as models
+import sqlalchemy as sa
+
+
+# revision identifiers, used by Alembic.
+revision = '3334862ee907'
+down_revision = '905527cc8fd3'
+branch_labels = None
+depends_on = None
+
+
+def upgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table('messages', schema=None) as batch_op:
+        batch_op.create_index('message_created_at_id_idx', ['created_at', 'id'], unique=False)
+
+    # ### end Alembic commands ###
+
+
+def downgrade():
+    # ### commands auto generated by Alembic - please adjust! ###
+    with op.batch_alter_table('messages', schema=None) as batch_op:
+        batch_op.drop_index('message_created_at_id_idx')
+
+    # ### end Alembic commands ###
--- a/api/models/dataset.py
+++ b/api/models/dataset.py
@@ -1149,7 +1149,7 @@ class DatasetCollectionBinding(TypeBase):
    )


-class TidbAuthBinding(Base):
+class TidbAuthBinding(TypeBase):
    __tablename__ = "tidb_auth_bindings"
    __table_args__ = (
        sa.PrimaryKeyConstraint("id", name="tidb_auth_bindings_pkey"),
@@ -1158,7 +1158,13 @@ class TidbAuthBinding(Base):
        sa.Index("tidb_auth_bindings_created_at_idx", "created_at"),
        sa.Index("tidb_auth_bindings_status_idx", "status"),
    )
-    id: Mapped[str] = mapped_column(StringUUID, primary_key=True, default=lambda: str(uuid4()))
+    id: Mapped[str] = mapped_column(
+        StringUUID,
+        primary_key=True,
+        insert_default=lambda: str(uuid4()),
+        default_factory=lambda: str(uuid4()),
+        init=False,
+    )
    tenant_id: Mapped[str | None] = mapped_column(StringUUID, nullable=True)
    cluster_id: Mapped[str] = mapped_column(String(255), nullable=False)
    cluster_name: Mapped[str] = mapped_column(String(255), nullable=False)
@@ -1166,7 +1172,9 @@ class TidbAuthBinding(Base):
    status: Mapped[str] = mapped_column(sa.String(255), nullable=False, server_default=sa.text("'CREATING'"))
    account: Mapped[str] = mapped_column(String(255), nullable=False)
    password: Mapped[str] = mapped_column(String(255), nullable=False)
-    created_at: Mapped[datetime] = mapped_column(DateTime, nullable=False, server_default=func.current_timestamp())
+    created_at: Mapped[datetime] = mapped_column(
+        DateTime, nullable=False, server_default=func.current_timestamp(), init=False
+    )


 class Whitelist(TypeBase):
--- a/api/models/model.py
+++ b/api/models/model.py
@@ -968,6 +968,7 @@ class Message(Base):
        Index("message_workflow_run_id_idx", "conversation_id", "workflow_run_id"),
        Index("message_created_at_idx", "created_at"),
        Index("message_app_mode_idx", "app_mode"),
+        Index("message_created_at_id_idx", "created_at", "id"),
    )

    id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()))
@@ -1447,7 +1448,7 @@ class MessageAnnotation(Base):
        return account


-class AppAnnotationHitHistory(Base):
+class AppAnnotationHitHistory(TypeBase):
    __tablename__ = "app_annotation_hit_histories"
    __table_args__ = (
        sa.PrimaryKeyConstraint("id", name="app_annotation_hit_histories_pkey"),
@@ -1457,17 +1458,19 @@ class AppAnnotationHitHistory(Base):
        sa.Index("app_annotation_hit_histories_message_idx", "message_id"),
    )

-    id = mapped_column(StringUUID, default=lambda: str(uuid4()))
-    app_id = mapped_column(StringUUID, nullable=False)
+    id: Mapped[str] = mapped_column(StringUUID, default=lambda: str(uuid4()), init=False)
+    app_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
    annotation_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
-    source = mapped_column(LongText, nullable=False)
-    question = mapped_column(LongText, nullable=False)
-    account_id = mapped_column(StringUUID, nullable=False)
-    created_at = mapped_column(sa.DateTime, nullable=False, server_default=func.current_timestamp())
-    score = mapped_column(Float, nullable=False, server_default=sa.text("0"))
-    message_id = mapped_column(StringUUID, nullable=False)
-    annotation_question = mapped_column(LongText, nullable=False)
-    annotation_content = mapped_column(LongText, nullable=False)
+    source: Mapped[str] = mapped_column(LongText, nullable=False)
+    question: Mapped[str] = mapped_column(LongText, nullable=False)
+    account_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
+    created_at: Mapped[datetime] = mapped_column(
+        sa.DateTime, nullable=False, server_default=func.current_timestamp(), init=False
+    )
+    score: Mapped[float] = mapped_column(Float, nullable=False, server_default=sa.text("0"))
+    message_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
+    annotation_question: Mapped[str] = mapped_column(LongText, nullable=False)
+    annotation_content: Mapped[str] = mapped_column(LongText, nullable=False)

    @property
    def account(self):
@@ -2083,7 +2086,7 @@ class TraceAppConfig(TypeBase):
        }


-class TenantCreditPool(Base):
+class TenantCreditPool(TypeBase):
    __tablename__ = "tenant_credit_pools"
    __table_args__ = (
        sa.PrimaryKeyConstraint("id", name="tenant_credit_pool_pkey"),
@@ -2091,14 +2094,20 @@ class TenantCreditPool(Base):
        sa.Index("tenant_credit_pool_pool_type_idx", "pool_type"),
    )

-    id = mapped_column(StringUUID, primary_key=True, server_default=text("uuid_generate_v4()"))
-    tenant_id = mapped_column(StringUUID, nullable=False)
-    pool_type = mapped_column(String(40), nullable=False, default="trial", server_default="trial")
-    quota_limit = mapped_column(BigInteger, nullable=False, default=0)
-    quota_used = mapped_column(BigInteger, nullable=False, default=0)
-    created_at = mapped_column(sa.DateTime, nullable=False, server_default=text("CURRENT_TIMESTAMP"))
-    updated_at = mapped_column(
-        sa.DateTime, nullable=False, server_default=func.current_timestamp(), onupdate=func.current_timestamp()
+    id: Mapped[str] = mapped_column(StringUUID, primary_key=True, server_default=text("uuid_generate_v4()"), init=False)
+    tenant_id: Mapped[str] = mapped_column(StringUUID, nullable=False)
+    pool_type: Mapped[str] = mapped_column(String(40), nullable=False, default="trial", server_default="trial")
+    quota_limit: Mapped[int] = mapped_column(BigInteger, nullable=False, default=0)
+    quota_used: Mapped[int] = mapped_column(BigInteger, nullable=False, default=0)
+    created_at: Mapped[datetime] = mapped_column(
+        sa.DateTime, nullable=False, server_default=text("CURRENT_TIMESTAMP"), init=False
+    )
+    updated_at: Mapped[datetime] = mapped_column(
+        sa.DateTime,
+        nullable=False,
+        server_default=func.current_timestamp(),
+        onupdate=func.current_timestamp(),
+        init=False,
    )

    @property
--- a/api/pyproject.toml
+++ b/api/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "dify-api"
-version = "1.11.3"
+version = "1.11.4"
 requires-python = ">=3.11,<3.13"

 dependencies = [
--- a/api/schedule/clean_messages.py
+++ b/api/schedule/clean_messages.py
@@ -1,90 +1,62 @@
-import datetime
 import logging
 import time

 import click
-from sqlalchemy.exc import SQLAlchemyError

 import app
 from configs import dify_config
-from enums.cloud_plan import CloudPlan
-from extensions.ext_database import db
-from extensions.ext_redis import redis_client
-from models.model import (
-    App,
-    Message,
-    MessageAgentThought,
-    MessageAnnotation,
-    MessageChain,
-    MessageFeedback,
-    MessageFile,
-)
-from models.web import SavedMessage
-from services.feature_service import FeatureService
+from services.retention.conversation.messages_clean_policy import create_message_clean_policy
+from services.retention.conversation.messages_clean_service import MessagesCleanService

 logger = logging.getLogger(__name__)


-@app.celery.task(queue="dataset")
+@app.celery.task(queue="retention")
 def clean_messages():
-    click.echo(click.style("Start clean messages.", fg="green"))
-    start_at = time.perf_counter()
-    plan_sandbox_clean_message_day = datetime.datetime.now() - datetime.timedelta(
-        days=dify_config.PLAN_SANDBOX_CLEAN_MESSAGE_DAY_SETTING
-    )
-    while True:
-        try:
-            # Main query with join and filter
-            messages = (
-                db.session.query(Message)
-                .where(Message.created_at < plan_sandbox_clean_message_day)
-                .order_by(Message.created_at.desc())
-                .limit(100)
-                .all()
-            )
+    """
+    Clean expired messages based on clean policy.

-        except SQLAlchemyError:
-            raise
-        if not messages:
-            break
-        for message in messages:
-            app = db.session.query(App).filter_by(id=message.app_id).first()
-            if not app:
-                logger.warning(
-                    "Expected App record to exist, but none was found, app_id=%s, message_id=%s",
-                    message.app_id,
-                    message.id,
-                )
-                continue
-            features_cache_key = f"features:{app.tenant_id}"
-            plan_cache = redis_client.get(features_cache_key)
-            if plan_cache is None:
-                features = FeatureService.get_features(app.tenant_id)
-                redis_client.setex(features_cache_key, 600, features.billing.subscription.plan)
-                plan = features.billing.subscription.plan
-            else:
-                plan = plan_cache.decode()
-            if plan == CloudPlan.SANDBOX:
-                # clean related message
-                db.session.query(MessageFeedback).where(MessageFeedback.message_id == message.id).delete(
-                    synchronize_session=False
-                )
-                db.session.query(MessageAnnotation).where(MessageAnnotation.message_id == message.id).delete(
-                    synchronize_session=False
-                )
-                db.session.query(MessageChain).where(MessageChain.message_id == message.id).delete(
-                    synchronize_session=False
-                )
-                db.session.query(MessageAgentThought).where(MessageAgentThought.message_id == message.id).delete(
-                    synchronize_session=False
-                )
-                db.session.query(MessageFile).where(MessageFile.message_id == message.id).delete(
-                    synchronize_session=False
-                )
-                db.session.query(SavedMessage).where(SavedMessage.message_id == message.id).delete(
-                    synchronize_session=False
-                )
-                db.session.query(Message).where(Message.id == message.id).delete()
-                db.session.commit()
-    end_at = time.perf_counter()
-    click.echo(click.style(f"Cleaned messages from db success latency: {end_at - start_at}", fg="green"))
+    This task uses MessagesCleanService to efficiently clean messages in batches.
+    The behavior depends on BILLING_ENABLED configuration:
+    - BILLING_ENABLED=True: only delete messages from sandbox tenants (with whitelist/grace period)
+    - BILLING_ENABLED=False: delete all messages within the time range
+    """
+    click.echo(click.style("clean_messages: start clean messages.", fg="green"))
+    start_at = time.perf_counter()
+
+    try:
+        # Create policy based on billing configuration
+        policy = create_message_clean_policy(
+            graceful_period_days=dify_config.SANDBOX_EXPIRED_RECORDS_CLEAN_GRACEFUL_PERIOD,
+        )
+
+        # Create and run the cleanup service
+        service = MessagesCleanService.from_days(
+            policy=policy,
+            days=dify_config.SANDBOX_EXPIRED_RECORDS_RETENTION_DAYS,
+            batch_size=dify_config.SANDBOX_EXPIRED_RECORDS_CLEAN_BATCH_SIZE,
+        )
+        stats = service.run()
+
+        end_at = time.perf_counter()
+        click.echo(
+            click.style(
+                f"clean_messages: completed successfully\n"
+                f"  - Latency: {end_at - start_at:.2f}s\n"
+                f"  - Batches processed: {stats['batches']}\n"
+                f"  - Total messages scanned: {stats['total_messages']}\n"
+                f"  - Messages filtered: {stats['filtered_messages']}\n"
+                f"  - Messages deleted: {stats['total_deleted']}",
+                fg="green",
+            )
+        )
+    except Exception as e:
+        end_at = time.perf_counter()
+        logger.exception("clean_messages failed")
+        click.echo(
+            click.style(
+                f"clean_messages: failed after {end_at - start_at:.2f}s - {str(e)}",
+                fg="red",
+            )
+        )
+        raise
--- a/api/schedule/create_tidb_serverless_task.py
+++ b/api/schedule/create_tidb_serverless_task.py
@@ -50,10 +50,13 @@ def create_clusters(batch_size):
        )
        for new_cluster in new_clusters:
            tidb_auth_binding = TidbAuthBinding(
+                tenant_id=None,
                cluster_id=new_cluster["cluster_id"],
                cluster_name=new_cluster["cluster_name"],
                account=new_cluster["account"],
                password=new_cluster["password"],
+                active=False,
+                status="CREATING",
            )
            db.session.add(tidb_auth_binding)
        db.session.commit()
--- a/api/services/enterprise/workspace_sync.py
+++ b/api/services/enterprise/workspace_sync.py
@@ -0,0 +1,58 @@
+import json
+import logging
+import uuid
+from datetime import UTC, datetime
+
+from redis import RedisError
+
+from extensions.ext_redis import redis_client
+
+logger = logging.getLogger(__name__)
+
+WORKSPACE_SYNC_QUEUE = "enterprise:workspace:sync:queue"
+WORKSPACE_SYNC_PROCESSING = "enterprise:workspace:sync:processing"
+
+
+class WorkspaceSyncService:
+    """Service to publish workspace sync tasks to Redis queue for enterprise backend consumption"""
+
+    @staticmethod
+    def queue_credential_sync(workspace_id: str, *, source: str) -> bool:
+        """
+        Queue a credential sync task for a newly created workspace.
+
+        This publishes a task to Redis that will be consumed by the enterprise backend
+        worker to sync credentials with the plugin-manager.
+
+        Args:
+            workspace_id: The workspace/tenant ID to sync credentials for
+            source: Source of the sync request (for debugging/tracking)
+
+        Returns:
+            bool: True if task was queued successfully, False otherwise
+        """
+        try:
+            task = {
+                "task_id": str(uuid.uuid4()),
+                "workspace_id": workspace_id,
+                "retry_count": 0,
+                "created_at": datetime.now(UTC).isoformat(),
+                "source": source,
+            }
+
+            # Push to Redis list (queue) - LPUSH adds to the head, worker consumes from tail with RPOP
+            redis_client.lpush(WORKSPACE_SYNC_QUEUE, json.dumps(task))
+
+            logger.info(
+                "Queued credential sync task for workspace %s, task_id: %s, source: %s",
+                workspace_id,
+                task["task_id"],
+                source,
+            )
+            return True
+
+        except (RedisError, TypeError) as e:
+            logger.error("Failed to queue credential sync for workspace %s: %s", workspace_id, str(e), exc_info=True)
+            # Don't raise - we don't want to fail workspace creation if queueing fails
+            # The scheduled task will catch it later
+            return False
--- a/api/services/retention/conversation/messages_clean_policy.py
+++ b/api/services/retention/conversation/messages_clean_policy.py
@@ -0,0 +1,216 @@
+import datetime
+import logging
+from abc import ABC, abstractmethod
+from collections.abc import Callable, Sequence
+from dataclasses import dataclass
+
+from configs import dify_config
+from enums.cloud_plan import CloudPlan
+from services.billing_service import BillingService, SubscriptionPlan
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class SimpleMessage:
+    """Lightweight view of a message carrying only the fields cleanup policies need."""
+
+    # Message primary key; this is the value policies return for deletion.
+    id: str
+    # ID of the owning app; policies resolve it to a tenant via the app_to_tenant mapping.
+    app_id: str
+    # Creation time of the message.
+    created_at: datetime.datetime
+
+
+class MessagesCleanPolicy(ABC):
+    """
+    Abstract base class for message cleanup policies.
+
+    A policy determines which messages from a batch should be deleted.
+    Concrete policies must implement ``filter_message_ids``.
+    """
+
+    @abstractmethod
+    def filter_message_ids(
+        self,
+        messages: Sequence[SimpleMessage],
+        app_to_tenant: dict[str, str],
+    ) -> Sequence[str]:
+        """
+        Filter messages and return IDs of messages that should be deleted.
+
+        Args:
+            messages: Batch of messages to evaluate
+            app_to_tenant: Mapping from app_id to tenant_id
+
+        Returns:
+            Sequence of message IDs that should be deleted
+        """
+        ...
+
+
+class BillingDisabledPolicy(MessagesCleanPolicy):
+    """
+    Policy for the community or enterprise edition (billing disabled).
+
+    Applies no filtering: every message in the batch is selected for deletion.
+    """
+
+    def filter_message_ids(
+        self,
+        messages: Sequence[SimpleMessage],
+        app_to_tenant: dict[str, str],
+    ) -> Sequence[str]:
+        """
+        Return the IDs of all given messages, in input order.
+
+        Args:
+            messages: Batch of messages to evaluate
+            app_to_tenant: Mapping from app_id to tenant_id (unused by this policy)
+
+        Returns:
+            List containing the ID of every message in the batch
+        """
+        selected_ids: list[str] = []
+        for message in messages:
+            selected_ids.append(message.id)
+        return selected_ids
+
+
+class BillingSandboxPolicy(MessagesCleanPolicy):
+    """
+    Policy for sandbox plan tenants in cloud edition (billing enabled).
+
+    Filters messages based on sandbox plan expiration rules:
+    - Skip tenants in the whitelist
+    - Only delete messages from sandbox plan tenants
+    - Respect grace period after subscription expiration
+    - Safe default: if tenant mapping or plan is missing, do NOT delete
+    """
+
+    def __init__(
+        self,
+        plan_provider: Callable[[Sequence[str]], dict[str, SubscriptionPlan]],
+        graceful_period_days: int = 21,
+        tenant_whitelist: Sequence[str] | None = None,
+        current_timestamp: int | None = None,
+    ) -> None:
+        """
+        Args:
+            plan_provider: Callable that maps a sequence of tenant IDs to their subscription plans
+            graceful_period_days: Days after subscription expiration during which messages are kept
+            tenant_whitelist: Tenant IDs whose messages are never selected (None means no whitelist)
+            current_timestamp: Unix timestamp to treat as "now"; when None the current UTC time
+                is read on each filtering call
+        """
+        self._graceful_period_days = graceful_period_days
+        self._tenant_whitelist: Sequence[str] = tenant_whitelist or []
+        self._plan_provider = plan_provider
+        self._current_timestamp = current_timestamp
+
+    def filter_message_ids(
+        self,
+        messages: Sequence[SimpleMessage],
+        app_to_tenant: dict[str, str],
+    ) -> Sequence[str]:
+        """
+        Filter messages based on sandbox plan expiration rules.
+
+        Args:
+            messages: Batch of messages to evaluate
+            app_to_tenant: Mapping from app_id to tenant_id
+
+        Returns:
+            List of message IDs that should be deleted (empty when there are no
+            messages, no tenant mapping, or the plan provider returns nothing)
+        """
+        if not messages or not app_to_tenant:
+            return []
+
+        # Get unique tenant_ids and fetch subscription plans
+        tenant_ids = list(set(app_to_tenant.values()))
+        tenant_plans = self._plan_provider(tenant_ids)
+
+        # Safe default: without plan information nothing is deleted
+        if not tenant_plans:
+            return []
+
+        # Apply sandbox deletion rules
+        return self._filter_expired_sandbox_messages(
+            messages=messages,
+            app_to_tenant=app_to_tenant,
+            tenant_plans=tenant_plans,
+        )
+
+    def _filter_expired_sandbox_messages(
+        self,
+        messages: Sequence[SimpleMessage],
+        app_to_tenant: dict[str, str],
+        tenant_plans: dict[str, SubscriptionPlan],
+    ) -> list[str]:
+        """
+        Filter messages that should be deleted based on sandbox plan expiration.
+
+        A message should be deleted if:
+        1. It belongs to a non-whitelisted sandbox tenant AND
+        2. Either:
+           a) The tenant has no previous subscription (expiration_date == -1), OR
+           b) The subscription expired more than graceful_period_days ago
+
+        The verdict depends only on the tenant, so it is computed once per tenant
+        and reused for all of that tenant's messages in the batch.
+
+        Args:
+            messages: List of message objects with id and app_id attributes
+            app_to_tenant: Mapping from app_id to tenant_id
+            tenant_plans: Mapping from tenant_id to subscription plan info
+
+        Returns:
+            List of message IDs that should be deleted, in input order
+        """
+        current_timestamp = self._current_timestamp
+        if current_timestamp is None:
+            current_timestamp = int(datetime.datetime.now(datetime.UTC).timestamp())
+
+        graceful_period_seconds = self._graceful_period_days * 24 * 60 * 60
+
+        # Hash-based lookup instead of scanning the whitelist sequence for every message
+        whitelisted_tenants = frozenset(self._tenant_whitelist)
+        # tenant_id -> whether that tenant's messages should be deleted
+        tenant_verdicts: dict[str, bool] = {}
+        sandbox_message_ids: list[str] = []
+
+        for msg in messages:
+            # Messages whose app has no known tenant are never deleted
+            tenant_id = app_to_tenant.get(msg.app_id)
+            if not tenant_id:
+                continue
+
+            should_delete = tenant_verdicts.get(tenant_id)
+            if should_delete is None:
+                should_delete = tenant_id not in whitelisted_tenants and self._is_expired_sandbox_plan(
+                    tenant_plans.get(tenant_id),
+                    current_timestamp,
+                    graceful_period_seconds,
+                )
+                tenant_verdicts[tenant_id] = should_delete
+
+            if should_delete:
+                sandbox_message_ids.append(msg.id)
+
+        return sandbox_message_ids
+
+    @staticmethod
+    def _is_expired_sandbox_plan(
+        tenant_plan: SubscriptionPlan | None,
+        current_timestamp: int,
+        graceful_period_seconds: int,
+    ) -> bool:
+        """Return True when the plan is a sandbox plan that never had, or has outlived, a subscription."""
+        # Missing plan info: keep the messages
+        if not tenant_plan:
+            return False
+
+        plan = str(tenant_plan["plan"])
+        expiration_date = int(tenant_plan["expiration_date"])
+
+        # Only sandbox plans are eligible for cleanup
+        if plan != CloudPlan.SANDBOX:
+            return False
+
+        # -1 means the tenant never had a paid subscription
+        if expiration_date == -1:
+            return True
+
+        # Otherwise delete only once the grace period after expiration has passed
+        return current_timestamp - expiration_date > graceful_period_seconds
+
+
+def create_message_clean_policy(
+    graceful_period_days: int = 21,
+    current_timestamp: int | None = None,
+) -> MessagesCleanPolicy:
+    """
+    Build the message clean policy matching the BILLING_ENABLED configuration.
+
+    - BILLING_ENABLED is True: a BillingSandboxPolicy wired to BillingService
+      (cleanup whitelist and cached bulk plan lookup)
+    - BILLING_ENABLED is False: a BillingDisabledPolicy
+
+    Args:
+        graceful_period_days: Grace period in days after subscription expiration (default: 21)
+        current_timestamp: Current Unix timestamp for testing (default: None, uses current time)
+
+    Returns:
+        The policy instance to use for message cleanup
+    """
+    if dify_config.BILLING_ENABLED:
+        # Billing enabled - the whitelist comes from BillingService
+        whitelist = BillingService.get_expired_subscription_cleanup_whitelist()
+
+        logger.info(
+            "create_message_clean_policy: billing enabled, using BillingSandboxPolicy "
+            "(graceful_period_days=%s, whitelist=%s)",
+            graceful_period_days,
+            whitelist,
+        )
+
+        return BillingSandboxPolicy(
+            plan_provider=BillingService.get_plan_bulk_with_cache,
+            graceful_period_days=graceful_period_days,
+            tenant_whitelist=whitelist,
+            current_timestamp=current_timestamp,
+        )
+
+    logger.info("create_message_clean_policy: billing disabled, using BillingDisabledPolicy")
+    return BillingDisabledPolicy()
--- a/api/services/retention/conversation/messages_clean_service.py
+++ b/api/services/retention/conversation/messages_clean_service.py
@@ -0,0 +1,334 @@
+import datetime
+import logging
+import random
+from collections.abc import Sequence
+from typing import cast
+
+from sqlalchemy import delete, select
+from sqlalchemy.engine import CursorResult
+from sqlalchemy.orm import Session
+
+from extensions.ext_database import db
+from models.model import (
+    App,
+    AppAnnotationHitHistory,
+    DatasetRetrieverResource,
+    Message,
+    MessageAgentThought,
+    MessageAnnotation,
+    MessageChain,
+    MessageFeedback,
+    MessageFile,
+)
+from models.web import SavedMessage
+from services.retention.conversation.messages_clean_policy import (
+    MessagesCleanPolicy,
+    SimpleMessage,
+)
+
+logger = logging.getLogger(__name__)
+
+
+class MessagesCleanService:
+    """
+    Service for cleaning expired messages based on retention policies.
+
+    Compatible with non cloud edition (billing disabled): all messages in the time range will be deleted.
+    If billing is enabled: only sandbox plan tenant messages are deleted (with whitelist and grace period support).
+
+    Which of the two behaviors applies is decided entirely by the injected policy.
+    """
+
+    def __init__(
+        self,
+        policy: MessagesCleanPolicy,
+        end_before: datetime.datetime,
+        start_from: datetime.datetime | None = None,
+        batch_size: int = 1000,
+        dry_run: bool = False,
+    ) -> None:
+        """
+        Initialize the service with cleanup parameters.
+
+        No validation happens here; the ``from_time_range`` / ``from_days``
+        constructors validate their arguments before calling this.
+
+        Args:
+            policy: The policy that determines which messages to delete
+            end_before: End time (exclusive) of the range
+            start_from: Optional start time (inclusive) of the range
+            batch_size: Number of messages to process per batch
+            dry_run: Whether to perform a dry run (no actual deletion)
+        """
+        self._policy = policy
+        self._end_before = end_before
+        self._start_from = start_from
+        self._batch_size = batch_size
+        self._dry_run = dry_run
+
+    @classmethod
+    def from_time_range(
+        cls,
+        policy: MessagesCleanPolicy,
+        start_from: datetime.datetime,
+        end_before: datetime.datetime,
+        batch_size: int = 1000,
+        dry_run: bool = False,
+    ) -> "MessagesCleanService":
+        """
+        Create a service instance for cleaning messages within a specific time range.
+
+        Time range is [start_from, end_before).
+
+        Args:
+            policy: The policy that determines which messages to delete
+            start_from: Start time (inclusive) of the range
+            end_before: End time (exclusive) of the range
+            batch_size: Number of messages to process per batch
+            dry_run: Whether to perform a dry run (no actual deletion)
+
+        Returns:
+            MessagesCleanService instance
+
+        Raises:
+            ValueError: If start_from >= end_before or batch_size <= 0
+        """
+        if start_from >= end_before:
+            raise ValueError(f"start_from ({start_from}) must be less than end_before ({end_before})")
+
+        if batch_size <= 0:
+            raise ValueError(f"batch_size ({batch_size}) must be greater than 0")
+
+        logger.info(
+            "clean_messages: start_from=%s, end_before=%s, batch_size=%s, policy=%s",
+            start_from,
+            end_before,
+            batch_size,
+            policy.__class__.__name__,
+        )
+
+        return cls(
+            policy=policy,
+            end_before=end_before,
+            start_from=start_from,
+            batch_size=batch_size,
+            dry_run=dry_run,
+        )
+
+    @classmethod
+    def from_days(
+        cls,
+        policy: MessagesCleanPolicy,
+        days: int = 30,
+        batch_size: int = 1000,
+        dry_run: bool = False,
+    ) -> "MessagesCleanService":
+        """
+        Create a service instance for cleaning messages older than specified days.
+
+        The resulting range has no lower bound: every message created before
+        ``now - days`` is a candidate.
+
+        Args:
+            policy: The policy that determines which messages to delete
+            days: Number of days to look back from now
+            batch_size: Number of messages to process per batch
+            dry_run: Whether to perform a dry run (no actual deletion)
+
+        Returns:
+            MessagesCleanService instance
+
+        Raises:
+            ValueError: If days < 0 or batch_size <= 0
+        """
+        if days < 0:
+            raise ValueError(f"days ({days}) must be greater than or equal to 0")
+
+        if batch_size <= 0:
+            raise ValueError(f"batch_size ({batch_size}) must be greater than 0")
+
+        # datetime.now() without a tz argument yields a naive local-time value.
+        # NOTE(review): confirm Message.created_at is stored in the same timezone.
+        end_before = datetime.datetime.now() - datetime.timedelta(days=days)
+
+        logger.info(
+            "clean_messages: days=%s, end_before=%s, batch_size=%s, policy=%s",
+            days,
+            end_before,
+            batch_size,
+            policy.__class__.__name__,
+        )
+
+        return cls(policy=policy, end_before=end_before, start_from=None, batch_size=batch_size, dry_run=dry_run)
+
+    def run(self) -> dict[str, int]:
+        """
+        Execute the message cleanup operation.
+
+        Returns:
+            Dict with statistics: batches, total_messages, filtered_messages, total_deleted
+        """
+        return self._clean_messages_by_time_range()
+
+    def _clean_messages_by_time_range(self) -> dict[str, int]:
+        """
+        Clean messages within a time range using cursor-based pagination.
+
+        Time range is [start_from, end_before)
+
+        Steps:
+        1. Iterate messages using cursor pagination (by created_at, id)
+        2. Query app_id -> tenant_id mapping
+        3. Delegate to policy to determine which messages to delete
+        4. Batch delete messages and their relations
+
+        Returns:
+            Dict with statistics:
+            - batches: number of fetch iterations, including the final empty one
+            - total_messages: messages fetched across all batches
+            - filtered_messages: messages the policy selected for deletion
+            - total_deleted: message rows actually deleted (stays 0 in dry-run mode)
+        """
+        stats = {
+            "batches": 0,
+            "total_messages": 0,
+            "filtered_messages": 0,
+            "total_deleted": 0,
+        }
+
+        # Cursor-based pagination using (created_at, id) to avoid infinite loops
+        # and ensure proper ordering with time-based filtering
+        _cursor: tuple[datetime.datetime, str] | None = None
+
+        logger.info(
+            "clean_messages: start cleaning messages (dry_run=%s), start_from=%s, end_before=%s",
+            self._dry_run,
+            self._start_from,
+            self._end_before,
+        )
+
+        while True:
+            # Incremented before fetching, so the empty fetch that ends the loop is counted too
+            stats["batches"] += 1
+
+            # Step 1: Fetch a batch of messages using cursor
+            with Session(db.engine, expire_on_commit=False) as session:
+                msg_stmt = (
+                    select(Message.id, Message.app_id, Message.created_at)
+                    .where(Message.created_at < self._end_before)
+                    .order_by(Message.created_at, Message.id)
+                    .limit(self._batch_size)
+                )
+
+                # The lower bound is optional (from_days passes start_from=None)
+                if self._start_from:
+                    msg_stmt = msg_stmt.where(Message.created_at >= self._start_from)
+
+                # Apply cursor condition: (created_at, id) > (last_created_at, last_message_id)
+                # This translates to:
+                #   created_at > last_created_at OR (created_at = last_created_at AND id > last_message_id)
+                if _cursor:
+                    # Continuing from previous batch
+                    msg_stmt = msg_stmt.where(
+                        (Message.created_at > _cursor[0])
+                        | ((Message.created_at == _cursor[0]) & (Message.id > _cursor[1]))
+                    )
+
+                raw_messages = list(session.execute(msg_stmt).all())
+                messages = [
+                    SimpleMessage(id=msg_id, app_id=app_id, created_at=msg_created_at)
+                    for msg_id, app_id, msg_created_at in raw_messages
+                ]
+
+                # Track total messages fetched across all batches
+                stats["total_messages"] += len(messages)
+
+                if not messages:
+                    logger.info("clean_messages (batch %s): no more messages to process", stats["batches"])
+                    break
+
+                # Update cursor to the last message's (created_at, id).
+                # This happens before any `continue` below, so skipped batches still advance the scan.
+                _cursor = (messages[-1].created_at, messages[-1].id)
+
+                # Step 2: Extract app_ids and query tenant_ids
+                app_ids = list({msg.app_id for msg in messages})
+
+                if not app_ids:
+                    logger.info("clean_messages (batch %s): no app_ids found, skip", stats["batches"])
+                    continue
+
+                app_stmt = select(App.id, App.tenant_id).where(App.id.in_(app_ids))
+                apps = list(session.execute(app_stmt).all())
+
+            # No app rows were found for this batch: skip it without consulting the policy
+            if not apps:
+                logger.info("clean_messages (batch %s): no apps found, skip", stats["batches"])
+                continue
+
+            # Build app_id -> tenant_id mapping
+            app_to_tenant: dict[str, str] = {app.id: app.tenant_id for app in apps}
+
+            # Step 3: Delegate to policy to determine which messages to delete
+            message_ids_to_delete = self._policy.filter_message_ids(messages, app_to_tenant)
+
+            if not message_ids_to_delete:
+                logger.info("clean_messages (batch %s): no messages to delete, skip", stats["batches"])
+                continue
+
+            stats["filtered_messages"] += len(message_ids_to_delete)
+
+            # Step 4: Batch delete messages and their relations
+            if not self._dry_run:
+                # A fresh session per batch: relations and messages are removed and committed together
+                with Session(db.engine, expire_on_commit=False) as session:
+                    # Delete related records first
+                    self._batch_delete_message_relations(session, message_ids_to_delete)
+
+                    # Delete messages
+                    delete_stmt = delete(Message).where(Message.id.in_(message_ids_to_delete))
+                    delete_result = cast(CursorResult, session.execute(delete_stmt))
+                    messages_deleted = delete_result.rowcount
+                    session.commit()
+
+                    stats["total_deleted"] += messages_deleted
+
+                    logger.info(
+                        "clean_messages (batch %s): processed %s messages, deleted %s messages",
+                        stats["batches"],
+                        len(messages),
+                        messages_deleted,
+                    )
+            else:
+                # Log random sample of message IDs that would be deleted (up to 10)
+                sample_size = min(10, len(message_ids_to_delete))
+                sampled_ids = random.sample(list(message_ids_to_delete), sample_size)
+
+                logger.info(
+                    "clean_messages (batch %s, dry_run): would delete %s messages, sampling %s ids:",
+                    stats["batches"],
+                    len(message_ids_to_delete),
+                    sample_size,
+                )
+                for msg_id in sampled_ids:
+                    logger.info("clean_messages (batch %s, dry_run) sample: message_id=%s", stats["batches"], msg_id)
+
+        logger.info(
+            "clean_messages completed: total batches: %s, total messages: %s, filtered messages: %s, total deleted: %s",
+            stats["batches"],
+            stats["total_messages"],
+            stats["filtered_messages"],
+            stats["total_deleted"],
+        )
+
+        return stats
+
+    @staticmethod
+    def _batch_delete_message_relations(session: Session, message_ids: Sequence[str]) -> None:
+        """
+        Batch delete all related records for given message IDs.
+
+        Issues one DELETE per related table, each filtered by message_id. The
+        session is not committed here; the caller commits after deleting the
+        messages themselves.
+
+        Args:
+            session: Database session
+            message_ids: List of message IDs to delete relations for
+        """
+        if not message_ids:
+            return
+
+        # Delete all related records in batch
+        session.execute(delete(MessageFeedback).where(MessageFeedback.message_id.in_(message_ids)))
+
+        session.execute(delete(MessageAnnotation).where(MessageAnnotation.message_id.in_(message_ids)))
+
+        session.execute(delete(MessageChain).where(MessageChain.message_id.in_(message_ids)))
+
+        session.execute(delete(MessageAgentThought).where(MessageAgentThought.message_id.in_(message_ids)))
+
+        session.execute(delete(MessageFile).where(MessageFile.message_id.in_(message_ids)))
+
+        session.execute(delete(SavedMessage).where(SavedMessage.message_id.in_(message_ids)))
+
+        session.execute(delete(AppAnnotationHitHistory).where(AppAnnotationHitHistory.message_id.in_(message_ids)))
+
+        session.execute(delete(DatasetRetrieverResource).where(DatasetRetrieverResource.message_id.in_(message_ids)))
--- a/api/tests/integration_tests/.env.example
+++ b/api/tests/integration_tests/.env.example
@@ -103,6 +103,8 @@ SMTP_USERNAME=123
 SMTP_PASSWORD=abc
 SMTP_USE_TLS=true
 SMTP_OPPORTUNISTIC_TLS=false
+# Optional: override the local hostname used for SMTP HELO/EHLO
+SMTP_LOCAL_HOSTNAME=

 # Sentry configuration
 SENTRY_DSN=
--- a/api/tests/test_containers_integration_tests/services/test_messages_clean_service.py
+++ b/api/tests/test_containers_integration_tests/services/test_messages_clean_service.py
--- a/api/tests/unit_tests/controllers/console/datasets/init.py
+++ b/api/tests/unit_tests/controllers/console/datasets/init.py
@@ -0,0 +1 @@
+"""Unit tests for `controllers.console.datasets` controllers."""
--- a/api/tests/unit_tests/controllers/console/datasets/test_external_dataset_payload.py
+++ b/api/tests/unit_tests/controllers/console/datasets/test_external_dataset_payload.py
@@ -0,0 +1,49 @@
+"""
+Unit tests for the external dataset controller payload schemas.
+
+These tests focus on Pydantic validation rules so we can catch regressions
+in request constraints (e.g. max length changes) without exercising the
+full Flask/RESTX request stack.
+"""
+
+from __future__ import annotations
+
+import pytest
+from pydantic import ValidationError
+
+from controllers.console.datasets.external import ExternalDatasetCreatePayload
+
+
+def test_external_dataset_create_payload_allows_name_length_100() -> None:
+    """A `name` of exactly 100 characters (the inclusive upper bound) validates and round-trips."""
+
+    # Boundary value: the longest name the schema is expected to accept.
+    boundary_name: str = "a" * 100
+
+    validated = ExternalDatasetCreatePayload.model_validate(
+        {
+            "external_knowledge_api_id": "ek-api-1",
+            "external_knowledge_id": "ek-1",
+            "name": boundary_name,
+        }
+    )
+
+    assert validated.name == boundary_name
+
+
+def test_external_dataset_create_payload_rejects_name_length_101() -> None:
+    """A `name` of 101 characters (one past the limit) fails validation with `string_too_long`."""
+
+    # One character over the maximum accepted length.
+    oversized_name: str = "a" * 101
+    request_body: dict[str, object] = {
+        "external_knowledge_api_id": "ek-api-1",
+        "external_knowledge_id": "ek-1",
+        "name": oversized_name,
+    }
+
+    with pytest.raises(ValidationError) as exc_info:
+        ExternalDatasetCreatePayload.model_validate(request_body)
+
+    # The first reported error must point at `name` and carry the 100-character limit.
+    first_error = exc_info.value.errors()[0]
+    assert first_error["loc"] == ("name",)
+    assert first_error["type"] == "string_too_long"
+    assert first_error["ctx"]["max_length"] == 100
--- a/api/tests/unit_tests/core/plugin/test_endpoint_client.py
+++ b/api/tests/unit_tests/core/plugin/test_endpoint_client.py
@@ -0,0 +1,279 @@
+"""Unit tests for PluginEndpointClient functionality.
+
+This test module covers the endpoint client operations including:
+- Successful endpoint deletion
+- Idempotent delete behavior (record not found)
+- Non-idempotent delete behavior (other errors)
+
+Tests follow the Arrange-Act-Assert pattern for clarity.
+"""
+
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from core.plugin.impl.endpoint import PluginEndpointClient
+from core.plugin.impl.exc import PluginDaemonInternalServerError
+
+
+class TestPluginEndpointClientDelete:
+    """Unit tests for PluginEndpointClient.delete_endpoint.
+
+    Scenarios:
+    - the daemon reports success
+    - the daemon reports "record not found" (treated as already deleted -> True)
+    - the daemon reports any other error (must raise)
+
+    Every test patches ``httpx.request`` with a canned daemon response and
+    calls ``delete_endpoint`` with the same tenant/user/endpoint IDs.
+    """
+
+    @staticmethod
+    def _daemon_response(body: dict) -> MagicMock:
+        """Build a mocked HTTP 200 response whose ``json()`` returns ``body``."""
+        response = MagicMock()
+        response.status_code = 200
+        response.json.return_value = body
+        return response
+
+    @staticmethod
+    def _delete(client):
+        """Invoke ``delete_endpoint`` with the fixed IDs shared by all tests."""
+        return client.delete_endpoint(
+            tenant_id="tenant-123",
+            user_id="user-456",
+            endpoint_id="endpoint-789",
+        )
+
+    @pytest.fixture
+    def endpoint_client(self):
+        """Provide a fresh PluginEndpointClient."""
+        return PluginEndpointClient()
+
+    @pytest.fixture
+    def mock_config(self):
+        """Patch the plugin daemon URL and API key for the duration of a test."""
+        with (
+            patch("core.plugin.impl.base.dify_config.PLUGIN_DAEMON_URL", "http://127.0.0.1:5002"),
+            patch("core.plugin.impl.base.dify_config.PLUGIN_DAEMON_KEY", "test-api-key"),
+        ):
+            yield
+
+    def test_delete_endpoint_success(self, endpoint_client, mock_config):
+        """Successful deletion.
+
+        Given:
+            - The plugin daemon answers with code 0 and data True
+        When:
+            - delete_endpoint is called
+        Then:
+            - The method returns True
+        """
+        # Arrange
+        success = self._daemon_response(
+            {
+                "code": 0,
+                "message": "success",
+                "data": True,
+            }
+        )
+
+        with patch("httpx.request", return_value=success):
+            # Act
+            outcome = self._delete(endpoint_client)
+
+            # Assert
+            assert outcome is True
+
+    def test_delete_endpoint_idempotent_record_not_found(self, endpoint_client, mock_config):
+        """Deleting an endpoint that is already gone.
+
+        Given:
+            - The plugin daemon answers with a "record not found" error
+        When:
+            - delete_endpoint is called
+        Then:
+            - The method returns True (idempotent behavior)
+            - No exception is raised
+        """
+        # Arrange
+        not_found = self._daemon_response(
+            {
+                "code": -1,
+                "message": (
+                    '{"error_type": "PluginDaemonInternalServerError", '
+                    '"message": "failed to remove endpoint: record not found"}'
+                ),
+            }
+        )
+
+        with patch("httpx.request", return_value=not_found):
+            # Act
+            outcome = self._delete(endpoint_client)
+
+            # Assert - True is returned instead of an error being raised
+            assert outcome is True
+
+    def test_delete_endpoint_non_idempotent_other_errors(self, endpoint_client, mock_config):
+        """Errors other than "record not found" are propagated.
+
+        Given:
+            - The plugin daemon answers with a different internal server error
+        When:
+            - delete_endpoint is called
+        Then:
+            - PluginDaemonInternalServerError is raised
+        """
+        # Arrange
+        internal_error = self._daemon_response(
+            {
+                "code": -1,
+                "message": (
+                    '{"error_type": "PluginDaemonInternalServerError", '
+                    '"message": "failed to remove endpoint: internal server error"}'
+                ),
+            }
+        )
+
+        with patch("httpx.request", return_value=internal_error):
+            # Act & Assert
+            with pytest.raises(PluginDaemonInternalServerError) as exc_info:
+                self._delete(endpoint_client)
+
+            # Assert - the raised error is not the "record not found" one
+            assert "record not found" not in str(exc_info.value.description)
+
+    def test_delete_endpoint_idempotent_case_insensitive(self, endpoint_client, mock_config):
+        """The "record not found" match ignores letter case.
+
+        Given:
+            - The plugin daemon answers with "Record Not Found" (different case)
+        When:
+            - delete_endpoint is called
+        Then:
+            - The method returns True (idempotent behavior)
+        """
+        # Arrange
+        mixed_case_not_found = self._daemon_response(
+            {
+                "code": -1,
+                "message": '{"error_type": "PluginDaemonInternalServerError", "message": "Record Not Found"}',
+            }
+        )
+
+        with patch("httpx.request", return_value=mixed_case_not_found):
+            # Act
+            outcome = self._delete(endpoint_client)
+
+            # Assert - still True
+            assert outcome is True
+
+    def test_delete_endpoint_multiple_calls_idempotent(self, endpoint_client, mock_config):
+        """Repeated deletes all report success.
+
+        Given:
+            - The first call succeeds
+            - The following call gets "record not found"
+        When:
+            - delete_endpoint is called twice
+        Then:
+            - Both calls return True
+        """
+        # Arrange
+        # Response for the first call - success
+        success = self._daemon_response(
+            {
+                "code": 0,
+                "message": "success",
+                "data": True,
+            }
+        )
+
+        # Response for the second call - record not found
+        not_found = self._daemon_response(
+            {
+                "code": -1,
+                "message": (
+                    '{"error_type": "PluginDaemonInternalServerError", '
+                    '"message": "failed to remove endpoint: record not found"}'
+                ),
+            }
+        )
+
+        with patch("httpx.request") as mock_request:
+            # Act - first call
+            mock_request.return_value = success
+            first_outcome = self._delete(endpoint_client)
+
+            # Act - second call (already deleted)
+            mock_request.return_value = not_found
+            second_outcome = self._delete(endpoint_client)
+
+            # Assert - both report True
+            assert first_outcome is True
+            assert second_outcome is True
+
+    def test_delete_endpoint_non_idempotent_unauthorized_error(self, endpoint_client, mock_config):
+        """Authorization errors are never swallowed as idempotent success.
+
+        Given:
+            - The plugin daemon answers with an unauthorized error
+        When:
+            - delete_endpoint is called
+        Then:
+            - An exception whose class is named PluginDaemonUnauthorizedError is raised
+        """
+        # Arrange
+        unauthorized = self._daemon_response(
+            {
+                "code": -1,
+                "message": '{"error_type": "PluginDaemonUnauthorizedError", "message": "unauthorized access"}',
+            }
+        )
+
+        with patch("httpx.request", return_value=unauthorized):
+            # Act & Assert
+            with pytest.raises(Exception) as exc_info:
+                self._delete(endpoint_client)
+
+            # Assert - the error type is identified by class name
+            assert exc_info.value.__class__.__name__ == "PluginDaemonUnauthorizedError"
--- a/api/tests/unit_tests/libs/test_smtp_client.py
+++ b/api/tests/unit_tests/libs/test_smtp_client.py
@@ -1,4 +1,4 @@
-from unittest.mock import MagicMock, patch
+from unittest.mock import ANY, MagicMock, patch

 import pytest

@@ -17,7 +17,7 @@ def test_smtp_plain_success(mock_smtp_cls: MagicMock):
    client = SMTPClient(server="smtp.example.com", port=25, username="", password="", _from="noreply@example.com")
    client.send(_mail())

-    mock_smtp_cls.assert_called_once_with("smtp.example.com", 25, timeout=10)
+    mock_smtp_cls.assert_called_once_with("smtp.example.com", 25, timeout=10, local_hostname=ANY)
    mock_smtp.sendmail.assert_called_once()
    mock_smtp.quit.assert_called_once()

@@ -38,7 +38,7 @@ def test_smtp_tls_opportunistic_success(mock_smtp_cls: MagicMock):
    )
    client.send(_mail())

-    mock_smtp_cls.assert_called_once_with("smtp.example.com", 587, timeout=10)
+    mock_smtp_cls.assert_called_once_with("smtp.example.com", 587, timeout=10, local_hostname=ANY)
    assert mock_smtp.ehlo.call_count == 2
    mock_smtp.starttls.assert_called_once()
    mock_smtp.login.assert_called_once_with("user", "pass")
--- a/api/tests/unit_tests/services/test_messages_clean_service.py
+++ b/api/tests/unit_tests/services/test_messages_clean_service.py
@@ -0,0 +1,627 @@
+import datetime
+from unittest.mock import MagicMock, patch
+
+import pytest
+
+from enums.cloud_plan import CloudPlan
+from services.retention.conversation.messages_clean_policy import (
+    BillingDisabledPolicy,
+    BillingSandboxPolicy,
+    SimpleMessage,
+    create_message_clean_policy,
+)
+from services.retention.conversation.messages_clean_service import MessagesCleanService
+
+
+def make_simple_message(msg_id: str, app_id: str) -> SimpleMessage:
+    """Build a SimpleMessage for the given ids; created_at is always 2024-01-01."""
+    return SimpleMessage(created_at=datetime.datetime(2024, 1, 1), app_id=app_id, id=msg_id)
+
+
+def make_plan_provider(tenant_plans: dict) -> MagicMock:
+    """Build a mock plan_provider whose every call yields ``tenant_plans``."""
+    # MagicMock accepts return_value at construction, so one expression is enough;
+    # the returned mock still records its calls for later assertions.
+    return MagicMock(return_value=tenant_plans)
+
+
+class TestBillingSandboxPolicyFilterMessageIds:
+    """Exercises BillingSandboxPolicy.filter_message_ids with mocked plan lookups."""
+
+    # Injected "now" (in seconds) so the grace-period arithmetic is deterministic
+    CURRENT_TIMESTAMP = 1_000_000
+    GRACEFUL_PERIOD_DAYS = 8
+    GRACEFUL_PERIOD_SECONDS = GRACEFUL_PERIOD_DAYS * 24 * 60 * 60
+
+    def test_missing_tenant_mapping_excluded(self):
+        """Messages whose app has no tenant mapping must not be selected."""
+        # Arrange
+        batch = [
+            make_simple_message("msg1", "app1"),
+            make_simple_message("msg2", "app2"),
+        ]
+        tenant_by_app = {}  # neither app resolves to a tenant
+        plans = {"tenant1": {"plan": CloudPlan.SANDBOX, "expiration_date": -1}}
+        provider = make_plan_provider(plans)
+
+        sandbox_policy = BillingSandboxPolicy(
+            plan_provider=provider,
+            graceful_period_days=self.GRACEFUL_PERIOD_DAYS,
+            current_timestamp=self.CURRENT_TIMESTAMP,
+        )
+
+        # Act
+        selected = sandbox_policy.filter_message_ids(batch, tenant_by_app)
+
+        # Assert
+        assert list(selected) == []
+
+    def test_missing_tenant_plan_excluded(self):
+        """Messages whose tenant has no plan entry must not be selected (safe default)."""
+        # Arrange
+        batch = [
+            make_simple_message("msg1", "app1"),
+            make_simple_message("msg2", "app2"),
+        ]
+        tenant_by_app = {"app1": "tenant1", "app2": "tenant2"}
+        plans = {}  # the provider knows about no tenant at all
+        provider = make_plan_provider(plans)
+
+        sandbox_policy = BillingSandboxPolicy(
+            plan_provider=provider,
+            graceful_period_days=self.GRACEFUL_PERIOD_DAYS,
+            current_timestamp=self.CURRENT_TIMESTAMP,
+        )
+
+        # Act
+        selected = sandbox_policy.filter_message_ids(batch, tenant_by_app)
+
+        # Assert
+        assert list(selected) == []
+
+    def test_non_sandbox_plan_excluded(self):
+        """Messages of PROFESSIONAL/TEAM tenants are skipped; sandbox ones are kept."""
+        # Arrange
+        batch = [
+            make_simple_message("msg1", "app1"),
+            make_simple_message("msg2", "app2"),
+            make_simple_message("msg3", "app3"),
+        ]
+        tenant_by_app = {"app1": "tenant1", "app2": "tenant2", "app3": "tenant3"}
+        plans = {
+            "tenant1": {"plan": CloudPlan.PROFESSIONAL, "expiration_date": -1},
+            "tenant2": {"plan": CloudPlan.TEAM, "expiration_date": -1},
+            "tenant3": {"plan": CloudPlan.SANDBOX, "expiration_date": -1},  # the only sandbox tenant
+        }
+        provider = make_plan_provider(plans)
+
+        sandbox_policy = BillingSandboxPolicy(
+            plan_provider=provider,
+            graceful_period_days=self.GRACEFUL_PERIOD_DAYS,
+            current_timestamp=self.CURRENT_TIMESTAMP,
+        )
+
+        # Act
+        selected = sandbox_policy.filter_message_ids(batch, tenant_by_app)
+
+        # Assert - msg3 belongs to the sandbox tenant and is the only one kept
+        assert set(selected) == {"msg3"}
+
+    def test_whitelist_skip(self):
+        """Whitelisted tenants are skipped even when they are otherwise eligible sandbox tenants."""
+        # Arrange
+        batch = [
+            make_simple_message("msg1", "app1"),  # tenant1 is whitelisted -> skipped
+            make_simple_message("msg2", "app2"),  # tenant2 is not whitelisted -> kept
+            make_simple_message("msg3", "app3"),  # tenant3 is whitelisted -> skipped
+        ]
+        tenant_by_app = {"app1": "tenant1", "app2": "tenant2", "app3": "tenant3"}
+        plans = {
+            "tenant1": {"plan": CloudPlan.SANDBOX, "expiration_date": -1},
+            "tenant2": {"plan": CloudPlan.SANDBOX, "expiration_date": -1},
+            "tenant3": {"plan": CloudPlan.SANDBOX, "expiration_date": -1},
+        }
+        provider = make_plan_provider(plans)
+        exempt_tenants = ["tenant1", "tenant3"]
+
+        sandbox_policy = BillingSandboxPolicy(
+            plan_provider=provider,
+            graceful_period_days=self.GRACEFUL_PERIOD_DAYS,
+            tenant_whitelist=exempt_tenants,
+            current_timestamp=self.CURRENT_TIMESTAMP,
+        )
+
+        # Act
+        selected = sandbox_policy.filter_message_ids(batch, tenant_by_app)
+
+        # Assert - msg2 is the only message of a non-whitelisted tenant
+        assert set(selected) == {"msg2"}
+
+    def test_no_previous_subscription_included(self):
+        """Sandbox tenants with expiration_date=-1 (no previous subscription) are selected."""
+        # Arrange
+        batch = [
+            make_simple_message("msg1", "app1"),
+            make_simple_message("msg2", "app2"),
+        ]
+        tenant_by_app = {"app1": "tenant1", "app2": "tenant2"}
+        plans = {
+            "tenant1": {"plan": CloudPlan.SANDBOX, "expiration_date": -1},
+            "tenant2": {"plan": CloudPlan.SANDBOX, "expiration_date": -1},
+        }
+        provider = make_plan_provider(plans)
+
+        sandbox_policy = BillingSandboxPolicy(
+            plan_provider=provider,
+            graceful_period_days=self.GRACEFUL_PERIOD_DAYS,
+            current_timestamp=self.CURRENT_TIMESTAMP,
+        )
+
+        # Act
+        selected = sandbox_policy.filter_message_ids(batch, tenant_by_app)
+
+        # Assert - every message is kept
+        assert set(selected) == {"msg1", "msg2"}
+
+    def test_within_grace_period_excluded(self):
+        """Tenants whose subscription expired less than the grace period ago are skipped."""
+        # Arrange
+        now = self.CURRENT_TIMESTAMP
+        one_day_back = now - (1 * 24 * 60 * 60)
+        five_days_back = now - (5 * 24 * 60 * 60)
+        seven_days_back = now - (7 * 24 * 60 * 60)
+
+        batch = [
+            make_simple_message("msg1", "app1"),
+            make_simple_message("msg2", "app2"),
+            make_simple_message("msg3", "app3"),
+        ]
+        tenant_by_app = {"app1": "tenant1", "app2": "tenant2", "app3": "tenant3"}
+        plans = {
+            "tenant1": {"plan": CloudPlan.SANDBOX, "expiration_date": one_day_back},
+            "tenant2": {"plan": CloudPlan.SANDBOX, "expiration_date": five_days_back},
+            "tenant3": {"plan": CloudPlan.SANDBOX, "expiration_date": seven_days_back},
+        }
+        provider = make_plan_provider(plans)
+
+        sandbox_policy = BillingSandboxPolicy(
+            plan_provider=provider,
+            graceful_period_days=self.GRACEFUL_PERIOD_DAYS,  # 8 days
+            current_timestamp=now,
+        )
+
+        # Act
+        selected = sandbox_policy.filter_message_ids(batch, tenant_by_app)
+
+        # Assert - 1, 5 and 7 days are all inside the 8-day grace period
+        assert list(selected) == []
+
+    def test_exactly_at_boundary_excluded(self):
+        """An expiration exactly one grace period old is still skipped (strict comparison expected)."""
+        # Arrange
+        now = self.CURRENT_TIMESTAMP
+        on_the_boundary = now - self.GRACEFUL_PERIOD_SECONDS  # elapsed time == grace period
+
+        batch = [make_simple_message("msg1", "app1")]
+        tenant_by_app = {"app1": "tenant1"}
+        plans = {
+            "tenant1": {"plan": CloudPlan.SANDBOX, "expiration_date": on_the_boundary},
+        }
+        provider = make_plan_provider(plans)
+
+        sandbox_policy = BillingSandboxPolicy(
+            plan_provider=provider,
+            graceful_period_days=self.GRACEFUL_PERIOD_DAYS,
+            current_timestamp=now,
+        )
+
+        # Act
+        selected = sandbox_policy.filter_message_ids(batch, tenant_by_app)
+
+        # Assert - equality with the boundary does not count as "beyond" it
+        assert list(selected) == []
+
+    def test_beyond_grace_period_included(self):
+        """Tenants whose subscription expired more than the grace period ago are selected."""
+        # Arrange
+        now = self.CURRENT_TIMESTAMP
+        nine_days_back = now - (9 * 24 * 60 * 60)  # one day past the 8-day grace period
+        thirty_days_back = now - (30 * 24 * 60 * 60)  # far past it
+
+        batch = [
+            make_simple_message("msg1", "app1"),
+            make_simple_message("msg2", "app2"),
+        ]
+        tenant_by_app = {"app1": "tenant1", "app2": "tenant2"}
+        plans = {
+            "tenant1": {"plan": CloudPlan.SANDBOX, "expiration_date": nine_days_back},
+            "tenant2": {"plan": CloudPlan.SANDBOX, "expiration_date": thirty_days_back},
+        }
+        provider = make_plan_provider(plans)
+
+        sandbox_policy = BillingSandboxPolicy(
+            plan_provider=provider,
+            graceful_period_days=self.GRACEFUL_PERIOD_DAYS,
+            current_timestamp=now,
+        )
+
+        # Act
+        selected = sandbox_policy.filter_message_ids(batch, tenant_by_app)
+
+        # Assert - both expirations are past the grace period, so both are kept
+        assert set(selected) == {"msg1", "msg2"}
+
+    def test_empty_messages_returns_empty(self):
+        """An empty message batch yields an empty result."""
+        # Arrange
+        batch: list[SimpleMessage] = []
+        tenant_by_app = {"app1": "tenant1"}
+        provider = make_plan_provider({"tenant1": {"plan": CloudPlan.SANDBOX, "expiration_date": -1}})
+
+        sandbox_policy = BillingSandboxPolicy(
+            plan_provider=provider,
+            graceful_period_days=self.GRACEFUL_PERIOD_DAYS,
+            current_timestamp=self.CURRENT_TIMESTAMP,
+        )
+
+        # Act
+        selected = sandbox_policy.filter_message_ids(batch, tenant_by_app)
+
+        # Assert
+        assert list(selected) == []
+
+    def test_plan_provider_called_with_correct_tenant_ids(self):
+        """The plan provider is queried once, with the tenant ids of the batch."""
+        # Arrange
+        batch = [
+            make_simple_message("msg1", "app1"),
+            make_simple_message("msg2", "app2"),
+            make_simple_message("msg3", "app3"),
+        ]
+        tenant_by_app = {"app1": "tenant1", "app2": "tenant2", "app3": "tenant1"}  # two apps share tenant1
+        provider = make_plan_provider({})
+
+        sandbox_policy = BillingSandboxPolicy(
+            plan_provider=provider,
+            graceful_period_days=self.GRACEFUL_PERIOD_DAYS,
+            current_timestamp=self.CURRENT_TIMESTAMP,
+        )
+
+        # Act
+        sandbox_policy.filter_message_ids(batch, tenant_by_app)
+
+        # Assert - a single lookup whose first positional argument covers both tenants
+        provider.assert_called_once()
+        requested_tenants = set(provider.call_args.args[0])
+        assert requested_tenants == {"tenant1", "tenant2"}
+
+    def test_complex_mixed_scenario(self):
+        """Combines plan types, expirations, the whitelist and missing data in one batch."""
+        # Arrange
+        now = self.CURRENT_TIMESTAMP
+        long_expired = now - (15 * 24 * 60 * 60)  # past the grace period
+        recently_expired = now - (3 * 24 * 60 * 60)  # still inside the grace period
+        expires_later = now + (30 * 24 * 60 * 60)
+
+        batch = [
+            make_simple_message("msg1", "app1"),  # sandbox, never subscribed -> kept
+            make_simple_message("msg2", "app2"),  # sandbox, long expired -> kept
+            make_simple_message("msg3", "app3"),  # sandbox, inside grace period -> skipped
+            make_simple_message("msg4", "app4"),  # active TEAM plan -> skipped
+            make_simple_message("msg5", "app5"),  # app without tenant mapping -> skipped
+            make_simple_message("msg6", "app6"),  # tenant without plan info -> skipped
+            make_simple_message("msg7", "app7"),  # sandbox, long expired, whitelisted -> skipped
+        ]
+        tenant_by_app = {
+            "app1": "tenant1",
+            "app2": "tenant2",
+            "app3": "tenant3",
+            "app4": "tenant4",
+            "app6": "tenant6",  # mapped, but tenant6 has no plan entry below
+            "app7": "tenant7",
+            # app5 is deliberately left unmapped
+        }
+        plans = {
+            "tenant1": {"plan": CloudPlan.SANDBOX, "expiration_date": -1},
+            "tenant2": {"plan": CloudPlan.SANDBOX, "expiration_date": long_expired},
+            "tenant3": {"plan": CloudPlan.SANDBOX, "expiration_date": recently_expired},
+            "tenant4": {"plan": CloudPlan.TEAM, "expiration_date": expires_later},
+            "tenant7": {"plan": CloudPlan.SANDBOX, "expiration_date": long_expired},
+            # tenant6 is deliberately left without a plan
+        }
+        provider = make_plan_provider(plans)
+        exempt_tenants = ["tenant7"]
+
+        sandbox_policy = BillingSandboxPolicy(
+            plan_provider=provider,
+            graceful_period_days=self.GRACEFUL_PERIOD_DAYS,
+            tenant_whitelist=exempt_tenants,
+            current_timestamp=now,
+        )
+
+        # Act
+        selected = sandbox_policy.filter_message_ids(batch, tenant_by_app)
+
+        # Assert - msg1 and msg2 are the only messages that pass every check
+        assert set(selected) == {"msg1", "msg2"}
+
+
+class TestBillingDisabledPolicyFilterMessageIds:
+    """Exercises BillingDisabledPolicy.filter_message_ids, which is expected to keep everything."""
+
+    def test_returns_all_message_ids(self):
+        """Every message id comes back, in input order."""
+        # Arrange
+        batch = [
+            make_simple_message("msg1", "app1"),
+            make_simple_message("msg2", "app2"),
+            make_simple_message("msg3", "app3"),
+        ]
+        tenant_by_app = {"app1": "tenant1", "app2": "tenant2"}
+
+        disabled_policy = BillingDisabledPolicy()
+
+        # Act
+        selected = disabled_policy.filter_message_ids(batch, tenant_by_app)
+
+        # Assert - list comparison also pins the ordering
+        assert list(selected) == ["msg1", "msg2", "msg3"]
+
+    def test_ignores_app_to_tenant(self):
+        """An empty app-to-tenant mapping does not reduce the result."""
+        # Arrange
+        batch = [
+            make_simple_message("msg1", "app1"),
+            make_simple_message("msg2", "app2"),
+        ]
+        tenant_by_app: dict[str, str] = {}  # no mapping at all; the policy should not care
+
+        disabled_policy = BillingDisabledPolicy()
+
+        # Act
+        selected = disabled_policy.filter_message_ids(batch, tenant_by_app)
+
+        # Assert - both ids are still returned
+        assert list(selected) == ["msg1", "msg2"]
+
+    def test_empty_messages_returns_empty(self):
+        """An empty message batch yields an empty result."""
+        # Arrange
+        batch: list[SimpleMessage] = []
+        tenant_by_app = {"app1": "tenant1"}
+
+        disabled_policy = BillingDisabledPolicy()
+
+        # Act
+        selected = disabled_policy.filter_message_ids(batch, tenant_by_app)
+
+        # Assert
+        assert list(selected) == []
+
+
+class TestCreateMessageCleanPolicy:
+    """Exercises the create_message_clean_policy factory with patched config and billing."""
+
+    @patch("services.retention.conversation.messages_clean_policy.dify_config")
+    def test_billing_disabled_returns_billing_disabled_policy(self, mock_config):
+        """With BILLING_ENABLED=False the factory yields a BillingDisabledPolicy."""
+        # Arrange
+        mock_config.BILLING_ENABLED = False
+
+        # Act
+        built = create_message_clean_policy(graceful_period_days=21)
+
+        # Assert
+        assert isinstance(built, BillingDisabledPolicy)
+
+    @patch("services.retention.conversation.messages_clean_policy.BillingService")
+    @patch("services.retention.conversation.messages_clean_policy.dify_config")
+    def test_billing_enabled_policy_has_correct_internals(self, mock_config, mock_billing_service):
+        """With BILLING_ENABLED=True the factory wires a BillingSandboxPolicy from BillingService."""
+        # Arrange
+        mock_config.BILLING_ENABLED = True
+        exempt_tenants = ["tenant1", "tenant2"]
+        mock_billing_service.get_expired_subscription_cleanup_whitelist.return_value = exempt_tenants
+        bulk_plan_lookup = MagicMock()
+        mock_billing_service.get_plan_bulk_with_cache = bulk_plan_lookup
+
+        # Act
+        built = create_message_clean_policy(graceful_period_days=14, current_timestamp=1234567)
+
+        # Assert - whitelist fetched once, and every constructor input stored on the policy
+        mock_billing_service.get_expired_subscription_cleanup_whitelist.assert_called_once()
+        assert isinstance(built, BillingSandboxPolicy)
+        assert built._graceful_period_days == 14
+        assert list(built._tenant_whitelist) == exempt_tenants
+        assert built._plan_provider == bulk_plan_lookup
+        assert built._current_timestamp == 1234567
+
+
+class TestMessagesCleanServiceFromTimeRange:
+    """Exercises the MessagesCleanService.from_time_range factory method."""
+
+    def test_start_from_end_before_raises_value_error(self):
+        """A range with start_from equal to or later than end_before raises ValueError."""
+        # Arrange - one policy shared by both invalid ranges
+        cleanup_policy = BillingDisabledPolicy()
+        # Case 1: both bounds are the same instant
+        instant = datetime.datetime(2024, 1, 1, 12, 0, 0)
+
+        # Act & Assert
+        with pytest.raises(ValueError, match="start_from .* must be less than end_before"):
+            MessagesCleanService.from_time_range(
+                policy=cleanup_policy,
+                start_from=instant,
+                end_before=instant,
+            )
+
+        # Case 2: start_from is later than end_before
+        later = datetime.datetime(2024, 12, 31)
+        earlier = datetime.datetime(2024, 1, 1)
+
+        # Act & Assert
+        with pytest.raises(ValueError, match="start_from .* must be less than end_before"):
+            MessagesCleanService.from_time_range(
+                policy=cleanup_policy,
+                start_from=later,
+                end_before=earlier,
+            )
+
+    def test_batch_size_raises_value_error(self):
+        """A zero or negative batch_size raises ValueError."""
+        # Arrange - a valid window, so only batch_size can trigger the error
+        window_start = datetime.datetime(2024, 1, 1)
+        window_end = datetime.datetime(2024, 2, 1)
+        cleanup_policy = BillingDisabledPolicy()
+
+        # Act & Assert - zero
+        with pytest.raises(ValueError, match="batch_size .* must be greater than 0"):
+            MessagesCleanService.from_time_range(
+                policy=cleanup_policy,
+                start_from=window_start,
+                end_before=window_end,
+                batch_size=0,
+            )
+
+        window_start = datetime.datetime(2024, 1, 1)
+        window_end = datetime.datetime(2024, 2, 1)
+        cleanup_policy = BillingDisabledPolicy()
+
+        # Act & Assert - negative
+        with pytest.raises(ValueError, match="batch_size .* must be greater than 0"):
+            MessagesCleanService.from_time_range(
+                policy=cleanup_policy,
+                start_from=window_start,
+                end_before=window_end,
+                batch_size=-100,
+            )
+
+    def test_valid_params_creates_instance(self):
+        """Valid arguments produce an instance that stores each of them unchanged."""
+        # Arrange
+        window_start = datetime.datetime(2024, 1, 1, 0, 0, 0)
+        window_end = datetime.datetime(2024, 12, 31, 23, 59, 59)
+        cleanup_policy = BillingDisabledPolicy()
+        chunk = 500
+        simulate = True
+
+        # Act
+        built = MessagesCleanService.from_time_range(
+            policy=cleanup_policy,
+            start_from=window_start,
+            end_before=window_end,
+            batch_size=chunk,
+            dry_run=simulate,
+        )
+
+        # Assert
+        assert isinstance(built, MessagesCleanService)
+        assert built._policy is cleanup_policy
+        assert built._start_from == window_start
+        assert built._end_before == window_end
+        assert built._batch_size == chunk
+        assert built._dry_run == simulate
+
+    def test_default_params(self):
+        """Omitting batch_size and dry_run falls back to 1000 and False."""
+        # Arrange
+        window_start = datetime.datetime(2024, 1, 1)
+        window_end = datetime.datetime(2024, 2, 1)
+        cleanup_policy = BillingDisabledPolicy()
+
+        # Act
+        built = MessagesCleanService.from_time_range(
+            policy=cleanup_policy,
+            start_from=window_start,
+            end_before=window_end,
+        )
+
+        # Assert
+        assert built._batch_size == 1000  # expected default
+        assert built._dry_run is False  # expected default
+
+
+class TestMessagesCleanServiceFromDays:
+    """Exercises the MessagesCleanService.from_days factory method."""
+
+    def test_days_raises_value_error(self):
+        """Negative days raises ValueError, while days=0 is accepted and cuts off at "now"."""
+        # Arrange
+        cleanup_policy = BillingDisabledPolicy()
+
+        # Act & Assert - a negative value is rejected
+        with pytest.raises(ValueError, match="days .* must be greater than or equal to 0"):
+            MessagesCleanService.from_days(policy=cleanup_policy, days=-1)
+
+        # Act - days=0 with the service module's clock frozen
+        frozen_now = datetime.datetime(2024, 6, 15, 14, 0, 0)
+        with patch("services.retention.conversation.messages_clean_service.datetime") as clock:
+            clock.timedelta = datetime.timedelta
+            clock.datetime.now.return_value = frozen_now
+
+            built = MessagesCleanService.from_days(policy=cleanup_policy, days=0)
+
+        # Assert
+        assert built._end_before == frozen_now
+
+    def test_batch_size_raises_value_error(self):
+        """A zero or negative batch_size raises ValueError."""
+        # Arrange
+        cleanup_policy = BillingDisabledPolicy()
+
+        # Act & Assert - zero
+        with pytest.raises(ValueError, match="batch_size .* must be greater than 0"):
+            MessagesCleanService.from_days(policy=cleanup_policy, days=30, batch_size=0)
+
+        # Act & Assert - negative
+        with pytest.raises(ValueError, match="batch_size .* must be greater than 0"):
+            MessagesCleanService.from_days(policy=cleanup_policy, days=30, batch_size=-500)
+
+    def test_valid_params_creates_instance(self):
+        """Valid arguments produce an instance whose cutoff is the frozen "now" minus days."""
+        # Arrange
+        cleanup_policy = BillingDisabledPolicy()
+        retention_days = 90
+        chunk = 500
+        simulate = True
+
+        # Act
+        frozen_now = datetime.datetime(2024, 6, 15, 10, 30, 0)
+        with patch("services.retention.conversation.messages_clean_service.datetime") as clock:
+            clock.timedelta = datetime.timedelta
+            clock.datetime.now.return_value = frozen_now
+
+            built = MessagesCleanService.from_days(
+                policy=cleanup_policy,
+                days=retention_days,
+                batch_size=chunk,
+                dry_run=simulate,
+            )
+
+        # Assert
+        cutoff = frozen_now - datetime.timedelta(days=retention_days)
+        assert isinstance(built, MessagesCleanService)
+        assert built._policy is cleanup_policy
+        assert built._start_from is None
+        assert built._end_before == cutoff
+        assert built._batch_size == chunk
+        assert built._dry_run == simulate
+
+    def test_default_params(self):
+        """Omitting days, batch_size and dry_run falls back to 30, 1000 and False."""
+        # Arrange
+        cleanup_policy = BillingDisabledPolicy()
+
+        # Act
+        frozen_now = datetime.datetime(2024, 6, 15, 10, 30, 0)
+        with patch("services.retention.conversation.messages_clean_service.datetime") as clock:
+            clock.timedelta = datetime.timedelta
+            clock.datetime.now.return_value = frozen_now
+
+            built = MessagesCleanService.from_days(policy=cleanup_policy)
+
+        # Assert
+        cutoff = frozen_now - datetime.timedelta(days=30)  # expected default of 30 days
+        assert built._end_before == cutoff
+        assert built._batch_size == 1000  # expected default
+        assert built._dry_run is False  # expected default
--- a/api/tests/unit_tests/tasks/test_mail_send_task.py
+++ b/api/tests/unit_tests/tasks/test_mail_send_task.py
@@ -9,7 +9,7 @@ This module tests the mail sending functionality including:
 """

 import smtplib
-from unittest.mock import MagicMock, patch
+from unittest.mock import ANY, MagicMock, patch

 import pytest

@@ -151,7 +151,7 @@ class TestSMTPIntegration:
        client.send(mail_data)

        # Assert
-        mock_smtp_ssl.assert_called_once_with("smtp.example.com", 465, timeout=10)
+        mock_smtp_ssl.assert_called_once_with("smtp.example.com", 465, timeout=10, local_hostname=ANY)
        mock_server.login.assert_called_once_with("user@example.com", "password123")
        mock_server.sendmail.assert_called_once()
        mock_server.quit.assert_called_once()
@@ -181,7 +181,7 @@ class TestSMTPIntegration:
        client.send(mail_data)

        # Assert
-        mock_smtp.assert_called_once_with("smtp.example.com", 587, timeout=10)
+        mock_smtp.assert_called_once_with("smtp.example.com", 587, timeout=10, local_hostname=ANY)
        mock_server.ehlo.assert_called()
        mock_server.starttls.assert_called_once()
        assert mock_server.ehlo.call_count == 2  # Before and after STARTTLS
@@ -213,7 +213,7 @@ class TestSMTPIntegration:
        client.send(mail_data)

        # Assert
-        mock_smtp.assert_called_once_with("smtp.example.com", 25, timeout=10)
+        mock_smtp.assert_called_once_with("smtp.example.com", 25, timeout=10, local_hostname=ANY)
        mock_server.login.assert_called_once()
        mock_server.sendmail.assert_called_once()
        mock_server.quit.assert_called_once()
--- a/api/uv.lock
+++ b/api/uv.lock
--- a/docker/.env.example
+++ b/docker/.env.example
@@ -968,6 +968,8 @@ SMTP_USERNAME=
 SMTP_PASSWORD=
 SMTP_USE_TLS=true
 SMTP_OPPORTUNISTIC_TLS=false
+# Optional: override the local hostname used for SMTP HELO/EHLO
+SMTP_LOCAL_HOSTNAME=

 # Sendgid configuration
 SENDGRID_API_KEY=
--- a/docker/docker-compose-template.yaml
+++ b/docker/docker-compose-template.yaml
@@ -21,7 +21,7 @@ services:

  # API service
  api:
-    image: langgenius/dify-api:1.11.3
+    image: langgenius/dify-api:1.11.4
    restart: always
    environment:
      # Use the shared environment variables.
@@ -63,7 +63,7 @@ services:
  # worker service
  # The Celery worker for processing all queues (dataset, workflow, mail, etc.)
  worker:
-    image: langgenius/dify-api:1.11.3
+    image: langgenius/dify-api:1.11.4
    restart: always
    environment:
      # Use the shared environment variables.
@@ -102,7 +102,7 @@ services:
  # worker_beat service
  # Celery beat for scheduling periodic tasks.
  worker_beat:
-    image: langgenius/dify-api:1.11.3
+    image: langgenius/dify-api:1.11.4
    restart: always
    environment:
      # Use the shared environment variables.
@@ -132,7 +132,7 @@ services:

  # Frontend web application.
  web:
-    image: langgenius/dify-web:1.11.3
+    image: langgenius/dify-web:1.11.4
    restart: always
    environment:
      CONSOLE_API_URL: ${CONSOLE_API_URL:-}
--- a/docker/docker-compose.yaml
+++ b/docker/docker-compose.yaml
@@ -425,6 +425,7 @@ x-shared-env: &shared-api-worker-env
  SMTP_PASSWORD: ${SMTP_PASSWORD:-}
  SMTP_USE_TLS: ${SMTP_USE_TLS:-true}
  SMTP_OPPORTUNISTIC_TLS: ${SMTP_OPPORTUNISTIC_TLS:-false}
+  SMTP_LOCAL_HOSTNAME: ${SMTP_LOCAL_HOSTNAME:-}
  SENDGRID_API_KEY: ${SENDGRID_API_KEY:-}
  INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH: ${INDEXING_MAX_SEGMENTATION_TOKENS_LENGTH:-4000}
  INVITE_EXPIRY_HOURS: ${INVITE_EXPIRY_HOURS:-72}
@@ -704,7 +705,7 @@ services:

  # API service
  api:
-    image: langgenius/dify-api:1.11.3
+    image: langgenius/dify-api:1.11.4
    restart: always
    environment:
      # Use the shared environment variables.
@@ -746,7 +747,7 @@ services:
  # worker service
  # The Celery worker for processing all queues (dataset, workflow, mail, etc.)
  worker:
-    image: langgenius/dify-api:1.11.3
+    image: langgenius/dify-api:1.11.4
    restart: always
    environment:
      # Use the shared environment variables.
@@ -785,7 +786,7 @@ services:
  # worker_beat service
  # Celery beat for scheduling periodic tasks.
  worker_beat:
-    image: langgenius/dify-api:1.11.3
+    image: langgenius/dify-api:1.11.4
    restart: always
    environment:
      # Use the shared environment variables.
@@ -815,7 +816,7 @@ services:

  # Frontend web application.
  web:
-    image: langgenius/dify-web:1.11.3
+    image: langgenius/dify-web:1.11.4
    restart: always
    environment:
      CONSOLE_API_URL: ${CONSOLE_API_URL:-}
--- a/web/.nvmrc
+++ b/web/.nvmrc
@@ -1 +1 @@
-22.21.1
+24
--- a/web/Dockerfile
+++ b/web/Dockerfile
@@ -1,5 +1,5 @@
 # base image
-FROM node:22.21.1-alpine3.23 AS base
+FROM node:24-alpine AS base
 LABEL maintainer="takatost@gmail.com"

 # if you located in China, you can use aliyun mirror to speed up
--- a/web/README.md
+++ b/web/README.md
@@ -8,8 +8,8 @@ This is a [Next.js](https://nextjs.org/) project bootstrapped with [`create-next

 Before starting the web frontend service, please make sure the following environment is ready.

- [Node.js](https://nodejs.org) >= v22.11.x
- [pnpm](https://pnpm.io) v10.x
+- [Node.js](https://nodejs.org)
+- [pnpm](https://pnpm.io)

 > [!TIP]
 > It is recommended to install and enable Corepack to manage package manager versions automatically:
--- a/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/card-view.tsx
+++ b/web/app/(commonLayout)/app/(appDetailLayout)/[appId]/overview/card-view.tsx
@@ -65,15 +65,17 @@ const CardView: FC<ICardViewProps> = ({ appId, isInPanel, className }) => {
      <div className="text-xs text-text-secondary">
        {t('overview.disableTooltip.triggerMode', { ns: 'appOverview', feature: featureName })}
      </div>
-      <div
-        className="cursor-pointer text-xs font-medium text-text-accent hover:underline"
+      <a
+        href={triggerDocUrl}
+        target="_blank"
+        rel="noopener noreferrer"
+        className="block cursor-pointer text-xs font-medium text-text-accent hover:underline"
        onClick={(event) => {
          event.stopPropagation()
-          window.open(triggerDocUrl, '_blank')
        }}
      >
        {t('overview.appInfo.enableTooltip.learnMore', { ns: 'appOverview' })}
-      </div>
+      </a>
    </div>
  ), [t, triggerDocUrl])

--- a/web/app/(shareLayout)/webapp-signin/check-code/page.tsx
+++ b/web/app/(shareLayout)/webapp-signin/check-code/page.tsx
@@ -66,7 +66,9 @@ export default function CheckCode() {
      setIsLoading(true)
      const ret = await webAppEmailLoginWithCode({ email, code: encryptVerificationCode(code), token })
      if (ret.result === 'success') {
-        setWebAppAccessToken(ret.data.access_token)
+        if (ret?.data?.access_token) {
+          setWebAppAccessToken(ret.data.access_token)
+        }
        const { access_token } = await fetchAccessToken({
          appCode: appCode!,
          userId: embeddedUserId || undefined,
--- a/web/app/(shareLayout)/webapp-signin/components/mail-and-password-auth.tsx
+++ b/web/app/(shareLayout)/webapp-signin/components/mail-and-password-auth.tsx
@@ -82,7 +82,9 @@ export default function MailAndPasswordAuth({ isEmailSetup }: MailAndPasswordAut
        body: loginData,
      })
      if (res.result === 'success') {
-        setWebAppAccessToken(res.data.access_token)
+        if (res?.data?.access_token) {
+          setWebAppAccessToken(res.data.access_token)
+        }

        const { access_token } = await fetchAccessToken({
          appCode: appCode!,
--- a/web/app/components/app/configuration/config-var/config-modal/index.tsx
+++ b/web/app/components/app/configuration/config-var/config-modal/index.tsx
@@ -21,7 +21,6 @@ import CodeEditor from '@/app/components/workflow/nodes/_base/components/editor/
 import FileUploadSetting from '@/app/components/workflow/nodes/_base/components/file-upload-setting'
 import { CodeLanguage } from '@/app/components/workflow/nodes/code/types'
 import { ChangeType, InputVarType, SupportUploadFileTypes } from '@/app/components/workflow/types'
-import { DEFAULT_VALUE_MAX_LEN } from '@/config'
 import ConfigContext from '@/context/debug-configuration'
 import { AppModeEnum, TransferMethod } from '@/types/app'
 import { checkKeys, getNewVarInWorkflow, replaceSpaceWithUnderscoreInVarNameInput } from '@/utils/var'
@@ -198,8 +197,6 @@ const ConfigModal: FC<IConfigModalProps> = ({
        if (type === InputVarType.multiFiles)
          draft.max_length = DEFAULT_FILE_UPLOAD_SETTING.max_length
      }
-      if (type === InputVarType.paragraph)
-        draft.max_length = DEFAULT_VALUE_MAX_LEN
    })
    setTempPayload(newPayload)
  }, [tempPayload])
--- a/web/app/components/app/configuration/config-var/index.tsx
+++ b/web/app/components/app/configuration/config-var/index.tsx
@@ -15,7 +15,6 @@ import Confirm from '@/app/components/base/confirm'
 import Toast from '@/app/components/base/toast'
 import Tooltip from '@/app/components/base/tooltip'
 import { InputVarType } from '@/app/components/workflow/types'
-import { DEFAULT_VALUE_MAX_LEN } from '@/config'
 import ConfigContext from '@/context/debug-configuration'
 import { useEventEmitterContextContext } from '@/context/event-emitter'
 import { useModalContext } from '@/context/modal-context'
@@ -58,8 +57,6 @@ const buildPromptVariableFromInput = (payload: InputVar): PromptVariable => {
    key: variable,
    name: label as string,
  }
-  if (payload.type === InputVarType.textInput)
-    nextItem.max_length = nextItem.max_length || DEFAULT_VALUE_MAX_LEN

  if (payload.type !== InputVarType.select)
    delete nextItem.options
--- a/web/app/components/app/configuration/debug/chat-user-input.tsx
+++ b/web/app/components/app/configuration/debug/chat-user-input.tsx
@@ -7,7 +7,6 @@ import Input from '@/app/components/base/input'
 import Select from '@/app/components/base/select'
 import Textarea from '@/app/components/base/textarea'
 import BoolInput from '@/app/components/workflow/nodes/_base/components/before-run-form/bool-input'
-import { DEFAULT_VALUE_MAX_LEN } from '@/config'
 import ConfigContext from '@/context/debug-configuration'
 import { cn } from '@/utils/classnames'

@@ -88,7 +87,7 @@ const ChatUserInput = ({
                    onChange={(e) => { handleInputValueChange(key, e.target.value) }}
                    placeholder={name}
                    autoFocus={index === 0}
-                    maxLength={max_length || DEFAULT_VALUE_MAX_LEN}
+                    maxLength={max_length}
                  />
                )}
                {type === 'paragraph' && (
@@ -115,7 +114,7 @@ const ChatUserInput = ({
                    onChange={(e) => { handleInputValueChange(key, e.target.value) }}
                    placeholder={name}
                    autoFocus={index === 0}
-                    maxLength={max_length || DEFAULT_VALUE_MAX_LEN}
+                    maxLength={max_length}
                  />
                )}
                {type === 'checkbox' && (
--- a/web/app/components/app/configuration/prompt-value-panel/index.tsx
+++ b/web/app/components/app/configuration/prompt-value-panel/index.tsx
@@ -20,7 +20,6 @@ import Select from '@/app/components/base/select'
 import Textarea from '@/app/components/base/textarea'
 import Tooltip from '@/app/components/base/tooltip'
 import BoolInput from '@/app/components/workflow/nodes/_base/components/before-run-form/bool-input'
-import { DEFAULT_VALUE_MAX_LEN } from '@/config'
 import ConfigContext from '@/context/debug-configuration'
 import { AppModeEnum, ModelModeType } from '@/types/app'
 import { cn } from '@/utils/classnames'
@@ -142,7 +141,7 @@ const PromptValuePanel: FC<IPromptValuePanelProps> = ({
                        onChange={(e) => { handleInputValueChange(key, e.target.value) }}
                        placeholder={name}
                        autoFocus={index === 0}
-                        maxLength={max_length || DEFAULT_VALUE_MAX_LEN}
+                        maxLength={max_length}
                      />
                    )}
                    {type === 'paragraph' && (
@@ -170,7 +169,7 @@ const PromptValuePanel: FC<IPromptValuePanelProps> = ({
                        onChange={(e) => { handleInputValueChange(key, e.target.value) }}
                        placeholder={name}
                        autoFocus={index === 0}
-                        maxLength={max_length || DEFAULT_VALUE_MAX_LEN}
+                        maxLength={max_length}
                      />
                    )}
                    {type === 'checkbox' && (
--- a/web/app/components/apps/list.tsx
+++ b/web/app/components/apps/list.tsx
@@ -12,7 +12,6 @@ import { useDebounceFn } from 'ahooks'
 import dynamic from 'next/dynamic'
 import {
  useRouter,
-  useSearchParams,
 } from 'next/navigation'
 import { parseAsString, useQueryState } from 'nuqs'
 import { useCallback, useEffect, useRef, useState } from 'react'
@@ -29,7 +28,6 @@ import { CheckModal } from '@/hooks/use-pay'
 import { useInfiniteAppList } from '@/service/use-apps'
 import { AppModeEnum } from '@/types/app'
 import { cn } from '@/utils/classnames'
-import { isServer } from '@/utils/client'
 import AppCard from './app-card'
 import { AppCardSkeleton } from './app-card-skeleton'
 import Empty from './empty'
@@ -59,7 +57,6 @@ const List = () => {
  const { t } = useTranslation()
  const { systemFeatures } = useGlobalPublicStore()
  const router = useRouter()
-  const searchParams = useSearchParams()
  const { isCurrentWorkspaceEditor, isCurrentWorkspaceDatasetOperator, isLoadingCurrentWorkspace } = useAppContext()
  const showTagManagementModal = useTagStore(s => s.showTagManagementModal)
  const [activeTab, setActiveTab] = useQueryState(
@@ -67,33 +64,6 @@ const List = () => {
    parseAsString.withDefault('all').withOptions({ history: 'push' }),
  )

-  // valid tabs for apps list; anything else should fallback to 'all'
-
-  // 1) Normalize legacy/incorrect query params like ?mode=discover -> ?category=all
-  useEffect(() => {
-    // avoid running on server
-    if (isServer)
-      return
-    const mode = searchParams.get('mode')
-    if (!mode)
-      return
-    const url = new URL(window.location.href)
-    url.searchParams.delete('mode')
-    if (validTabs.has(mode)) {
-      // migrate to category key
-      url.searchParams.set('category', mode)
-    }
-    else {
-      url.searchParams.set('category', 'all')
-    }
-    router.replace(url.pathname + url.search)
-  }, [router, searchParams])
-
-  // 2) If category has an invalid value (e.g., 'discover'), reset to 'all'
-  useEffect(() => {
-    if (!validTabs.has(activeTab))
-      setActiveTab('all')
-  }, [activeTab, setActiveTab])
  const { query: { tagIDs = [], keywords = '', isCreatedByMe: queryIsCreatedByMe = false }, setQuery } = useAppsQueryState()
  const [isCreatedByMe, setIsCreatedByMe] = useState(queryIsCreatedByMe)
  const [tagFilterValue, setTagFilterValue] = useState<string[]>(tagIDs)
--- a/web/app/components/base/markdown-blocks/code-block.tsx
+++ b/web/app/components/base/markdown-blocks/code-block.tsx
@@ -16,6 +16,7 @@ import { Theme } from '@/types/app'
 import SVGRenderer from '../svg-gallery' // Assumes svg-gallery.tsx is in /base directory

 const Flowchart = dynamic(() => import('@/app/components/base/mermaid'), { ssr: false })
+const QuadrantMatrix = dynamic(() => import('@/app/components/base/quadrant-matrix'), { ssr: false })

 // Available language https://github.com/react-syntax-highlighter/react-syntax-highlighter/blob/master/AVAILABLE_LANGUAGES_HLJS.MD
 const capitalizationLanguageNameMap: Record<string, string> = {
@@ -40,6 +41,7 @@ const capitalizationLanguageNameMap: Record<string, string> = {
  latex: 'Latex',
  svg: 'SVG',
  abc: 'ABC',
+  quadrant: 'Quadrant',
 }
 const getCorrectCapitalizationLanguageName = (language: string) => {
  if (!language)
@@ -409,6 +411,12 @@ const CodeBlock: any = memo(({ inline, className, children = '', ...props }: any
            <MarkdownMusic children={content} />
          </ErrorBoundary>
        )
+      case 'quadrant':
+        return (
+          <ErrorBoundary>
+            <QuadrantMatrix content={content} />
+          </ErrorBoundary>
+        )
      default:
        return (
          <SyntaxHighlighter
--- a/web/app/components/base/quadrant-matrix/index.tsx
+++ b/web/app/components/base/quadrant-matrix/index.tsx
@@ -0,0 +1,153 @@
+'use client'
+import type { FC } from 'react'
+import type { QuadrantData } from './types'
+import { RiExpandDiagonalLine } from '@remixicon/react'
+import { useCallback, useMemo, useState } from 'react'
+import { useTranslation } from 'react-i18next'
+import ActionButton from '@/app/components/base/action-button'
+import FullScreenModal from '@/app/components/base/fullscreen-modal'
+import QuadrantCard from './quadrant-card'
+import { isValidQuadrantData, QUADRANT_CONFIGS } from './types'
+
+type QuadrantMatrixProps = {
+  content: string
+}
+/** Parses `content` as JSON quadrant data and renders it as a 2x2 grid of QuadrantCards, with an expand button that opens the same grid in a fullscreen modal; renders a fallback message when the content is not valid. */
+const QuadrantMatrix: FC<QuadrantMatrixProps> = ({ content }) => {
+  const { t } = useTranslation()
+  const [isExpanded, setIsExpanded] = useState(false)
+  // Re-parse only when `content` changes; null means JSON.parse threw or isValidQuadrantData rejected the shape.
+  const parsedData = useMemo<QuadrantData | null>(() => {
+    try {
+      const trimmed = content.trim()
+      const data = JSON.parse(trimmed)
+
+      if (!isValidQuadrantData(data))
+        return null
+
+      return data
+    }
+    catch {
+      return null
+    }
+  }, [content])
+
+  const handleExpand = useCallback(() => {
+    setIsExpanded(true)
+  }, [])
+
+  const handleClose = useCallback(() => {
+    setIsExpanded(false)
+  }, [])
+
+  if (!parsedData) {
+    return (
+      <div className="flex items-center justify-center rounded-xl bg-components-panel-bg-blur p-8">
+        <div className="text-center text-text-secondary">
+          <div className="system-md-semibold mb-2">{t('quadrantMatrix.invalidData', { ns: 'app' })}</div>
+          <div className="text-sm text-text-tertiary">
+            {t('quadrantMatrix.invalidDataDesc', { ns: 'app' })}
+          </div>
+        </div>
+      </div>
+    )
+  }
+
+  const totalTasks
+    = parsedData.q1.length
+      + parsedData.q2.length
+      + parsedData.q3.length
+      + parsedData.q4.length
+
+  // One grid renderer shared by the inline view and the modal; only the `expanded` flag handed to each card differs.
+  const renderGrid = (expanded: boolean) => (
+    <div className="grid grid-cols-2 gap-3">
+      {/* Row 1: Q1 (Do First), Q2 (Schedule) */}
+      <QuadrantCard
+        config={QUADRANT_CONFIGS.q1}
+        tasks={parsedData.q1}
+        expanded={expanded}
+      />
+      <QuadrantCard
+        config={QUADRANT_CONFIGS.q2}
+        tasks={parsedData.q2}
+        expanded={expanded}
+      />
+
+      {/* Row 2: Q3 (Delegate), Q4 (Don't Do) */}
+      <QuadrantCard
+        config={QUADRANT_CONFIGS.q3}
+        tasks={parsedData.q3}
+        expanded={expanded}
+      />
+      <QuadrantCard
+        config={QUADRANT_CONFIGS.q4}
+        tasks={parsedData.q4}
+        expanded={expanded}
+      />
+    </div>
+  )
+
+  return (
+    <>
+      <div className="w-full overflow-hidden rounded-xl bg-components-panel-bg-blur p-4">
+        {/* Header: title and total task count across all four quadrants */}
+        <div className="mb-4 flex items-center justify-between">
+          <div>
+            <div className="system-md-semibold text-text-primary">
+              {t('quadrantMatrix.title', { ns: 'app' })}
+            </div>
+            <div className="text-xs text-text-tertiary">
+              {t('quadrantMatrix.taskCount', { ns: 'app', count: totalTasks })}
+            </div>
+          </div>
+          {/* Legend + Expand Button. NOTE(review): the button has only an icon child and no aria-label here; confirm ActionButton supplies an accessible name */}
+          <div className="flex items-center gap-3">
+            <div className="flex items-center gap-3 text-[11px] text-text-quaternary">
+              <span>{t('quadrantMatrix.legend.importance', { ns: 'app' })}</span>
+              <span>{t('quadrantMatrix.legend.urgency', { ns: 'app' })}</span>
+            </div>
+            <ActionButton onClick={handleExpand}>
+              <RiExpandDiagonalLine className="h-4 w-4" />
+            </ActionButton>
+          </div>
+        </div>
+
+        {/* 2x2 Grid (collapsed cards) */}
+        {renderGrid(false)}
+      </div>
+
+      {/* Fullscreen Modal: opened by the expand button, closed through onClose */}
+      <FullScreenModal
+        open={isExpanded}
+        onClose={handleClose}
+        closable
+      >
+        <div className="flex h-full flex-col p-6">
+          {/* Modal Header: same title, count and legend as the inline header, in larger type */}
+          <div className="mb-6 flex items-center justify-between">
+            <div>
+              <div className="text-xl font-semibold text-text-primary">
+                {t('quadrantMatrix.title', { ns: 'app' })}
+              </div>
+              <div className="text-sm text-text-tertiary">
+                {t('quadrantMatrix.taskCount', { ns: 'app', count: totalTasks })}
+              </div>
+            </div>
+            <div className="flex items-center gap-3 text-sm text-text-quaternary">
+              <span>{t('quadrantMatrix.legend.importance', { ns: 'app' })}</span>
+              <span>{t('quadrantMatrix.legend.urgency', { ns: 'app' })}</span>
+            </div>
+          </div>
+
+          {/* Expanded Grid (cards receive expanded=true) */}
+          <div className="min-h-0 flex-1">
+            {renderGrid(true)}
+          </div>
+        </div>
+      </FullScreenModal>
+    </>
+  )
+}
+
+export default QuadrantMatrix
--- a/web/app/components/base/quadrant-matrix/quadrant-card.tsx
+++ b/web/app/components/base/quadrant-matrix/quadrant-card.tsx
@@ -0,0 +1,102 @@
+'use client'
+import type { FC } from 'react'
+import type { QuadrantConfig, Task } from './types'
+import { useTranslation } from 'react-i18next'
+import { cn } from '@/utils/classnames'
+import TaskItem from './task-item'
+
+type QuadrantCardProps = {
+  config: QuadrantConfig
+  tasks: Task[]
+  expanded?: boolean
+  maxDisplay?: number
+}
+
+/**
+ * One matrix cell: a bordered card with a numbered header, a task-count badge and the task list. Collapsed (default) it renders at most `maxDisplay` tasks plus a footer carrying the hidden count; `expanded` renders every task in a scrollable list.
+ */
+const QuadrantCard: FC<QuadrantCardProps> = ({ config, tasks, expanded = false, maxDisplay = 3 }) => {
+  const { t } = useTranslation()
+  const { number, titleKey, subtitleKey, bgClass, borderClass, titleClass } = config
+  const displayLimit = expanded ? Infinity : maxDisplay
+  const displayTasks = tasks.slice(0, displayLimit)
+  const remainingCount = Math.max(0, tasks.length - displayLimit)
+  const keyCounts = new Map<string, number>() // occurrences per content key, so identical tasks still get unique React keys
+
+  return (
+    <div
+      className={cn(
+        'flex min-w-0 flex-col rounded-xl border p-3',
+        bgClass,
+        borderClass,
+        expanded ? 'min-h-[280px]' : 'min-h-[200px]',
+      )}
+    >
+      {/* Header with numbered circle */}
+      <div className="mb-2 shrink-0">
+        <div className="flex items-center gap-2">
+          {/* Numbered circle */}
+          <span className={cn(
+            'flex h-5 w-5 items-center justify-center rounded-full border text-xs font-semibold',
+            borderClass,
+            titleClass,
+          )}
+          >
+            {number}
+          </span>
+          <span className={cn('system-sm-semibold', titleClass)}>{t(titleKey, { ns: 'app' })}</span>
+          {tasks.length > 0 && (
+            <span className="bg-components-badge-bg-gray rounded-full px-1.5 py-0.5 text-[10px] font-medium text-text-tertiary">
+              {tasks.length}
+            </span>
+          )}
+        </div>
+        <div className="text-[11px] text-text-tertiary">{t(subtitleKey, { ns: 'app' })}</div>
+      </div>
+
+      {/* Task List */}
+      <div className={cn(
+        'flex min-h-0 flex-1 flex-col gap-2',
+        expanded && 'overflow-y-auto',
+      )}
+      >
+        {displayTasks.length > 0
+          ? (
+              displayTasks.map((task) => {
+                const contentKey = [
+                  task.name,
+                  task.deadline ?? 'no-deadline',
+                  task.importance_score,
+                  task.urgency_score,
+                  task.description ?? '',
+                  task.action_advice ?? '',
+                ].join('|')
+                keyCounts.set(contentKey, (keyCounts.get(contentKey) ?? 0) + 1)
+
+                return (
+                  <TaskItem
+                    key={`${contentKey}#${keyCounts.get(contentKey)}`}
+                    task={task}
+                    expanded={expanded}
+                  />
+                )
+              })
+            )
+          : (
+              <div className="flex flex-1 items-center justify-center text-xs text-text-quaternary">
+                {t('quadrantMatrix.noTasks', { ns: 'app' })}
+              </div>
+            )}
+      </div>
+
+      {/* More indicator (only in non-expanded mode) */}
+      {!expanded && remainingCount > 0 && (
+        <div className="mt-2 shrink-0 text-center text-[11px] text-text-tertiary">
+          {t('quadrantMatrix.more', { ns: 'app', count: remainingCount })}
+        </div>
+      )}
+    </div>
+  )
+}
+
+export default QuadrantCard
--- a/web/app/components/base/quadrant-matrix/task-item.tsx
+++ b/web/app/components/base/quadrant-matrix/task-item.tsx
@@ -0,0 +1,88 @@
+'use client'
+import type { FC } from 'react'
+import type { Task } from './types'
+import { RiCalendarLine } from '@remixicon/react'
+import { useTranslation } from 'react-i18next'
+import { cn } from '@/utils/classnames'
+
+type TaskItemProps = {
+  task: Task
+  expanded?: boolean
+  showScores?: boolean
+}
+/** Renders one task: its name with optional I:/U: score labels, then optional description, deadline badge and action advice; when not expanded the name is truncated and description/advice are clamped to two lines. */
+const TaskItem: FC<TaskItemProps> = ({ task, expanded = false, showScores = true }) => {
+  const { t } = useTranslation()
+  const { name, description, deadline, importance_score, urgency_score, action_advice } = task
+
+  return (
+    <div className="group min-w-0 rounded-lg bg-components-panel-bg p-2.5 shadow-xs transition-all hover:shadow-sm">
+      {/* Header: task name (truncated to one line unless expanded) + scores */}
+      <div className="flex items-start justify-between gap-2">
+        <div
+          className={cn(
+            'system-sm-medium min-w-0 flex-1 text-text-primary',
+            !expanded && 'truncate',
+          )}
+          title={name}
+        >
+          {name}
+        </div>
+        {showScores && (
+          <div className="flex shrink-0 items-center gap-1 text-[10px] font-medium">
+            <span className="text-text-accent">
+              I:
+              {importance_score}
+            </span>
+            <span className="text-text-warning">
+              U:
+              {urgency_score}
+            </span>
+          </div>
+        )}
+      </div>
+
+      {/* Description (rendered only when truthy; clamped to two lines unless expanded) */}
+      {description && (
+        <div className={cn(
+          'mt-1 text-xs text-text-tertiary',
+          !expanded && 'line-clamp-2',
+        )}
+        >
+          {description}
+        </div>
+      )}
+
+      {/* Deadline badge (rendered only when truthy; the deadline string is shown as provided) */}
+      {deadline && (
+        <div className="mt-1.5">
+          <span className="bg-components-badge-bg-gray inline-flex items-center gap-1 rounded px-1.5 py-0.5 text-[10px] text-text-tertiary">
+            <RiCalendarLine className="h-3 w-3" />
+            <span>
+              {t('quadrantMatrix.deadline', { ns: 'app' })}
+              {' '}
+              {deadline}
+            </span>
+          </span>
+        </div>
+      )}
+
+      {/* Action advice (rendered only when truthy); the title tooltip carries the full text only while the paragraph is clamped */}
+      {action_advice && (
+        <div className="mt-2 border-t border-divider-subtle pt-2">
+          <p
+            className={cn(
+              'text-xs italic text-text-quaternary',
+              !expanded && 'line-clamp-2',
+            )}
+            title={!expanded ? action_advice : undefined}
+          >
+            {action_advice}
+          </p>
+        </div>
+      )}
+    </div>
+  )
+}
+
+export default TaskItem
--- a/web/app/components/base/quadrant-matrix/types.ts
+++ b/web/app/components/base/quadrant-matrix/types.ts
@@ -0,0 +1,92 @@
+/**
+ * Type definitions for Eisenhower Matrix (Task Quadrant) visualization
+ */
+import type { I18nKeysWithPrefix } from '@/types/i18n'
+
+export type Task = {
+  name: string // Task title; the only field besides the two scores that is not optional
+  description?: string // Optional free-text detail
+  deadline?: string // YYYY-MM-DD format (by convention only; nothing in this module enforces it)
+  importance_score: number // 0-100, based on goal alignment and long-term value (range is not enforced here)
+  urgency_score: number // 0-100, based on deadline pressure and delay penalty (range is not enforced here)
+  action_advice?: string // Suggested action for this task
+}
+
+export type QuadrantData = {
+  q1: Task[] // Urgent & Important - Do First
+  q2: Task[] // Not Urgent & Important - Schedule
+  q3: Task[] // Urgent & Not Important - Delegate
+  q4: Task[] // Not Urgent & Not Important - Don't Do
+}
+
+type QuadrantKeyBase = I18nKeysWithPrefix<'app', 'quadrantMatrix.q'> // presumably the 'app' i18n keys under the 'quadrantMatrix.q' prefix; confirm against I18nKeysWithPrefix
+type QuadrantTitleKey = Extract<QuadrantKeyBase, `${string}.title`> // subset of those keys ending in ".title"
+type QuadrantSubtitleKey = Extract<QuadrantKeyBase, `${string}.subtitle`> // subset of those keys ending in ".subtitle"
+
+export type QuadrantConfig = {
+  key: 'q1' | 'q2' | 'q3' | 'q4' // Same identifiers as the QuadrantData fields
+  number: number // Ordinal of the quadrant (1-4 in QUADRANT_CONFIGS)
+  titleKey: QuadrantTitleKey // i18n key for title
+  subtitleKey: QuadrantSubtitleKey // i18n key for subtitle
+  bgClass: string // CSS class name for the background
+  borderClass: string // CSS class name for the border
+  titleClass: string // CSS class name for the title text colour
+}
+
+// Static presentation config per quadrant, keyed by the quadrant id so that a typo such as
+// QUADRANT_CONFIGS.q5 fails type-checking. Eisenhower layout: Q1 (Do First) top-left,
+// Q2 (Schedule) top-right, Q3 (Delegate) bottom-left, Q4 (Don't Do) bottom-right
+export const QUADRANT_CONFIGS: Record<QuadrantConfig['key'], QuadrantConfig> = {
+  q1: {
+    key: 'q1',
+    number: 1,
+    titleKey: 'quadrantMatrix.q1.title',
+    subtitleKey: 'quadrantMatrix.q1.subtitle',
+    bgClass: 'bg-state-destructive-hover',
+    borderClass: 'border-state-destructive-border',
+    titleClass: 'text-text-destructive',
+  },
+  q2: {
+    key: 'q2',
+    number: 2,
+    titleKey: 'quadrantMatrix.q2.title',
+    subtitleKey: 'quadrantMatrix.q2.subtitle',
+    bgClass: 'bg-state-accent-hover',
+    borderClass: 'border-state-accent-border',
+    titleClass: 'text-text-accent',
+  },
+  q3: {
+    key: 'q3',
+    number: 3,
+    titleKey: 'quadrantMatrix.q3.title',
+    subtitleKey: 'quadrantMatrix.q3.subtitle',
+    bgClass: 'bg-state-warning-hover',
+    borderClass: 'border-state-warning-border',
+    titleClass: 'text-text-warning',
+  },
+  q4: {
+    key: 'q4',
+    number: 4,
+    titleKey: 'quadrantMatrix.q4.title',
+    subtitleKey: 'quadrantMatrix.q4.subtitle',
+    bgClass: 'bg-components-panel-on-panel-item-bg',
+    borderClass: 'border-divider-regular',
+    titleClass: 'text-text-tertiary',
+  },
+}
+
+/**
+ * Type guard for untrusted parsed JSON: true only when q1-q4 are all arrays whose entries are non-null objects with a string `name`, so null or primitive entries are rejected before any task field is read.
+ */
+export function isValidQuadrantData(data: unknown): data is QuadrantData {
+  if (typeof data !== 'object' || data === null)
+    return false
+
+  const d = data as Record<string, unknown>
+  const isTask = (item: unknown): item is Task =>
+    typeof item === 'object' && item !== null && typeof (item as Task).name === 'string'
+  return (['q1', 'q2', 'q3', 'q4'] as const).every((key) => {
+    const list = d[key]
+    return Array.isArray(list) && list.every(isTask)
+  })
+}
--- a/web/app/components/base/search-input/index.tsx
+++ b/web/app/components/base/search-input/index.tsx
@@ -20,6 +20,7 @@ const SearchInput: FC<SearchInputProps> = ({
  white,
 }) => {
  const { t } = useTranslation()
+  const inputRef = useRef<HTMLInputElement>(null)
  const [focus, setFocus] = useState<boolean>(false)
  const isComposing = useRef<boolean>(false)
  const [compositionValue, setCompositionValue] = useState<string>('')
@@ -36,6 +37,7 @@ const SearchInput: FC<SearchInputProps> = ({
        <RiSearchLine className="h-4 w-4 text-components-input-text-placeholder" aria-hidden="true" />
      </div>
      <input
+        ref={inputRef}
        type="text"
        name="query"
        className={cn(
@@ -65,14 +67,17 @@ const SearchInput: FC<SearchInputProps> = ({
        autoComplete="off"
      />
      {value && (
-        <div
-          className="group/clear flex h-4 w-4 shrink-0 cursor-pointer items-center justify-center"
+        <button
+          type="button"
+          aria-label={t('operation.clear', { ns: 'common' })}
+          className="group/clear flex h-4 w-4 shrink-0 cursor-pointer items-center justify-center border-none bg-transparent p-0"
          onClick={() => {
            onChange('')
+            inputRef.current?.focus()
          }}
        >
          <RiCloseCircleFill className="h-4 w-4 text-text-quaternary group-hover/clear:text-text-tertiary" />
-        </div>
+        </button>
      )}
    </div>
  )
--- a/web/app/components/billing/pricing/plans/cloud-plan-item/index.spec.tsx
+++ b/web/app/components/billing/pricing/plans/cloud-plan-item/index.spec.tsx
@@ -27,7 +27,9 @@ vi.mock('@/service/billing', () => ({

 vi.mock('@/service/client', () => ({
  consoleClient: {
-    billingUrl: vi.fn(),
+    billing: {
+      invoices: vi.fn(),
+    },
  },
 }))

@@ -43,7 +45,7 @@ vi.mock('../../assets', () => ({

 const mockUseAppContext = useAppContext as Mock
 const mockUseAsyncWindowOpen = useAsyncWindowOpen as Mock
-const mockBillingUrl = consoleClient.billingUrl as Mock
+const mockBillingInvoices = consoleClient.billing.invoices as Mock
 const mockFetchSubscriptionUrls = fetchSubscriptionUrls as Mock
 const mockToastNotify = Toast.notify as Mock

@@ -75,7 +77,7 @@ beforeEach(() => {
  vi.clearAllMocks()
  mockUseAppContext.mockReturnValue({ isCurrentWorkspaceManager: true })
  mockUseAsyncWindowOpen.mockReturnValue(vi.fn(async open => await open()))
-  mockBillingUrl.mockResolvedValue({ url: 'https://billing.example' })
+  mockBillingInvoices.mockResolvedValue({ url: 'https://billing.example' })
  mockFetchSubscriptionUrls.mockResolvedValue({ url: 'https://subscription.example' })
  assignedHref = ''
 })
@@ -149,7 +151,7 @@ describe('CloudPlanItem', () => {
        type: 'error',
        message: 'billing.buyPermissionDeniedTip',
      }))
-      expect(mockBillingUrl).not.toHaveBeenCalled()
+      expect(mockBillingInvoices).not.toHaveBeenCalled()
    })

    it('should open billing portal when upgrading current paid plan', async () => {
@@ -168,7 +170,7 @@ describe('CloudPlanItem', () => {
      fireEvent.click(screen.getByRole('button', { name: 'billing.plansCommon.currentPlan' }))

      await waitFor(() => {
-        expect(mockBillingUrl).toHaveBeenCalledTimes(1)
+        expect(mockBillingInvoices).toHaveBeenCalledTimes(1)
      })
      expect(openWindow).toHaveBeenCalledTimes(1)
    })
--- a/web/app/components/billing/pricing/plans/cloud-plan-item/index.tsx
+++ b/web/app/components/billing/pricing/plans/cloud-plan-item/index.tsx
@@ -77,7 +77,7 @@ const CloudPlanItem: FC<CloudPlanItemProps> = ({
    try {
      if (isCurrentPaidPlan) {
        await openAsyncWindow(async () => {
-          const res = await consoleClient.billingUrl()
+          const res = await consoleClient.billing.invoices()
          if (res.url)
            return res.url
          throw new Error('Failed to open billing page')
--- a/web/app/components/datasets/common/document-picker/preview-document-picker.spec.tsx
+++ b/web/app/components/datasets/common/document-picker/preview-document-picker.spec.tsx
@@ -362,6 +362,18 @@ describe('PreviewDocumentPicker', () => {
      expect(screen.getByText('--')).toBeInTheDocument()
    })

+    it('should render when value prop is omitted (optional)', () => {
+      const files = createMockDocumentList(2)
+      const onChange = vi.fn()
+      // Do not pass `value` at all to verify optional behavior
+      render(<PreviewDocumentPicker files={files} onChange={onChange} />)
+
+      // Renders placeholder for missing name
+      expect(screen.getByText('--')).toBeInTheDocument()
+      // Portal wrapper renders
+      expect(screen.getByTestId('portal-elem')).toBeInTheDocument()
+    })
+
    it('should handle empty files array', () => {
      renderComponent({ files: [] })

--- a/web/app/components/datasets/common/document-picker/preview-document-picker.tsx
+++ b/web/app/components/datasets/common/document-picker/preview-document-picker.tsx
@@ -18,7 +18,7 @@ import DocumentList from './document-list'

 type Props = {
  className?: string
-  value: DocumentItem
+  value?: DocumentItem
  files: DocumentItem[]
  onChange: (value: DocumentItem) => void
 }
@@ -30,7 +30,8 @@ const PreviewDocumentPicker: FC<Props> = ({
  onChange,
 }) => {
  const { t } = useTranslation()
-  const { name, extension } = value
+  const name = value?.name || ''
+  const extension = value?.extension

  const [open, {
    set: setOpen,
--- a/web/app/components/datasets/create-from-pipeline/create-options/create-from-dsl-modal/index.spec.tsx
+++ b/web/app/components/datasets/create-from-pipeline/create-options/create-from-dsl-modal/index.spec.tsx
--- a/web/app/components/datasets/documents/components/documents-header.tsx
+++ b/web/app/components/datasets/documents/components/documents-header.tsx
@@ -0,0 +1,201 @@
+'use client'
+import type { FC } from 'react'
+import type { Item } from '@/app/components/base/select'
+import type { BuiltInMetadataItem, MetadataItemWithValueLength } from '@/app/components/datasets/metadata/types'
+import type { SortType } from '@/service/datasets'
+import { PlusIcon } from '@heroicons/react/24/solid'
+import { RiDraftLine, RiExternalLinkLine } from '@remixicon/react'
+import { useMemo } from 'react'
+import { useTranslation } from 'react-i18next'
+import Button from '@/app/components/base/button'
+import Chip from '@/app/components/base/chip'
+import Input from '@/app/components/base/input'
+import Sort from '@/app/components/base/sort'
+import AutoDisabledDocument from '@/app/components/datasets/common/document-status-with-action/auto-disabled-document'
+import IndexFailed from '@/app/components/datasets/common/document-status-with-action/index-failed'
+import StatusWithAction from '@/app/components/datasets/common/document-status-with-action/status-with-action'
+import DatasetMetadataDrawer from '@/app/components/datasets/metadata/metadata-dataset/dataset-metadata-drawer'
+import { useDocLink } from '@/context/i18n'
+import { DataSourceType } from '@/models/datasets'
+import { useIndexStatus } from '../status-item/hooks'
+
+type DocumentsHeaderProps = { // Props consumed by the DocumentsHeader component below
+  // Dataset info
+  datasetId: string // forwarded to the AutoDisabledDocument and IndexFailed notices
+  dataSourceType?: DataSourceType // NOTION / WEB switch the add-button label; anything else uses the add-file label
+  embeddingAvailable: boolean // false renders the embedding-model warning; true renders the metadata and add buttons
+  isFreePlan: boolean // when true the AutoDisabledDocument notice is not rendered
+
+  // Filter & sort
+  statusFilterValue: string // current value of the status Chip
+  sortValue: SortType // a leading '-' is passed to Sort as its order; the rest is the sort field
+  inputValue: string // current value of the text Input
+  onStatusFilterChange: (value: string) => void // receives the selected item's value as a string, or '' when it is falsy
+  onStatusFilterClear: () => void // wired to the Chip's onClear
+  onSortChange: (value: string) => void // receives String(value) from the Sort control
+  onInputChange: (value: string) => void // receives the typed text, or '' when the Input is cleared
+
+  // Metadata modal
+  isShowEditMetadataModal: boolean // when true the DatasetMetadataDrawer is rendered
+  showEditMetadataModal: () => void // click handler of the metadata button
+  hideEditMetadataModal: () => void // passed to the drawer as onClose
+  datasetMetaData?: MetadataItemWithValueLength[] // drawer userMetadata; an empty array is used when undefined
+  builtInMetaData?: BuiltInMetadataItem[] // drawer builtInMetadata; an empty array is used when undefined
+  builtInEnabled: boolean // passed to the drawer as isBuiltInEnabled
+  onAddMetaData: (payload: BuiltInMetadataItem) => Promise<void> // passed to the drawer as onAdd
+  onRenameMetaData: (payload: MetadataItemWithValueLength) => Promise<void> // passed to the drawer as onRename
+  onDeleteMetaData: (metaDataId: string) => Promise<void> // passed to the drawer as onRemove
+  onBuiltInEnabledChange: (enabled: boolean) => void // passed to the drawer as onIsBuiltInEnabledChange
+
+  // Actions
+  onAddDocument: () => void // click handler of the primary add button
+}
+
+const DocumentsHeader: FC<DocumentsHeaderProps> = ({ // Renders the documents list title block followed by the filter / sort / action toolbar
+  datasetId,
+  dataSourceType,
+  embeddingAvailable,
+  isFreePlan,
+  statusFilterValue,
+  sortValue,
+  inputValue,
+  onStatusFilterChange,
+  onStatusFilterClear,
+  onSortChange,
+  onInputChange,
+  isShowEditMetadataModal,
+  showEditMetadataModal,
+  hideEditMetadataModal,
+  datasetMetaData,
+  builtInMetaData,
+  builtInEnabled,
+  onAddMetaData,
+  onRenameMetaData,
+  onDeleteMetaData,
+  onBuiltInEnabledChange,
+  onAddDocument,
+}) => {
+  const { t } = useTranslation()
+  const docLink = useDocLink() // builds the href of the "learn more" link in the title section
+  const DOC_INDEX_STATUS_MAP = useIndexStatus() // provides the `.text` label used for each status option
+
+  const isDataSourceNotion = dataSourceType === DataSourceType.NOTION
+  const isDataSourceWeb = dataSourceType === DataSourceType.WEB
+
+  const statusFilterItems: Item[] = useMemo(() => [ // options of the status Chip: 'all' plus one entry per index status
+    { value: 'all', name: t('list.index.all', { ns: 'datasetDocuments' }) as string },
+    { value: 'queuing', name: DOC_INDEX_STATUS_MAP.queuing.text },
+    { value: 'indexing', name: DOC_INDEX_STATUS_MAP.indexing.text },
+    { value: 'paused', name: DOC_INDEX_STATUS_MAP.paused.text },
+    { value: 'error', name: DOC_INDEX_STATUS_MAP.error.text },
+    { value: 'available', name: DOC_INDEX_STATUS_MAP.available.text },
+    { value: 'enabled', name: DOC_INDEX_STATUS_MAP.enabled.text },
+    { value: 'disabled', name: DOC_INDEX_STATUS_MAP.disabled.text },
+    { value: 'archived', name: DOC_INDEX_STATUS_MAP.archived.text },
+  ], [DOC_INDEX_STATUS_MAP, t])
+
+  const sortItems: Item[] = useMemo(() => [ // fields offered by the Sort control
+    { value: 'created_at', name: t('list.sort.uploadTime', { ns: 'datasetDocuments' }) as string },
+    { value: 'hit_count', name: t('list.sort.hitCount', { ns: 'datasetDocuments' }) as string },
+  ], [t])
+
+  // Add button label by data source type: Notion -> addPages, web -> addUrl, anything else -> addFile
+  const addButtonText = useMemo(() => {
+    if (isDataSourceNotion)
+      return t('list.addPages', { ns: 'datasetDocuments' })
+    if (isDataSourceWeb)
+      return t('list.addUrl', { ns: 'datasetDocuments' })
+    return t('list.addFile', { ns: 'datasetDocuments' })
+  }, [isDataSourceNotion, isDataSourceWeb, t])
+
+  return (
+    <>
+      {/* Title section: heading, description and a "learn more" docs link opened in a new tab */}
+      <div className="flex flex-col justify-center gap-1 px-6 pt-4">
+        <h1 className="text-base font-semibold text-text-primary">
+          {t('list.title', { ns: 'datasetDocuments' })}
+        </h1>
+        <div className="flex items-center space-x-0.5 text-sm font-normal text-text-tertiary">
+          <span>{t('list.desc', { ns: 'datasetDocuments' })}</span>
+          <a
+            className="flex items-center text-text-accent"
+            target="_blank"
+            rel="noopener noreferrer"
+            href={docLink('/guides/knowledge-base/integrate-knowledge-within-application')}
+          >
+            <span>{t('list.learnMore', { ns: 'datasetDocuments' })}</span>
+            <RiExternalLinkLine className="h-3 w-3" />
+          </a>
+        </div>
+      </div>
+
+      {/* Toolbar section: filters on the left, notices and action buttons on the right */}
+      <div className="flex flex-wrap items-center justify-between px-6 pt-4">
+        {/* Left: status Chip, text Input and Sort control */}
+        <div className="flex items-center gap-2">
+          <Chip
+            className="w-[160px]"
+            showLeftIcon={false}
+            value={statusFilterValue}
+            items={statusFilterItems}
+            onSelect={item => onStatusFilterChange(item?.value ? String(item.value) : '')}
+            onClear={onStatusFilterClear}
+          />
+          <Input
+            showLeftIcon
+            showClearIcon
+            wrapperClassName="!w-[200px]"
+            value={inputValue}
+            onChange={e => onInputChange(e.target.value)}
+            onClear={() => onInputChange('')}
+          />
+          <div className="h-3.5 w-px bg-divider-regular"></div>
+          <Sort
+            order={sortValue.startsWith('-') ? '-' : ''}
+            value={sortValue.replace('-', '')}
+            items={sortItems}
+            onSelect={value => onSortChange(String(value))}
+          />
+        </div>
+
+        {/* Right: status notices, metadata button and drawer, add-document button */}
+        <div className="flex !h-8 items-center justify-center gap-2">
+          {!isFreePlan && <AutoDisabledDocument datasetId={datasetId} />}
+          <IndexFailed datasetId={datasetId} />
+          {!embeddingAvailable && (
+            <StatusWithAction
+              type="warning"
+              description={t('embeddingModelNotAvailable', { ns: 'dataset' })}
+            />
+          )}
+          {embeddingAvailable && (
+            <Button variant="secondary" className="shrink-0" onClick={showEditMetadataModal}>
+              <RiDraftLine className="mr-1 size-4" />
+              {t('metadata.metadata', { ns: 'dataset' })}
+            </Button>
+          )}
+          {isShowEditMetadataModal && (
+            <DatasetMetadataDrawer
+              userMetadata={datasetMetaData ?? []}
+              onClose={hideEditMetadataModal}
+              onAdd={onAddMetaData}
+              onRename={onRenameMetaData}
+              onRemove={onDeleteMetaData}
+              builtInMetadata={builtInMetaData ?? []}
+              isBuiltInEnabled={builtInEnabled}
+              onIsBuiltInEnabledChange={onBuiltInEnabledChange}
+            />
+          )}
+          {embeddingAvailable && (
+            <Button variant="primary" onClick={onAddDocument} className="shrink-0">
+              <PlusIcon className="mr-2 h-4 w-4 stroke-current" />
+              {addButtonText}
+            </Button>
+          )}
+        </div>
+      </div>
+    </>
+  )
+}
+
+export default DocumentsHeader
--- a/web/app/components/datasets/documents/components/empty-element.tsx
+++ b/web/app/components/datasets/documents/components/empty-element.tsx
@@ -0,0 +1,41 @@
+'use client'
+import type { FC } from 'react'
+import { PlusIcon } from '@heroicons/react/24/solid'
+import { useTranslation } from 'react-i18next'
+import Button from '@/app/components/base/button'
+import s from '../style.module.css'
+import { FolderPlusIcon, NotionIcon, ThreeDotsIcon } from './icons'
+
+type EmptyElementProps = { // Props of the EmptyElement component below
+  canAdd: boolean // the add-file button is rendered only when this is true and type is 'upload'
+  onClick: () => void // click handler of the add-file button
+  type?: 'upload' | 'sync' // selects the icon and the tip translation key; the component defaults it to 'upload'
+}
+
+const EmptyElement: FC<EmptyElementProps> = ({ canAdd = true, onClick, type = 'upload' }) => {
+  const { t } = useTranslation()
+  const isUpload = type === 'upload'
+  // Upload mode uses the folder icon, every other mode the Notion icon
+  const SymbolIcon = isUpload ? FolderPlusIcon : NotionIcon
+  return (
+    <div className={s.emptyWrapper}>
+      <div className={s.emptyElement}>
+        <div className={s.emptySymbolIconWrapper}><SymbolIcon /></div>
+        <span className={s.emptyTitle}>
+          {t('list.empty.title', { ns: 'datasetDocuments' })}
+          <ThreeDotsIcon className="relative -left-1.5 -top-3 inline" />
+        </span>
+        <div className={s.emptyTip}>{t(`list.empty.${type}.tip`, { ns: 'datasetDocuments' })}</div>
+        {/* The add-file button only appears in upload mode when adding is allowed */}
+        {isUpload && canAdd && (
+          <Button variant="secondary-accent" className={s.addFileBtn} onClick={onClick}>
+            <PlusIcon className={s.plusIcon} />
+            {t('list.addFile', { ns: 'datasetDocuments' })}
+          </Button>
+        )}
+      </div>
+    </div>
+  )
+}
+
+export default EmptyElement
--- a/web/app/components/datasets/documents/components/icons.tsx
+++ b/web/app/components/datasets/documents/components/icons.tsx
@@ -0,0 +1,34 @@
+import type * as React from 'react'
+
+export const FolderPlusIcon = ({ className }: React.SVGProps<SVGElement>) => { // 20x20 inline SVG drawn with a #667085 stroke; only `className` is read from the props
+  return (
+    <svg width="20" height="20" viewBox="0 0 20 20" fill="none" xmlns="http://www.w3.org/2000/svg" className={className ?? ''}>
+      <path d="M10.8332 5.83333L9.90355 3.9741C9.63601 3.439 9.50222 3.17144 9.30265 2.97597C9.12615 2.80311 8.91344 2.67164 8.6799 2.59109C8.41581 2.5 8.11668 2.5 7.51841 2.5H4.33317C3.39975 2.5 2.93304 2.5 2.57652 2.68166C2.26292 2.84144 2.00795 3.09641 1.84816 3.41002C1.6665 3.76654 1.6665 4.23325 1.6665 5.16667V5.83333M1.6665 5.83333H14.3332C15.7333 5.83333 16.4334 5.83333 16.9681 6.10582C17.4386 6.3455 17.821 6.72795 18.0607 7.19836C18.3332 7.73314 18.3332 8.4332 18.3332 9.83333V13.5C18.3332 14.9001 18.3332 15.6002 18.0607 16.135C17.821 16.6054 17.4386 16.9878 16.9681 17.2275C16.4334 17.5 15.7333 17.5 14.3332 17.5H5.6665C4.26637 17.5 3.56631 17.5 3.03153 17.2275C2.56112 16.9878 2.17867 16.6054 1.93899 16.135C1.6665 15.6002 1.6665 14.9001 1.6665 13.5V5.83333ZM9.99984 14.1667V9.16667M7.49984 11.6667H12.4998" stroke="#667085" strokeWidth="1.5" strokeLinecap="round" strokeLinejoin="round" />
+    </svg>
+  )
+}
+
+export const ThreeDotsIcon = ({ className }: React.SVGProps<SVGElement>) => { // 16x16 inline SVG drawn with a #374151 stroke; only `className` is read from the props
+  return (
+    <svg width="16" height="16" viewBox="0 0 16 16" fill="none" xmlns="http://www.w3.org/2000/svg" className={className ?? ''}>
+      <path d="M5 6.5V5M8.93934 7.56066L10 6.5M10.0103 11.5H11.5103" stroke="#374151" strokeWidth="2" strokeLinecap="round" strokeLinejoin="round" />
+    </svg>
+  )
+}
+
+export const NotionIcon = ({ className }: React.SVGProps<SVGElement>) => { // 20x20 inline SVG with white/black fills, clipped by the clipPath declared in <defs>
+  return ( // NOTE(review): the clipPath id is a fixed string, so several instances on one page share the same id — confirm this is acceptable
+    <svg width="20" height="20" viewBox="0 0 20 20" fill="none" xmlns="http://www.w3.org/2000/svg" className={className ?? ''}>
+      <g clipPath="url(#clip0_2164_11263)">
+        <path fillRule="evenodd" clipRule="evenodd" d="M3.5725 18.2611L1.4229 15.5832C0.905706 14.9389 0.625 14.1466 0.625 13.3312V3.63437C0.625 2.4129 1.60224 1.39936 2.86295 1.31328L12.8326 0.632614C13.5569 0.583164 14.2768 0.775682 14.8717 1.17794L18.3745 3.5462C19.0015 3.97012 19.375 4.66312 19.375 5.40266V16.427C19.375 17.6223 18.4141 18.6121 17.1798 18.688L6.11458 19.3692C5.12958 19.4298 4.17749 19.0148 3.5725 18.2611Z" fill="white" />
+        <path d="M7.03006 8.48669V8.35974C7.03006 8.03794 7.28779 7.77104 7.61997 7.74886L10.0396 7.58733L13.3857 12.5147V8.19009L12.5244 8.07528V8.01498C12.5244 7.68939 12.788 7.42074 13.1244 7.4035L15.326 7.29073V7.60755C15.326 7.75628 15.2154 7.88349 15.0638 7.90913L14.534 7.99874V15.0023L13.8691 15.231C13.3136 15.422 12.6952 15.2175 12.3772 14.7377L9.12879 9.83574V14.5144L10.1287 14.7057L10.1147 14.7985C10.0711 15.089 9.82028 15.3087 9.51687 15.3222L7.03006 15.4329C6.99718 15.1205 7.23132 14.841 7.55431 14.807L7.88143 14.7727V8.53453L7.03006 8.48669Z" fill="black" />
+        <path fillRule="evenodd" clipRule="evenodd" d="M12.9218 1.85424L2.95217 2.53491C2.35499 2.57568 1.89209 3.05578 1.89209 3.63437V13.3312C1.89209 13.8748 2.07923 14.403 2.42402 14.8325L4.57362 17.5104C4.92117 17.9434 5.46812 18.1818 6.03397 18.147L17.0991 17.4658C17.6663 17.4309 18.1078 16.9762 18.1078 16.427V5.40266C18.1078 5.06287 17.9362 4.74447 17.6481 4.54969L14.1453 2.18143C13.7883 1.94008 13.3564 1.82457 12.9218 1.85424ZM3.44654 3.78562C3.30788 3.68296 3.37387 3.46909 3.54806 3.4566L12.9889 2.77944C13.2897 2.75787 13.5886 2.8407 13.8318 3.01305L15.7261 4.35508C15.798 4.40603 15.7642 4.51602 15.6752 4.52086L5.67742 5.0646C5.37485 5.08106 5.0762 4.99217 4.83563 4.81406L3.44654 3.78562ZM5.20848 6.76919C5.20848 6.4444 5.47088 6.1761 5.80642 6.15783L16.3769 5.58216C16.7039 5.56435 16.9792 5.81583 16.9792 6.13239V15.6783C16.9792 16.0025 16.7177 16.2705 16.3829 16.2896L5.8793 16.8872C5.51537 16.9079 5.20848 16.6283 5.20848 16.2759V6.76919Z" fill="black" />
+      </g>
+      <defs>
+        <clipPath id="clip0_2164_11263">
+          <rect width="20" height="20" fill="white" />
+        </clipPath>
+      </defs>
+    </svg>
+  )
+}
--- a/web/app/components/datasets/documents/components/list.tsx
+++ b/web/app/components/datasets/documents/components/list.tsx
@@ -16,13 +16,16 @@ import * as React from 'react'
 import { useCallback, useEffect, useMemo, useState } from 'react'
 import { useTranslation } from 'react-i18next'
 import Checkbox from '@/app/components/base/checkbox'
+import FileTypeIcon from '@/app/components/base/file-uploader/file-type-icon'
 import NotionIcon from '@/app/components/base/notion-icon'
 import Pagination from '@/app/components/base/pagination'
 import Toast from '@/app/components/base/toast'
 import Tooltip from '@/app/components/base/tooltip'
+import ChunkingModeLabel from '@/app/components/datasets/common/chunking-mode-label'
 import { normalizeStatusForQuery } from '@/app/components/datasets/documents/status-filter'
 import { extensionToFileType } from '@/app/components/datasets/hit-testing/utils/extension-to-file-type'
 import EditMetadataBatchModal from '@/app/components/datasets/metadata/edit-metadata-batch/modal'
+import useBatchEditDocumentMetadata from '@/app/components/datasets/metadata/hooks/use-batch-edit-document-metadata'
 import { useDatasetDetailContextWithSelector as useDatasetDetailContext } from '@/context/dataset-detail'
 import useTimestamp from '@/hooks/use-timestamp'
 import { ChunkingMode, DataSourceType, DocumentActionType } from '@/models/datasets'
@@ -31,14 +34,11 @@ import { useDocumentArchive, useDocumentBatchRetryIndex, useDocumentDelete, useD
 import { asyncRunSafe } from '@/utils'
 import { cn } from '@/utils/classnames'
 import { formatNumber } from '@/utils/format'
-import FileTypeIcon from '../../base/file-uploader/file-type-icon'
-import ChunkingModeLabel from '../common/chunking-mode-label'
-import useBatchEditDocumentMetadata from '../metadata/hooks/use-batch-edit-document-metadata'
-import BatchAction from './detail/completed/common/batch-action'
+import BatchAction from '../detail/completed/common/batch-action'
+import StatusItem from '../status-item'
+import s from '../style.module.css'
 import Operations from './operations'
 import RenameModal from './rename-modal'
-import StatusItem from './status-item'
-import s from './style.module.css'

 export const renderTdValue = (value: string | number | null, isEmptyStyle = false) => {
  return (
--- a/web/app/components/datasets/documents/components/operations.tsx
+++ b/web/app/components/datasets/documents/components/operations.tsx
@@ -1,4 +1,4 @@
-import type { OperationName } from './types'
+import type { OperationName } from '../types'
 import type { CommonResponse } from '@/models/common'
 import {
  RiArchive2Line,
@@ -17,6 +17,12 @@ import * as React from 'react'
 import { useCallback, useState } from 'react'
 import { useTranslation } from 'react-i18next'
 import { useContext } from 'use-context-selector'
+import Confirm from '@/app/components/base/confirm'
+import Divider from '@/app/components/base/divider'
+import CustomPopover from '@/app/components/base/popover'
+import Switch from '@/app/components/base/switch'
+import { ToastContext } from '@/app/components/base/toast'
+import Tooltip from '@/app/components/base/tooltip'
 import { DataSourceType, DocumentActionType } from '@/models/datasets'
 import {
  useDocumentArchive,
@@ -31,14 +37,8 @@ import {
 } from '@/service/knowledge/use-document'
 import { asyncRunSafe } from '@/utils'
 import { cn } from '@/utils/classnames'
-import Confirm from '../../base/confirm'
-import Divider from '../../base/divider'
-import CustomPopover from '../../base/popover'
-import Switch from '../../base/switch'
-import { ToastContext } from '../../base/toast'
-import Tooltip from '../../base/tooltip'
+import s from '../style.module.css'
 import RenameModal from './rename-modal'
-import s from './style.module.css'

 type OperationsProps = {
  embeddingAvailable: boolean
--- a/web/app/components/datasets/documents/components/rename-modal.tsx
+++ b/web/app/components/datasets/documents/components/rename-modal.tsx
@@ -7,8 +7,8 @@ import { useTranslation } from 'react-i18next'
 import Button from '@/app/components/base/button'
 import Input from '@/app/components/base/input'
 import Modal from '@/app/components/base/modal'
+import Toast from '@/app/components/base/toast'
 import { renameDocumentName } from '@/service/datasets'
-import Toast from '../../base/toast'

 type Props = {
  datasetId: string
--- a/web/app/components/datasets/documents/create-from-pipeline/hooks/index.ts
+++ b/web/app/components/datasets/documents/create-from-pipeline/hooks/index.ts
@@ -0,0 +1,5 @@
+export { useAddDocumentsSteps } from './use-add-documents-steps'
+export { useDatasourceActions } from './use-datasource-actions'
+export { useDatasourceOptions } from './use-datasource-options'
+export { useLocalFile, useOnlineDocument, useOnlineDrive, useWebsiteCrawl } from './use-datasource-store'
+export { useDatasourceUIState } from './use-datasource-ui-state'
--- a/web/app/components/datasets/documents/create-from-pipeline/hooks/use-add-documents-steps.ts
+++ b/web/app/components/datasets/documents/create-from-pipeline/hooks/use-add-documents-steps.ts
@@ -0,0 +1,41 @@
+import { useCallback, useState } from 'react'
+import { useTranslation } from 'react-i18next'
+import { AddDocumentsStep } from '../types'
+
+/**
+ * Tracks the add-documents wizard step (starting at 1) and exposes the translated step list with next/back callbacks
+ */
+export const useAddDocumentsSteps = () => {
+  const { t } = useTranslation()
+  const [currentStep, setCurrentStep] = useState(1)
+
+  // Single stable helper that offsets the step through a functional update
+  const shiftStep = useCallback((delta: number) => {
+    setCurrentStep(step => step + delta)
+  }, [])
+
+  const handleNextStep = useCallback(() => {
+    shiftStep(1)
+  }, [shiftStep])
+
+  const handleBackStep = useCallback(() => {
+    shiftStep(-1)
+  }, [shiftStep])
+
+  // Labels are translated during render and listed in wizard order
+  const chooseDatasourceLabel = t('addDocuments.steps.chooseDatasource', { ns: 'datasetPipeline' })
+  const processDocumentsLabel = t('addDocuments.steps.processDocuments', { ns: 'datasetPipeline' })
+  const processingDocumentsLabel = t('addDocuments.steps.processingDocuments', { ns: 'datasetPipeline' })
+  const steps = [
+    { label: chooseDatasourceLabel, value: AddDocumentsStep.dataSource },
+    { label: processDocumentsLabel, value: AddDocumentsStep.processDocuments },
+    { label: processingDocumentsLabel, value: AddDocumentsStep.processingDocuments },
+  ]
+
+  return {
+    steps,
+    currentStep,
+    handleNextStep,
+    handleBackStep,
+  }
+}
--- a/web/app/components/datasets/documents/create-from-pipeline/hooks/use-datasource-actions.ts
+++ b/web/app/components/datasets/documents/create-from-pipeline/hooks/use-datasource-actions.ts
@@ -0,0 +1,321 @@
+import type { StoreApi } from 'zustand'
+import type { DataSourceShape } from '@/app/components/datasets/documents/create-from-pipeline/data-source/store'
+import type { Datasource } from '@/app/components/rag-pipeline/components/panel/test-run/types'
+import type { DataSourceNotionPageMap, NotionPage } from '@/models/common'
+import type { CrawlResultItem, DocumentItem, CustomFile as File, FileIndexingEstimateResponse } from '@/models/datasets'
+import type {
+  OnlineDriveFile,
+  PublishedPipelineRunPreviewResponse,
+  PublishedPipelineRunResponse,
+} from '@/models/pipeline'
+import { useCallback, useRef } from 'react'
+import { trackEvent } from '@/app/components/base/amplitude'
+import { DatasourceType } from '@/models/pipeline'
+import { useRunPublishedPipeline } from '@/service/use-pipeline'
+import {
+  buildLocalFileDatasourceInfo,
+  buildOnlineDocumentDatasourceInfo,
+  buildOnlineDriveDatasourceInfo,
+  buildWebsiteCrawlDatasourceInfo,
+} from '../utils/datasource-info-builder'
+
+type DatasourceActionsParams = { // Inputs of the useDatasourceActions hook below
+  datasource: Datasource | undefined // its nodeId is sent as start_node_id; preview/process return early when undefined
+  datasourceType: string | undefined // compared against DatasourceType members to pick which store data is sent
+  pipelineId: string | undefined // sent as pipeline_id; preview/process return early when undefined
+  dataSourceStore: StoreApi<DataSourceShape> // zustand store, read through getState() inside the callbacks
+  setEstimateData: (data: FileIndexingEstimateResponse | undefined) => void // receives `data.outputs` of a successful preview run
+  setBatchId: (id: string) => void // receives the run's `batch`, or '' when it is missing
+  setDocuments: (docs: PublishedPipelineRunResponse['documents']) => void // receives the run's `documents`, or [] when missing
+  handleNextStep: () => void // called after a successful (non-preview) run
+  PagesMapAndSelectedPagesId: DataSourceNotionPageMap // page lookup by page id, used by select-all
+  currentWorkspacePages: { page_id: string }[] | undefined // pages whose ids select-all selects; treated as empty when undefined
+  clearOnlineDocumentData: () => void // run when the provider type is onlineDocument
+  clearWebsiteCrawlData: () => void // run when the provider type is websiteCrawl
+  clearOnlineDriveData: () => void // run when the provider type is onlineDrive
+  setDatasource: (ds: Datasource) => void // called last when switching datasource
+}
+
+/**
+ * Hook bundling the datasource actions: preview/process runs of the published pipeline, preview-item changes, select-all, datasource and credential switching
+ */
+export const useDatasourceActions = ({
+  datasource,
+  datasourceType,
+  pipelineId,
+  dataSourceStore,
+  setEstimateData,
+  setBatchId,
+  setDocuments,
+  handleNextStep,
+  PagesMapAndSelectedPagesId,
+  currentWorkspacePages,
+  clearOnlineDocumentData,
+  clearWebsiteCrawlData,
+  clearOnlineDriveData,
+  setDatasource,
+}: DatasourceActionsParams) => {
+  const isPreview = useRef(false) // mode flag: set by onClickPreview / onClickProcess, read by handleSubmit
+  const formRef = useRef<{ submit: () => void } | null>(null) // form handle; its submit() is called by the click handlers
+
+  const { mutateAsync: runPublishedPipeline, isIdle, isPending } = useRunPublishedPipeline()
+
+  // Build datasource info for preview: at most one entry, taken from the preview ref that matches datasourceType
+  const buildPreviewDatasourceInfo = useCallback(() => {
+    const {
+      previewLocalFileRef,
+      previewOnlineDocumentRef,
+      previewWebsitePageRef,
+      previewOnlineDriveFileRef,
+      currentCredentialId,
+      bucket,
+    } = dataSourceStore.getState()
+
+    const datasourceInfoList: Record<string, unknown>[] = []
+
+    if (datasourceType === DatasourceType.localFile && previewLocalFileRef.current) {
+      datasourceInfoList.push(buildLocalFileDatasourceInfo(
+        previewLocalFileRef.current as File,
+        currentCredentialId,
+      ))
+    }
+
+    if (datasourceType === DatasourceType.onlineDocument && previewOnlineDocumentRef.current) {
+      datasourceInfoList.push(buildOnlineDocumentDatasourceInfo(
+        previewOnlineDocumentRef.current,
+        currentCredentialId,
+      ))
+    }
+
+    if (datasourceType === DatasourceType.websiteCrawl && previewWebsitePageRef.current) {
+      datasourceInfoList.push(buildWebsiteCrawlDatasourceInfo(
+        previewWebsitePageRef.current,
+        currentCredentialId,
+      ))
+    }
+
+    if (datasourceType === DatasourceType.onlineDrive && previewOnlineDriveFileRef.current) {
+      datasourceInfoList.push(buildOnlineDriveDatasourceInfo(
+        previewOnlineDriveFileRef.current,
+        bucket,
+        currentCredentialId,
+      ))
+    }
+
+    return datasourceInfoList
+  }, [dataSourceStore, datasourceType])
+
+  // Build datasource info for processing: one entry per item held in the store for the active datasourceType
+  const buildProcessDatasourceInfo = useCallback(() => {
+    const {
+      currentCredentialId,
+      localFileList,
+      onlineDocuments,
+      websitePages,
+      bucket,
+      selectedFileIds,
+      onlineDriveFileList,
+    } = dataSourceStore.getState()
+
+    const datasourceInfoList: Record<string, unknown>[] = []
+
+    if (datasourceType === DatasourceType.localFile) {
+      localFileList.forEach((file) => {
+        datasourceInfoList.push(buildLocalFileDatasourceInfo(file.file, currentCredentialId))
+      })
+    }
+
+    if (datasourceType === DatasourceType.onlineDocument) {
+      onlineDocuments.forEach((page) => {
+        datasourceInfoList.push(buildOnlineDocumentDatasourceInfo(page, currentCredentialId))
+      })
+    }
+
+    if (datasourceType === DatasourceType.websiteCrawl) {
+      websitePages.forEach((page) => {
+        datasourceInfoList.push(buildWebsiteCrawlDatasourceInfo(page, currentCredentialId))
+      })
+    }
+
+    if (datasourceType === DatasourceType.onlineDrive) {
+      selectedFileIds.forEach((id) => { // selected ids without a matching file in the list are skipped
+        const file = onlineDriveFileList.find(f => f.id === id)
+        if (file)
+          datasourceInfoList.push(buildOnlineDriveDatasourceInfo(file, bucket, currentCredentialId))
+      })
+    }
+
+    return datasourceInfoList
+  }, [dataSourceStore, datasourceType])
+
+  // Handle chunk preview: run the published pipeline with is_preview: true and store the returned outputs as estimate data
+  const handlePreviewChunks = useCallback(async (data: Record<string, unknown>) => {
+    if (!datasource || !pipelineId)
+      return
+
+    const datasourceInfoList = buildPreviewDatasourceInfo()
+    await runPublishedPipeline({
+      pipeline_id: pipelineId,
+      inputs: data,
+      start_node_id: datasource.nodeId,
+      datasource_type: datasourceType as DatasourceType,
+      datasource_info_list: datasourceInfoList,
+      is_preview: true,
+    }, {
+      onSuccess: (res) => {
+        setEstimateData((res as PublishedPipelineRunPreviewResponse).data.outputs)
+      },
+    })
+  }, [datasource, pipelineId, datasourceType, buildPreviewDatasourceInfo, runPublishedPipeline, setEstimateData])
+
+  // Handle document processing: run with is_preview: false, then store batch/documents, advance the step and track the event
+  const handleProcess = useCallback(async (data: Record<string, unknown>) => {
+    if (!datasource || !pipelineId)
+      return
+
+    const datasourceInfoList = buildProcessDatasourceInfo()
+    await runPublishedPipeline({
+      pipeline_id: pipelineId,
+      inputs: data,
+      start_node_id: datasource.nodeId,
+      datasource_type: datasourceType as DatasourceType,
+      datasource_info_list: datasourceInfoList,
+      is_preview: false,
+    }, {
+      onSuccess: (res) => {
+        setBatchId((res as PublishedPipelineRunResponse).batch || '')
+        setDocuments((res as PublishedPipelineRunResponse).documents || [])
+        handleNextStep()
+        trackEvent('dataset_document_added', {
+          data_source_type: datasourceType,
+          indexing_technique: 'pipeline',
+        })
+      },
+    })
+  }, [datasource, pipelineId, datasourceType, buildProcessDatasourceInfo, runPublishedPipeline, setBatchId, setDocuments, handleNextStep])
+
+  // Form submission handlers: each sets the mode flag first, then submits the form through formRef
+  const onClickProcess = useCallback(() => {
+    isPreview.current = false
+    formRef.current?.submit()
+  }, [])
+
+  const onClickPreview = useCallback(() => {
+    isPreview.current = true
+    formRef.current?.submit()
+  }, [])
+
+  const handleSubmit = useCallback((data: Record<string, unknown>) => { // NOTE(review): the async handlers below are neither awaited nor caught here — confirm rejections are handled elsewhere
+    if (isPreview.current)
+      handlePreviewChunks(data)
+    else
+      handleProcess(data)
+  }, [handlePreviewChunks, handleProcess])
+
+  // Preview change handlers: each stores the given item in its preview ref on the store, then triggers a preview submit
+  const handlePreviewFileChange = useCallback((file: DocumentItem) => {
+    const { previewLocalFileRef } = dataSourceStore.getState()
+    previewLocalFileRef.current = file
+    onClickPreview()
+  }, [dataSourceStore, onClickPreview])
+
+  const handlePreviewOnlineDocumentChange = useCallback((page: NotionPage) => {
+    const { previewOnlineDocumentRef } = dataSourceStore.getState()
+    previewOnlineDocumentRef.current = page
+    onClickPreview()
+  }, [dataSourceStore, onClickPreview])
+
+  const handlePreviewWebsiteChange = useCallback((website: CrawlResultItem) => {
+    const { previewWebsitePageRef } = dataSourceStore.getState()
+    previewWebsitePageRef.current = website
+    onClickPreview()
+  }, [dataSourceStore, onClickPreview])
+
+  const handlePreviewOnlineDriveFileChange = useCallback((file: OnlineDriveFile) => {
+    const { previewOnlineDriveFileRef } = dataSourceStore.getState()
+    previewOnlineDriveFileRef.current = file
+    onClickPreview()
+  }, [dataSourceStore, onClickPreview])
+
+  // Select all handler: selects everything when fewer items than available are selected, otherwise clears the selection
+  const handleSelectAll = useCallback(() => {
+    const {
+      onlineDocuments,
+      onlineDriveFileList,
+      selectedFileIds,
+      setOnlineDocuments,
+      setSelectedFileIds,
+      setSelectedPagesId,
+    } = dataSourceStore.getState()
+
+    if (datasourceType === DatasourceType.onlineDocument) {
+      const allIds = currentWorkspacePages?.map(page => page.page_id) || []
+      if (onlineDocuments.length < allIds.length) {
+        const selectedPages = Array.from(allIds).map(pageId => PagesMapAndSelectedPagesId[pageId]) // presumably every id has an entry in the map — verify against caller
+        setOnlineDocuments(selectedPages)
+        setSelectedPagesId(new Set(allIds))
+      }
+      else {
+        setOnlineDocuments([])
+        setSelectedPagesId(new Set())
+      }
+    }
+
+    if (datasourceType === DatasourceType.onlineDrive) {
+      const allKeys = onlineDriveFileList.filter(item => item.type !== 'bucket').map(file => file.id) // items of type 'bucket' are left out of select-all
+      if (selectedFileIds.length < allKeys.length)
+        setSelectedFileIds(allKeys)
+      else
+        setSelectedFileIds([])
+    }
+  }, [PagesMapAndSelectedPagesId, currentWorkspacePages, dataSourceStore, datasourceType])
+
+  // Clear datasource data based on the datasource's provider type; local files map to a no-op
+  const clearDataSourceData = useCallback((dataSource: Datasource) => {
+    const providerType = dataSource.nodeData.provider_type
+    const clearFunctions: Record<string, () => void> = {
+      [DatasourceType.onlineDocument]: clearOnlineDocumentData,
+      [DatasourceType.websiteCrawl]: clearWebsiteCrawlData,
+      [DatasourceType.onlineDrive]: clearOnlineDriveData,
+      [DatasourceType.localFile]: () => {},
+    }
+    clearFunctions[providerType]?.() // provider types missing from the table are ignored
+  }, [clearOnlineDocumentData, clearOnlineDriveData, clearWebsiteCrawlData])
+
+  // Switch datasource handler: clears data for the given datasource's provider type, resets the credential id, records the node id, then sets the datasource
+  const handleSwitchDataSource = useCallback((dataSource: Datasource) => {
+    const {
+      setCurrentCredentialId,
+      currentNodeIdRef,
+    } = dataSourceStore.getState()
+    clearDataSourceData(dataSource)
+    setCurrentCredentialId('')
+    currentNodeIdRef.current = dataSource.nodeId
+    setDatasource(dataSource)
+  }, [clearDataSourceData, dataSourceStore, setDatasource])
+
+  // Credential change handler: clears the current datasource's data (if any) before storing the new credential id
+  const handleCredentialChange = useCallback((credentialId: string) => {
+    const { setCurrentCredentialId } = dataSourceStore.getState()
+    if (datasource)
+      clearDataSourceData(datasource)
+    setCurrentCredentialId(credentialId)
+  }, [clearDataSourceData, dataSourceStore, datasource])
+
+  return {
+    isPreview,
+    formRef,
+    isIdle,
+    isPending,
+    onClickProcess,
+    onClickPreview,
+    handleSubmit,
+    handlePreviewFileChange,
+    handlePreviewOnlineDocumentChange,
+    handlePreviewWebsiteChange,
+    handlePreviewOnlineDriveFileChange,
+    handleSelectAll,
+    handleSwitchDataSource,
+    handleCredentialChange,
+  }
+}
--- a/web/app/components/datasets/documents/create-from-pipeline/hooks/use-datasource-options.ts
+++ b/web/app/components/datasets/documents/create-from-pipeline/hooks/use-datasource-options.ts
@@ -0,0 +1,27 @@
+import type { DataSourceOption } from '@/app/components/rag-pipeline/components/panel/test-run/types'
+import type { DataSourceNodeType } from '@/app/components/workflow/nodes/data-source/types'
+import type { Node } from '@/app/components/workflow/types'
+import { useMemo } from 'react'
+import { BlockEnum } from '@/app/components/workflow/types'
+
+/**
+ * Builds the datasource options from a pipeline's nodes; only nodes typed `BlockEnum.DataSource` are kept.
+ */
+export const useDatasourceOptions = (pipelineNodes: Node<DataSourceNodeType>[]) => {
+  // Filter inside the memo and depend on `pipelineNodes` itself. Filtering
+  // during render yields a fresh array every time, so a memo keyed on that
+  // array is recomputed on every render and never returns a stable value.
+  const options = useMemo(() => {
+    const datasourceNodes = pipelineNodes.filter(node => node.data.type === BlockEnum.DataSource)
+    // Each option carries the node title as label, the node id as value,
+    // and the node's data object.
+    const result: DataSourceOption[] = datasourceNodes.map(node => ({
+      label: node.data.title,
+      value: node.id,
+      data: node.data,
+    }))
+    return result
+  }, [pipelineNodes])
+
+  return options
+}
--- a/web/app/components/datasets/documents/create-from-pipeline/hooks/use-datasource-store.ts
+++ b/web/app/components/datasets/documents/create-from-pipeline/hooks/use-datasource-store.ts
@@ -1,69 +1,12 @@
-import type { DataSourceOption } from '@/app/components/rag-pipeline/components/panel/test-run/types'
-import type { DataSourceNodeType } from '@/app/components/workflow/nodes/data-source/types'
-import type { Node } from '@/app/components/workflow/types'
 import type { DataSourceNotionPageMap, DataSourceNotionWorkspace } from '@/models/common'
-import { useCallback, useMemo, useState } from 'react'
-import { useTranslation } from 'react-i18next'
+import { useCallback, useMemo } from 'react'
 import { useShallow } from 'zustand/react/shallow'
-import { BlockEnum } from '@/app/components/workflow/types'
 import { CrawlStep } from '@/models/datasets'
-import { useDataSourceStore, useDataSourceStoreWithSelector } from './data-source/store'
-import { AddDocumentsStep } from './types'
-
-export const useAddDocumentsSteps = () => {
-  const { t } = useTranslation()
-  const [currentStep, setCurrentStep] = useState(1)
-
-  const handleNextStep = useCallback(() => {
-    setCurrentStep(preStep => preStep + 1)
-  }, [])
-
-  const handleBackStep = useCallback(() => {
-    setCurrentStep(preStep => preStep - 1)
-  }, [])
-
-  const steps = [
-    {
-      label: t('addDocuments.steps.chooseDatasource', { ns: 'datasetPipeline' }),
-      value: AddDocumentsStep.dataSource,
-    },
-    {
-      label: t('addDocuments.steps.processDocuments', { ns: 'datasetPipeline' }),
-      value: AddDocumentsStep.processDocuments,
-    },
-    {
-      label: t('addDocuments.steps.processingDocuments', { ns: 'datasetPipeline' }),
-      value: AddDocumentsStep.processingDocuments,
-    },
-  ]
-
-  return {
-    steps,
-    currentStep,
-    handleNextStep,
-    handleBackStep,
-  }
-}
-
-export const useDatasourceOptions = (pipelineNodes: Node<DataSourceNodeType>[]) => {
-  const datasourceNodes = pipelineNodes.filter(node => node.data.type === BlockEnum.DataSource)
-
-  const options = useMemo(() => {
-    const options: DataSourceOption[] = []
-    datasourceNodes.forEach((node) => {
-      const label = node.data.title
-      options.push({
-        label,
-        value: node.id,
-        data: node.data,
-      })
-    })
-    return options
-  }, [datasourceNodes])
-
-  return options
-}
+import { useDataSourceStore, useDataSourceStoreWithSelector } from '../data-source/store'

+/**
+ * Hook for local file datasource store operations
+ */
 export const useLocalFile = () => {
  const {
    localFileList,
@@ -89,6 +32,9 @@ export const useLocalFile = () => {
  }
 }

+/**
+ * Hook for online document datasource store operations
+ */
 export const useOnlineDocument = () => {
  const {
    documentsData,
@@ -147,6 +93,9 @@ export const useOnlineDocument = () => {
  }
 }

+/**
+ * Hook for website crawl datasource store operations
+ */
 export const useWebsiteCrawl = () => {
  const {
    websitePages,
@@ -186,6 +135,9 @@ export const useWebsiteCrawl = () => {
  }
 }

+/**
+ * Hook for online drive datasource store operations
+ */
 export const useOnlineDrive = () => {
  const {
    onlineDriveFileList,
--- a/web/app/components/datasets/documents/create-from-pipeline/hooks/use-datasource-ui-state.ts
+++ b/web/app/components/datasets/documents/create-from-pipeline/hooks/use-datasource-ui-state.ts
@@ -0,0 +1,132 @@
+import type { Datasource } from '@/app/components/rag-pipeline/components/panel/test-run/types'
+import type { OnlineDriveFile } from '@/models/pipeline'
+import { useMemo } from 'react'
+import { useTranslation } from 'react-i18next'
+import { DatasourceType } from '@/models/pipeline'
+
+type DatasourceUIStateParams = {
+  datasource: Datasource | undefined
+  allFileLoaded: boolean
+  localFileListLength: number
+  onlineDocumentsLength: number
+  websitePagesLength: number
+  selectedFileIdsLength: number
+  onlineDriveFileList: OnlineDriveFile[]
+  isVectorSpaceFull: boolean
+  enableBilling: boolean
+  currentWorkspacePagesLength: number
+  fileUploadConfig: { file_size_limit: number, batch_count_limit: number }
+}
+
+/**
+ * Derives datasource UI state (vector-space banner, next button, select-all counts, tip) from the active datasource's provider type
+ */
+export const useDatasourceUIState = ({
+  datasource,
+  allFileLoaded,
+  localFileListLength,
+  onlineDocumentsLength,
+  websitePagesLength,
+  selectedFileIdsLength,
+  onlineDriveFileList,
+  isVectorSpaceFull,
+  enableBilling,
+  currentWorkspacePagesLength,
+  fileUploadConfig,
+}: DatasourceUIStateParams) => {
+  const { t } = useTranslation()
+  const datasourceType = datasource?.nodeData.provider_type
+
+  const isShowVectorSpaceFull = useMemo(() => {
+    if (!datasource || !datasourceType)
+      return false
+
+    // Per-type precondition that must hold before the vector-space-full state can apply
+    const vectorSpaceFullConditions: Record<string, boolean> = {
+      [DatasourceType.localFile]: allFileLoaded,
+      [DatasourceType.onlineDocument]: onlineDocumentsLength > 0,
+      [DatasourceType.websiteCrawl]: websitePagesLength > 0,
+      [DatasourceType.onlineDrive]: onlineDriveFileList.length > 0,
+    }
+    // Coerce the lookup so a provider type missing from the table yields false, not undefined
+    const condition = vectorSpaceFullConditions[datasourceType]
+    return Boolean(condition) && isVectorSpaceFull && enableBilling
+  }, [datasource, datasourceType, allFileLoaded, onlineDocumentsLength, websitePagesLength, onlineDriveFileList.length, isVectorSpaceFull, enableBilling])
+
+  // Next is disabled with no datasource, for a type missing from the table, when vector space is full, or when the type's selection is empty
+  const nextBtnDisabled = useMemo(() => {
+    if (!datasource || !datasourceType)
+      return true
+
+    const disabledConditions: Record<string, boolean> = {
+      [DatasourceType.localFile]: isShowVectorSpaceFull || localFileListLength === 0 || !allFileLoaded,
+      [DatasourceType.onlineDocument]: isShowVectorSpaceFull || onlineDocumentsLength === 0,
+      [DatasourceType.websiteCrawl]: isShowVectorSpaceFull || websitePagesLength === 0,
+      [DatasourceType.onlineDrive]: isShowVectorSpaceFull || selectedFileIdsLength === 0,
+    }
+
+    return disabledConditions[datasourceType] ?? true
+  }, [datasource, datasourceType, isShowVectorSpaceFull, localFileListLength, allFileLoaded, onlineDocumentsLength, websitePagesLength, selectedFileIdsLength])
+
+  // Select-all is shown for online documents with at least one page, and for non-empty online-drive listings that contain no bucket
+  const showSelect = useMemo(() => {
+    if (datasourceType === DatasourceType.onlineDocument)
+      return currentWorkspacePagesLength > 0
+
+    if (datasourceType === DatasourceType.onlineDrive) {
+      const nonBucketItems = onlineDriveFileList.filter(item => item.type !== 'bucket')
+      const isBucketList = onlineDriveFileList.some(file => file.type === 'bucket')
+      return !isBucketList && nonBucketItems.length > 0
+    }
+
+    return false
+  }, [currentWorkspacePagesLength, datasourceType, onlineDriveFileList])
+
+  // Total selectable items: workspace pages or non-bucket drive items; undefined for other types
+  const totalOptions = useMemo(() => {
+    if (datasourceType === DatasourceType.onlineDocument)
+      return currentWorkspacePagesLength
+
+    if (datasourceType === DatasourceType.onlineDrive)
+      return onlineDriveFileList.filter(item => item.type !== 'bucket').length
+
+    return undefined
+  }, [currentWorkspacePagesLength, datasourceType, onlineDriveFileList])
+
+  // Currently selected item count; undefined for other types
+  const selectedOptions = useMemo(() => {
+    if (datasourceType === DatasourceType.onlineDocument)
+      return onlineDocumentsLength
+
+    if (datasourceType === DatasourceType.onlineDrive)
+      return selectedFileIdsLength
+
+    return undefined
+  }, [datasourceType, onlineDocumentsLength, selectedFileIdsLength])
+
+  // Translated selection tip for online documents and online drive; empty string for other types
+  const tip = useMemo(() => {
+    if (datasourceType === DatasourceType.onlineDocument)
+      return t('addDocuments.selectOnlineDocumentTip', { ns: 'datasetPipeline', count: 50 })
+
+    if (datasourceType === DatasourceType.onlineDrive) {
+      return t('addDocuments.selectOnlineDriveTip', {
+        ns: 'datasetPipeline',
+        count: fileUploadConfig.batch_count_limit,
+        fileSize: fileUploadConfig.file_size_limit,
+      })
+    }
+
+    return ''
+  }, [datasourceType, fileUploadConfig.batch_count_limit, fileUploadConfig.file_size_limit, t])
+
+  return {
+    datasourceType,
+    isShowVectorSpaceFull,
+    nextBtnDisabled,
+    showSelect,
+    totalOptions,
+    selectedOptions,
+    tip,
+  }
+}
--- a/web/app/components/datasets/documents/create-from-pipeline/index.spec.tsx
+++ b/web/app/components/datasets/documents/create-from-pipeline/index.spec.tsx
--- a/web/app/components/datasets/documents/create-from-pipeline/index.tsx
+++ b/web/app/components/datasets/documents/create-from-pipeline/index.tsx
@@ -2,75 +2,71 @@
 import type { Datasource } from '@/app/components/rag-pipeline/components/panel/test-run/types'
 import type { DataSourceNodeType } from '@/app/components/workflow/nodes/data-source/types'
 import type { Node } from '@/app/components/workflow/types'
-import type { NotionPage } from '@/models/common'
-import type { CrawlResultItem, DocumentItem, CustomFile as File, FileIndexingEstimateResponse } from '@/models/datasets'
-import type {
-  InitialDocumentDetail,
-  OnlineDriveFile,
-  PublishedPipelineRunPreviewResponse,
-  PublishedPipelineRunResponse,
-} from '@/models/pipeline'
+import type { FileIndexingEstimateResponse } from '@/models/datasets'
+import type { InitialDocumentDetail } from '@/models/pipeline'
 import { useBoolean } from 'ahooks'
-import { useCallback, useMemo, useRef, useState } from 'react'
+import { useCallback, useMemo, useState } from 'react'
 import { useTranslation } from 'react-i18next'
-import { trackEvent } from '@/app/components/base/amplitude'
-import Divider from '@/app/components/base/divider'
 import Loading from '@/app/components/base/loading'
 import PlanUpgradeModal from '@/app/components/billing/plan-upgrade-modal'
-import VectorSpaceFull from '@/app/components/billing/vector-space-full'
-import LocalFile from '@/app/components/datasets/documents/create-from-pipeline/data-source/local-file'
-import OnlineDocuments from '@/app/components/datasets/documents/create-from-pipeline/data-source/online-documents'
-import OnlineDrive from '@/app/components/datasets/documents/create-from-pipeline/data-source/online-drive'
-import WebsiteCrawl from '@/app/components/datasets/documents/create-from-pipeline/data-source/website-crawl'
 import { useDatasetDetailContextWithSelector } from '@/context/dataset-detail'
 import { useProviderContextSelector } from '@/context/provider-context'
 import { DatasourceType } from '@/models/pipeline'
 import { useFileUploadConfig } from '@/service/use-common'
-import { usePublishedPipelineInfo, useRunPublishedPipeline } from '@/service/use-pipeline'
-import { TransferMethod } from '@/types/app'
-import UpgradeCard from '../../create/step-one/upgrade-card'
-import Actions from './actions'
-import DataSourceOptions from './data-source-options'
+import { usePublishedPipelineInfo } from '@/service/use-pipeline'
 import { useDataSourceStore } from './data-source/store'
 import DataSourceProvider from './data-source/store/provider'
-import { useAddDocumentsSteps, useLocalFile, useOnlineDocument, useOnlineDrive, useWebsiteCrawl } from './hooks'
+import {
+  useAddDocumentsSteps,
+  useDatasourceActions,
+  useDatasourceUIState,
+  useLocalFile,
+  useOnlineDocument,
+  useOnlineDrive,
+  useWebsiteCrawl,
+} from './hooks'
 import LeftHeader from './left-header'
-import ChunkPreview from './preview/chunk-preview'
-import FilePreview from './preview/file-preview'
-import OnlineDocumentPreview from './preview/online-document-preview'
-import WebsitePreview from './preview/web-preview'
-import ProcessDocuments from './process-documents'
-import Processing from './processing'
+import { StepOneContent, StepThreeContent, StepTwoContent } from './steps'
+import { StepOnePreview, StepTwoPreview } from './steps/preview-panel'

 const CreateFormPipeline = () => {
  const { t } = useTranslation()
  const plan = useProviderContextSelector(state => state.plan)
  const enableBilling = useProviderContextSelector(state => state.enableBilling)
  const pipelineId = useDatasetDetailContextWithSelector(s => s.dataset?.pipeline_id)
+  const dataSourceStore = useDataSourceStore()
+
+  // Core state
  const [datasource, setDatasource] = useState<Datasource>()
  const [estimateData, setEstimateData] = useState<FileIndexingEstimateResponse | undefined>(undefined)
  const [batchId, setBatchId] = useState('')
  const [documents, setDocuments] = useState<InitialDocumentDetail[]>([])
-  const dataSourceStore = useDataSourceStore()
-
-  const isPreview = useRef(false)
-  const formRef = useRef<any>(null)

+  // Data fetching
  const { data: pipelineInfo, isFetching: isFetchingPipelineInfo } = usePublishedPipelineInfo(pipelineId || '')
  const { data: fileUploadConfigResponse } = useFileUploadConfig()

+  const fileUploadConfig = useMemo(() => fileUploadConfigResponse ?? {
+    file_size_limit: 15,
+    batch_count_limit: 5,
+  }, [fileUploadConfigResponse])
+
+  // Steps management
  const {
    steps,
    currentStep,
    handleNextStep: doHandleNextStep,
    handleBackStep,
  } = useAddDocumentsSteps()
+
+  // Datasource-specific hooks
  const {
    localFileList,
    allFileLoaded,
    currentLocalFile,
    hidePreviewLocalFile,
  } = useLocalFile()
+
  const {
    currentWorkspace,
    onlineDocuments,
@@ -79,12 +75,14 @@ const CreateFormPipeline = () => {
    hidePreviewOnlineDocument,
    clearOnlineDocumentData,
  } = useOnlineDocument()
+
  const {
    websitePages,
    currentWebsite,
    hideWebsitePreview,
    clearWebsiteCrawlData,
  } = useWebsiteCrawl()
+
  const {
    onlineDriveFileList,
    selectedFileIds,
@@ -92,43 +90,50 @@ const CreateFormPipeline = () => {
    clearOnlineDriveData,
  } = useOnlineDrive()

-  const datasourceType = useMemo(() => datasource?.nodeData.provider_type, [datasource])
+  // Computed values
  const isVectorSpaceFull = plan.usage.vectorSpace >= plan.total.vectorSpace
-  const isShowVectorSpaceFull = useMemo(() => {
-    if (!datasource)
-      return false
-    if (datasourceType === DatasourceType.localFile)
-      return allFileLoaded && isVectorSpaceFull && enableBilling
-    if (datasourceType === DatasourceType.onlineDocument)
-      return onlineDocuments.length > 0 && isVectorSpaceFull && enableBilling
-    if (datasourceType === DatasourceType.websiteCrawl)
-      return websitePages.length > 0 && isVectorSpaceFull && enableBilling
-    if (datasourceType === DatasourceType.onlineDrive)
-      return onlineDriveFileList.length > 0 && isVectorSpaceFull && enableBilling
-    return false
-  }, [allFileLoaded, datasource, datasourceType, enableBilling, isVectorSpaceFull, onlineDocuments.length, onlineDriveFileList.length, websitePages.length])
  const supportBatchUpload = !enableBilling || plan.type !== 'sandbox'

+  // UI state
+  const {
+    datasourceType,
+    isShowVectorSpaceFull,
+    nextBtnDisabled,
+    showSelect,
+    totalOptions,
+    selectedOptions,
+    tip,
+  } = useDatasourceUIState({
+    datasource,
+    allFileLoaded,
+    localFileListLength: localFileList.length,
+    onlineDocumentsLength: onlineDocuments.length,
+    websitePagesLength: websitePages.length,
+    selectedFileIdsLength: selectedFileIds.length,
+    onlineDriveFileList,
+    isVectorSpaceFull,
+    enableBilling,
+    currentWorkspacePagesLength: currentWorkspace?.pages.length ?? 0,
+    fileUploadConfig,
+  })
+
+  // Plan upgrade modal
  const [isShowPlanUpgradeModal, {
    setTrue: showPlanUpgradeModal,
    setFalse: hidePlanUpgradeModal,
  }] = useBoolean(false)
+
+  // Next step with batch upload check
  const handleNextStep = useCallback(() => {
    if (!supportBatchUpload) {
-      let isMultiple = false
-      if (datasourceType === DatasourceType.localFile && localFileList.length > 1)
-        isMultiple = true
-
-      if (datasourceType === DatasourceType.onlineDocument && onlineDocuments.length > 1)
-        isMultiple = true
-
-      if (datasourceType === DatasourceType.websiteCrawl && websitePages.length > 1)
-        isMultiple = true
-
-      if (datasourceType === DatasourceType.onlineDrive && selectedFileIds.length > 1)
-        isMultiple = true
-
-      if (isMultiple) {
+      const multipleCheckMap: Record<string, number> = {
+        [DatasourceType.localFile]: localFileList.length,
+        [DatasourceType.onlineDocument]: onlineDocuments.length,
+        [DatasourceType.websiteCrawl]: websitePages.length,
+        [DatasourceType.onlineDrive]: selectedFileIds.length,
+      }
+      const count = datasourceType ? multipleCheckMap[datasourceType] : 0
+      if (count > 1) {
        showPlanUpgradeModal()
        return
      }
@@ -136,334 +141,44 @@ const CreateFormPipeline = () => {
    doHandleNextStep()
  }, [datasourceType, doHandleNextStep, localFileList.length, onlineDocuments.length, selectedFileIds.length, showPlanUpgradeModal, supportBatchUpload, websitePages.length])

-  const nextBtnDisabled = useMemo(() => {
-    if (!datasource)
-      return true
-    if (datasourceType === DatasourceType.localFile)
-      return isShowVectorSpaceFull || !localFileList.length || !allFileLoaded
-    if (datasourceType === DatasourceType.onlineDocument)
-      return isShowVectorSpaceFull || !onlineDocuments.length
-    if (datasourceType === DatasourceType.websiteCrawl)
-      return isShowVectorSpaceFull || !websitePages.length
-    if (datasourceType === DatasourceType.onlineDrive)
-      return isShowVectorSpaceFull || !selectedFileIds.length
-    return false
-  }, [datasource, datasourceType, isShowVectorSpaceFull, localFileList.length, allFileLoaded, onlineDocuments.length, websitePages.length, selectedFileIds.length])
+  // Datasource actions
+  const {
+    isPreview,
+    formRef,
+    isIdle,
+    isPending,
+    onClickProcess,
+    onClickPreview,
+    handleSubmit,
+    handlePreviewFileChange,
+    handlePreviewOnlineDocumentChange,
+    handlePreviewWebsiteChange,
+    handlePreviewOnlineDriveFileChange,
+    handleSelectAll,
+    handleSwitchDataSource,
+    handleCredentialChange,
+  } = useDatasourceActions({
+    datasource,
+    datasourceType,
+    pipelineId,
+    dataSourceStore,
+    setEstimateData,
+    setBatchId,
+    setDocuments,
+    handleNextStep,
+    PagesMapAndSelectedPagesId,
+    currentWorkspacePages: currentWorkspace?.pages,
+    clearOnlineDocumentData,
+    clearWebsiteCrawlData,
+    clearOnlineDriveData,
+    setDatasource,
+  })

-  const fileUploadConfig = useMemo(() => fileUploadConfigResponse ?? {
-    file_size_limit: 15,
-    batch_count_limit: 5,
-  }, [fileUploadConfigResponse])
-
-  const showSelect = useMemo(() => {
-    if (datasourceType === DatasourceType.onlineDocument) {
-      const pagesCount = currentWorkspace?.pages.length ?? 0
-      return pagesCount > 0
-    }
-    if (datasourceType === DatasourceType.onlineDrive) {
-      const isBucketList = onlineDriveFileList.some(file => file.type === 'bucket')
-      return !isBucketList && onlineDriveFileList.filter((item) => {
-        return item.type !== 'bucket'
-      }).length > 0
-    }
-    return false
-  }, [currentWorkspace?.pages.length, datasourceType, onlineDriveFileList])
-
-  const totalOptions = useMemo(() => {
-    if (datasourceType === DatasourceType.onlineDocument)
-      return currentWorkspace?.pages.length
-    if (datasourceType === DatasourceType.onlineDrive) {
-      return onlineDriveFileList.filter((item) => {
-        return item.type !== 'bucket'
-      }).length
-    }
-  }, [currentWorkspace?.pages.length, datasourceType, onlineDriveFileList])
-
-  const selectedOptions = useMemo(() => {
-    if (datasourceType === DatasourceType.onlineDocument)
-      return onlineDocuments.length
-    if (datasourceType === DatasourceType.onlineDrive)
-      return selectedFileIds.length
-  }, [datasourceType, onlineDocuments.length, selectedFileIds.length])
-
-  const tip = useMemo(() => {
-    if (datasourceType === DatasourceType.onlineDocument)
-      return t('addDocuments.selectOnlineDocumentTip', { ns: 'datasetPipeline', count: 50 })
-    if (datasourceType === DatasourceType.onlineDrive) {
-      return t('addDocuments.selectOnlineDriveTip', {
-        ns: 'datasetPipeline',
-        count: fileUploadConfig.batch_count_limit,
-        fileSize: fileUploadConfig.file_size_limit,
-      })
-    }
-    return ''
-  }, [datasourceType, fileUploadConfig.batch_count_limit, fileUploadConfig.file_size_limit, t])
-
-  const { mutateAsync: runPublishedPipeline, isIdle, isPending } = useRunPublishedPipeline()
-
-  const handlePreviewChunks = useCallback(async (data: Record<string, any>) => {
-    if (!datasource)
-      return
-    const {
-      previewLocalFileRef,
-      previewOnlineDocumentRef,
-      previewWebsitePageRef,
-      previewOnlineDriveFileRef,
-      currentCredentialId,
-    } = dataSourceStore.getState()
-    const datasourceInfoList: Record<string, any>[] = []
-    if (datasourceType === DatasourceType.localFile) {
-      const { id, name, type, size, extension, mime_type } = previewLocalFileRef.current as File
-      const documentInfo = {
-        related_id: id,
-        name,
-        type,
-        size,
-        extension,
-        mime_type,
-        url: '',
-        transfer_method: TransferMethod.local_file,
-        credential_id: currentCredentialId,
-      }
-      datasourceInfoList.push(documentInfo)
-    }
-    if (datasourceType === DatasourceType.onlineDocument) {
-      const { workspace_id, ...rest } = previewOnlineDocumentRef.current!
-      const documentInfo = {
-        workspace_id,
-        page: rest,
-        credential_id: currentCredentialId,
-      }
-      datasourceInfoList.push(documentInfo)
-    }
-    if (datasourceType === DatasourceType.websiteCrawl) {
-      datasourceInfoList.push({
-        ...previewWebsitePageRef.current!,
-        credential_id: currentCredentialId,
-      })
-    }
-    if (datasourceType === DatasourceType.onlineDrive) {
-      const { bucket } = dataSourceStore.getState()
-      const { id, type, name } = previewOnlineDriveFileRef.current!
-      datasourceInfoList.push({
-        bucket,
-        id,
-        name,
-        type,
-        credential_id: currentCredentialId,
-      })
-    }
-    await runPublishedPipeline({
-      pipeline_id: pipelineId!,
-      inputs: data,
-      start_node_id: datasource.nodeId,
-      datasource_type: datasourceType as DatasourceType,
-      datasource_info_list: datasourceInfoList,
-      is_preview: true,
-    }, {
-      onSuccess: (res) => {
-        setEstimateData((res as PublishedPipelineRunPreviewResponse).data.outputs)
-      },
-    })
-  }, [datasource, datasourceType, runPublishedPipeline, pipelineId, dataSourceStore])
-
-  const handleProcess = useCallback(async (data: Record<string, any>) => {
-    if (!datasource)
-      return
-    const { currentCredentialId } = dataSourceStore.getState()
-    const datasourceInfoList: Record<string, any>[] = []
-    if (datasourceType === DatasourceType.localFile) {
-      const {
-        localFileList,
-      } = dataSourceStore.getState()
-      localFileList.forEach((file) => {
-        const { id, name, type, size, extension, mime_type } = file.file
-        const documentInfo = {
-          related_id: id,
-          name,
-          type,
-          size,
-          extension,
-          mime_type,
-          url: '',
-          transfer_method: TransferMethod.local_file,
-          credential_id: currentCredentialId,
-        }
-        datasourceInfoList.push(documentInfo)
-      })
-    }
-    if (datasourceType === DatasourceType.onlineDocument) {
-      const {
-        onlineDocuments,
-      } = dataSourceStore.getState()
-      onlineDocuments.forEach((page) => {
-        const { workspace_id, ...rest } = page
-        const documentInfo = {
-          workspace_id,
-          page: rest,
-          credential_id: currentCredentialId,
-        }
-        datasourceInfoList.push(documentInfo)
-      })
-    }
-    if (datasourceType === DatasourceType.websiteCrawl) {
-      const {
-        websitePages,
-      } = dataSourceStore.getState()
-      websitePages.forEach((websitePage) => {
-        datasourceInfoList.push({
-          ...websitePage,
-          credential_id: currentCredentialId,
-        })
-      })
-    }
-    if (datasourceType === DatasourceType.onlineDrive) {
-      const {
-        bucket,
-        selectedFileIds,
-        onlineDriveFileList,
-      } = dataSourceStore.getState()
-      selectedFileIds.forEach((id) => {
-        const file = onlineDriveFileList.find(file => file.id === id)
-        datasourceInfoList.push({
-          bucket,
-          id: file?.id,
-          name: file?.name,
-          type: file?.type,
-          credential_id: currentCredentialId,
-        })
-      })
-    }
-    await runPublishedPipeline({
-      pipeline_id: pipelineId!,
-      inputs: data,
-      start_node_id: datasource.nodeId,
-      datasource_type: datasourceType as DatasourceType,
-      datasource_info_list: datasourceInfoList,
-      is_preview: false,
-    }, {
-      onSuccess: (res) => {
-        setBatchId((res as PublishedPipelineRunResponse).batch || '')
-        setDocuments((res as PublishedPipelineRunResponse).documents || [])
-        handleNextStep()
-        trackEvent('dataset_document_added', {
-          data_source_type: datasourceType,
-          indexing_technique: 'pipeline',
-        })
-      },
-    })
-  }, [dataSourceStore, datasource, datasourceType, handleNextStep, pipelineId, runPublishedPipeline])
-
-  const onClickProcess = useCallback(() => {
-    isPreview.current = false
-    formRef.current?.submit()
-  }, [])
-
-  const onClickPreview = useCallback(() => {
-    isPreview.current = true
-    formRef.current?.submit()
-  }, [])
-
-  const handleSubmit = useCallback((data: Record<string, any>) => {
-    if (isPreview.current)
-      handlePreviewChunks(data)
-    else
-      handleProcess(data)
-  }, [handlePreviewChunks, handleProcess])
-
-  const handlePreviewFileChange = useCallback((file: DocumentItem) => {
-    const { previewLocalFileRef } = dataSourceStore.getState()
-    previewLocalFileRef.current = file
-    onClickPreview()
-  }, [dataSourceStore, onClickPreview])
-
-  const handlePreviewOnlineDocumentChange = useCallback((page: NotionPage) => {
-    const { previewOnlineDocumentRef } = dataSourceStore.getState()
-    previewOnlineDocumentRef.current = page
-    onClickPreview()
-  }, [dataSourceStore, onClickPreview])
-
-  const handlePreviewWebsiteChange = useCallback((website: CrawlResultItem) => {
-    const { previewWebsitePageRef } = dataSourceStore.getState()
-    previewWebsitePageRef.current = website
-    onClickPreview()
-  }, [dataSourceStore, onClickPreview])
-
-  const handlePreviewOnlineDriveFileChange = useCallback((file: OnlineDriveFile) => {
-    const { previewOnlineDriveFileRef } = dataSourceStore.getState()
-    previewOnlineDriveFileRef.current = file
-    onClickPreview()
-  }, [dataSourceStore, onClickPreview])
-
-  const handleSelectAll = useCallback(() => {
-    const {
-      onlineDocuments,
-      onlineDriveFileList,
-      selectedFileIds,
-      setOnlineDocuments,
-      setSelectedFileIds,
-      setSelectedPagesId,
-    } = dataSourceStore.getState()
-    if (datasourceType === DatasourceType.onlineDocument) {
-      const allIds = currentWorkspace?.pages.map(page => page.page_id) || []
-      if (onlineDocuments.length < allIds.length) {
-        const selectedPages = Array.from(allIds).map(pageId => PagesMapAndSelectedPagesId[pageId])
-        setOnlineDocuments(selectedPages)
-        setSelectedPagesId(new Set(allIds))
-      }
-      else {
-        setOnlineDocuments([])
-        setSelectedPagesId(new Set())
-      }
-    }
-    if (datasourceType === DatasourceType.onlineDrive) {
-      const allKeys = onlineDriveFileList.filter((item) => {
-        return item.type !== 'bucket'
-      }).map(file => file.id)
-      if (selectedFileIds.length < allKeys.length)
-        setSelectedFileIds(allKeys)
-      else
-        setSelectedFileIds([])
-    }
-  }, [PagesMapAndSelectedPagesId, currentWorkspace?.pages, dataSourceStore, datasourceType])
-
-  const clearDataSourceData = useCallback((dataSource: Datasource) => {
-    const providerType = dataSource.nodeData.provider_type
-    if (providerType === DatasourceType.onlineDocument)
-      clearOnlineDocumentData()
-    else if (providerType === DatasourceType.websiteCrawl)
-      clearWebsiteCrawlData()
-    else if (providerType === DatasourceType.onlineDrive)
-      clearOnlineDriveData()
-  }, [clearOnlineDocumentData, clearOnlineDriveData, clearWebsiteCrawlData])
-
-  const handleSwitchDataSource = useCallback((dataSource: Datasource) => {
-    const {
-      setCurrentCredentialId,
-      currentNodeIdRef,
-    } = dataSourceStore.getState()
-    clearDataSourceData(dataSource)
-    setCurrentCredentialId('')
-    currentNodeIdRef.current = dataSource.nodeId
-    setDatasource(dataSource)
-  }, [clearDataSourceData, dataSourceStore])
-
-  const handleCredentialChange = useCallback((credentialId: string) => {
-    const { setCurrentCredentialId } = dataSourceStore.getState()
-    clearDataSourceData(datasource!)
-    setCurrentCredentialId(credentialId)
-  }, [clearDataSourceData, dataSourceStore, datasource])
-
-  if (isFetchingPipelineInfo) {
-    return (
-      <Loading type="app" />
-    )
-  }
+  if (isFetchingPipelineInfo)
+    return <Loading type="app" />

  return (
-    <div
-      className="relative flex h-[calc(100vh-56px)] w-full min-w-[1024px] overflow-x-auto rounded-t-2xl border-t border-effects-highlight bg-background-default-subtle"
-    >
+    <div className="relative flex h-[calc(100vh-56px)] w-full min-w-[1024px] overflow-x-auto rounded-t-2xl border-t border-effects-highlight bg-background-default-subtle">
      <div className="h-full min-w-0 flex-1">
        <div className="flex h-full flex-col px-14">
          <LeftHeader
@@ -472,139 +187,77 @@ const CreateFormPipeline = () => {
            currentStep={currentStep}
          />
          <div className="grow overflow-y-auto">
-            {
-              currentStep === 1 && (
-                <div className="flex flex-col gap-y-5 pt-4">
-                  <DataSourceOptions
-                    datasourceNodeId={datasource?.nodeId || ''}
-                    onSelect={handleSwitchDataSource}
-                    pipelineNodes={(pipelineInfo?.graph.nodes || []) as Node<DataSourceNodeType>[]}
-                  />
-                  {datasourceType === DatasourceType.localFile && (
-                    <LocalFile
-                      allowedExtensions={datasource!.nodeData.fileExtensions || []}
-                      supportBatchUpload={supportBatchUpload}
-                    />
-                  )}
-                  {datasourceType === DatasourceType.onlineDocument && (
-                    <OnlineDocuments
-                      nodeId={datasource!.nodeId}
-                      nodeData={datasource!.nodeData}
-                      onCredentialChange={handleCredentialChange}
-                    />
-                  )}
-                  {datasourceType === DatasourceType.websiteCrawl && (
-                    <WebsiteCrawl
-                      nodeId={datasource!.nodeId}
-                      nodeData={datasource!.nodeData}
-                      onCredentialChange={handleCredentialChange}
-                    />
-                  )}
-                  {datasourceType === DatasourceType.onlineDrive && (
-                    <OnlineDrive
-                      nodeId={datasource!.nodeId}
-                      nodeData={datasource!.nodeData}
-                      onCredentialChange={handleCredentialChange}
-                    />
-                  )}
-                  {isShowVectorSpaceFull && (
-                    <VectorSpaceFull />
-                  )}
-                  <Actions
-                    showSelect={showSelect}
-                    totalOptions={totalOptions}
-                    selectedOptions={selectedOptions}
-                    onSelectAll={handleSelectAll}
-                    disabled={nextBtnDisabled}
-                    handleNextStep={handleNextStep}
-                    tip={tip}
-                  />
-                  {
-                    !supportBatchUpload && datasourceType === DatasourceType.localFile && localFileList.length > 0 && (
-                      <>
-                        <Divider type="horizontal" className="my-4 h-px bg-divider-subtle" />
-                        <UpgradeCard />
-                      </>
-                    )
-                  }
-                </div>
-              )
-            }
-            {
-              currentStep === 2 && (
-                <ProcessDocuments
-                  ref={formRef}
-                  dataSourceNodeId={datasource!.nodeId}
-                  isRunning={isPending}
-                  onProcess={onClickProcess}
-                  onPreview={onClickPreview}
-                  onSubmit={handleSubmit}
-                  onBack={handleBackStep}
-                />
-              )
-            }
-            {
-              currentStep === 3 && (
-                <Processing
-                  batchId={batchId}
-                  documents={documents}
-                />
-              )
-            }
+            {currentStep === 1 && (
+              <StepOneContent
+                datasource={datasource}
+                datasourceType={datasourceType}
+                pipelineNodes={(pipelineInfo?.graph.nodes || []) as Node<DataSourceNodeType>[]}
+                supportBatchUpload={supportBatchUpload}
+                localFileListLength={localFileList.length}
+                isShowVectorSpaceFull={isShowVectorSpaceFull}
+                showSelect={showSelect}
+                totalOptions={totalOptions}
+                selectedOptions={selectedOptions}
+                tip={tip}
+                nextBtnDisabled={nextBtnDisabled}
+                onSelectDataSource={handleSwitchDataSource}
+                onCredentialChange={handleCredentialChange}
+                onSelectAll={handleSelectAll}
+                onNextStep={handleNextStep}
+              />
+            )}
+            {currentStep === 2 && (
+              <StepTwoContent
+                formRef={formRef}
+                dataSourceNodeId={datasource!.nodeId}
+                isRunning={isPending}
+                onProcess={onClickProcess}
+                onPreview={onClickPreview}
+                onSubmit={handleSubmit}
+                onBack={handleBackStep}
+              />
+            )}
+            {currentStep === 3 && (
+              <StepThreeContent
+                batchId={batchId}
+                documents={documents}
+              />
+            )}
          </div>
        </div>
      </div>
-      {/* Preview */}
-      {
-        currentStep === 1 && (
-          <div className="h-full min-w-0 flex-1">
-            <div className="flex h-full flex-col pl-2 pt-2">
-              {currentLocalFile && (
-                <FilePreview
-                  file={currentLocalFile}
-                  hidePreview={hidePreviewLocalFile}
-                />
-              )}
-              {currentDocument && (
-                <OnlineDocumentPreview
-                  datasourceNodeId={datasource!.nodeId}
-                  currentPage={currentDocument}
-                  hidePreview={hidePreviewOnlineDocument}
-                />
-              )}
-              {currentWebsite && (
-                <WebsitePreview
-                  currentWebsite={currentWebsite}
-                  hidePreview={hideWebsitePreview}
-                />
-              )}
-            </div>
-          </div>
-        )
-      }
-      {
-        currentStep === 2 && (
-          <div className="h-full min-w-0 flex-1">
-            <div className="flex h-full flex-col pl-2 pt-2">
-              <ChunkPreview
-                dataSourceType={datasourceType as DatasourceType}
-                localFiles={localFileList.map(file => file.file)}
-                onlineDocuments={onlineDocuments}
-                websitePages={websitePages}
-                onlineDriveFiles={selectedOnlineDriveFileList}
-                isIdle={isIdle}
-                isPending={isPending && isPreview.current}
-                estimateData={estimateData}
-                onPreview={onClickPreview}
-                handlePreviewFileChange={handlePreviewFileChange}
-                handlePreviewOnlineDocumentChange={handlePreviewOnlineDocumentChange}
-                handlePreviewWebsitePageChange={handlePreviewWebsiteChange}
-                handlePreviewOnlineDriveFileChange={handlePreviewOnlineDriveFileChange}
-              />
-            </div>
-          </div>
-        )
-      }
+
+      {/* Preview Panel */}
+      {currentStep === 1 && (
+        <StepOnePreview
+          datasource={datasource}
+          currentLocalFile={currentLocalFile}
+          currentDocument={currentDocument}
+          currentWebsite={currentWebsite}
+          hidePreviewLocalFile={hidePreviewLocalFile}
+          hidePreviewOnlineDocument={hidePreviewOnlineDocument}
+          hideWebsitePreview={hideWebsitePreview}
+        />
+      )}
+      {currentStep === 2 && (
+        <StepTwoPreview
+          datasourceType={datasourceType}
+          localFileList={localFileList}
+          onlineDocuments={onlineDocuments}
+          websitePages={websitePages}
+          selectedOnlineDriveFileList={selectedOnlineDriveFileList}
+          isIdle={isIdle}
+          isPendingPreview={isPending && isPreview.current}
+          estimateData={estimateData}
+          onPreview={onClickPreview}
+          handlePreviewFileChange={handlePreviewFileChange}
+          handlePreviewOnlineDocumentChange={handlePreviewOnlineDocumentChange}
+          handlePreviewWebsitePageChange={handlePreviewWebsiteChange}
+          handlePreviewOnlineDriveFileChange={handlePreviewOnlineDriveFileChange}
+        />
+      )}
+
+      {/* Plan Upgrade Modal */}
      {isShowPlanUpgradeModal && (
        <PlanUpgradeModal
          show
--- a/web/app/components/datasets/documents/create-from-pipeline/steps/index.ts
+++ b/web/app/components/datasets/documents/create-from-pipeline/steps/index.ts
@@ -0,0 +1,3 @@
+export { default as StepOneContent } from './step-one-content'
+export { default as StepThreeContent } from './step-three-content'
+export { default as StepTwoContent } from './step-two-content'
--- a/web/app/components/datasets/documents/create-from-pipeline/steps/preview-panel.tsx
+++ b/web/app/components/datasets/documents/create-from-pipeline/steps/preview-panel.tsx
@@ -0,0 +1,112 @@
+'use client'
+import type { Datasource } from '@/app/components/rag-pipeline/components/panel/test-run/types'
+import type { NotionPage } from '@/models/common'
+import type { CrawlResultItem, CustomFile, DocumentItem, FileIndexingEstimateResponse, FileItem } from '@/models/datasets'
+import type { DatasourceType, OnlineDriveFile } from '@/models/pipeline'
+import { memo } from 'react'
+import ChunkPreview from '../preview/chunk-preview'
+import FilePreview from '../preview/file-preview'
+import OnlineDocumentPreview from '../preview/online-document-preview'
+import WebsitePreview from '../preview/web-preview'
+
+/**
+ * Props for the step-one preview column. Each `current*` value is the item
+ * currently being previewed (or `undefined` when nothing of that kind is open);
+ * each `hide*` callback is handed to the matching preview as `hidePreview`.
+ */
+type StepOnePreviewProps = {
+  datasource: Datasource | undefined
+  currentLocalFile: CustomFile | undefined
+  currentDocument: (NotionPage & { workspace_id: string }) | undefined
+  currentWebsite: CrawlResultItem | undefined
+  hidePreviewLocalFile: () => void
+  hidePreviewOnlineDocument: () => void
+  hideWebsitePreview: () => void
+}
+
+/**
+ * Preview column rendered beside the step-one form.
+ * One preview is rendered for every `current*` prop that is set.
+ */
+export const StepOnePreview = memo((props: StepOnePreviewProps) => {
+  const {
+    datasource,
+    currentLocalFile,
+    currentDocument,
+    currentWebsite,
+    hidePreviewLocalFile,
+    hidePreviewOnlineDocument,
+    hideWebsitePreview,
+  } = props
+
+  const localFilePreview = currentLocalFile && (
+    <FilePreview
+      file={currentLocalFile}
+      hidePreview={hidePreviewLocalFile}
+    />
+  )
+
+  // `datasource` is only dereferenced when a document is actually being previewed.
+  const onlineDocumentPreview = currentDocument && (
+    <OnlineDocumentPreview
+      datasourceNodeId={datasource!.nodeId}
+      currentPage={currentDocument}
+      hidePreview={hidePreviewOnlineDocument}
+    />
+  )
+
+  const websitePreview = currentWebsite && (
+    <WebsitePreview
+      currentWebsite={currentWebsite}
+      hidePreview={hideWebsitePreview}
+    />
+  )
+
+  return (
+    <div className="h-full min-w-0 flex-1">
+      <div className="flex h-full flex-col pl-2 pt-2">
+        {localFilePreview}
+        {onlineDocumentPreview}
+        {websitePreview}
+      </div>
+    </div>
+  )
+})
+StepOnePreview.displayName = 'StepOnePreview'
+
+/**
+ * Props for the step-two preview column. Everything except `localFileList`,
+ * `datasourceType` and `isPendingPreview` is passed to `ChunkPreview` unchanged.
+ */
+type StepTwoPreviewProps = {
+  datasourceType: string | undefined
+  localFileList: FileItem[]
+  onlineDocuments: (NotionPage & { workspace_id: string })[]
+  websitePages: CrawlResultItem[]
+  selectedOnlineDriveFileList: OnlineDriveFile[]
+  isIdle: boolean
+  isPendingPreview: boolean
+  estimateData: FileIndexingEstimateResponse | undefined
+  onPreview: () => void
+  handlePreviewFileChange: (file: DocumentItem) => void
+  handlePreviewOnlineDocumentChange: (page: NotionPage) => void
+  handlePreviewWebsitePageChange: (website: CrawlResultItem) => void
+  handlePreviewOnlineDriveFileChange: (file: OnlineDriveFile) => void
+}
+
+/**
+ * Preview column rendered beside the step-two form: wraps `ChunkPreview`
+ * in the same two-level layout container used by the step-one preview.
+ */
+export const StepTwoPreview = memo((props: StepTwoPreviewProps) => {
+  const {
+    datasourceType,
+    localFileList,
+    onlineDocuments,
+    websitePages,
+    selectedOnlineDriveFileList,
+    isIdle,
+    isPendingPreview,
+    estimateData,
+    onPreview,
+    handlePreviewFileChange,
+    handlePreviewOnlineDocumentChange,
+    handlePreviewWebsitePageChange,
+    handlePreviewOnlineDriveFileChange,
+  } = props
+
+  // ChunkPreview takes the raw file of each list entry, not the list entry itself.
+  const localFiles = localFileList.map(({ file }) => file)
+  const dataSourceType = datasourceType as DatasourceType
+
+  return (
+    <div className="h-full min-w-0 flex-1">
+      <div className="flex h-full flex-col pl-2 pt-2">
+        <ChunkPreview
+          dataSourceType={dataSourceType}
+          localFiles={localFiles}
+          onlineDocuments={onlineDocuments}
+          websitePages={websitePages}
+          onlineDriveFiles={selectedOnlineDriveFileList}
+          isIdle={isIdle}
+          isPending={isPendingPreview}
+          estimateData={estimateData}
+          onPreview={onPreview}
+          handlePreviewFileChange={handlePreviewFileChange}
+          handlePreviewOnlineDocumentChange={handlePreviewOnlineDocumentChange}
+          handlePreviewWebsitePageChange={handlePreviewWebsitePageChange}
+          handlePreviewOnlineDriveFileChange={handlePreviewOnlineDriveFileChange}
+        />
+      </div>
+    </div>
+  )
+})
+StepTwoPreview.displayName = 'StepTwoPreview'
--- a/web/app/components/datasets/documents/create-from-pipeline/steps/step-one-content.tsx
+++ b/web/app/components/datasets/documents/create-from-pipeline/steps/step-one-content.tsx
@@ -0,0 +1,110 @@
+'use client'
+import type { Datasource } from '@/app/components/rag-pipeline/components/panel/test-run/types'
+import type { DataSourceNodeType } from '@/app/components/workflow/nodes/data-source/types'
+import type { Node } from '@/app/components/workflow/types'
+import { memo } from 'react'
+import Divider from '@/app/components/base/divider'
+import VectorSpaceFull from '@/app/components/billing/vector-space-full'
+import LocalFile from '@/app/components/datasets/documents/create-from-pipeline/data-source/local-file'
+import OnlineDocuments from '@/app/components/datasets/documents/create-from-pipeline/data-source/online-documents'
+import OnlineDrive from '@/app/components/datasets/documents/create-from-pipeline/data-source/online-drive'
+import WebsiteCrawl from '@/app/components/datasets/documents/create-from-pipeline/data-source/website-crawl'
+import { DatasourceType } from '@/models/pipeline'
+import UpgradeCard from '../../../create/step-one/upgrade-card'
+import Actions from '../actions'
+import DataSourceOptions from '../data-source-options'
+
+/**
+ * Props for the step-one form column: the selected datasource, the values shown
+ * by the action bar, and the callbacks wired to the datasource picker, the
+ * credential selectors and the action bar.
+ */
+type StepOneContentProps = {
+  datasource: Datasource | undefined
+  datasourceType: string | undefined
+  pipelineNodes: Node<DataSourceNodeType>[]
+  supportBatchUpload: boolean
+  localFileListLength: number
+  isShowVectorSpaceFull: boolean
+  showSelect: boolean
+  totalOptions: number | undefined
+  selectedOptions: number | undefined
+  tip: string
+  nextBtnDisabled: boolean
+  onSelectDataSource: (dataSource: Datasource) => void
+  onCredentialChange: (credentialId: string) => void
+  onSelectAll: () => void
+  onNextStep: () => void
+}
+
+/**
+ * Step-one form column: datasource picker, the panel matching the selected
+ * datasource type, an optional "vector space full" notice, the action bar and,
+ * for local files without batch-upload support, an upgrade card.
+ */
+const StepOneContent = (props: StepOneContentProps) => {
+  const {
+    datasource,
+    datasourceType,
+    pipelineNodes,
+    supportBatchUpload,
+    localFileListLength,
+    isShowVectorSpaceFull,
+    showSelect,
+    totalOptions,
+    selectedOptions,
+    tip,
+    nextBtnDisabled,
+    onSelectDataSource,
+    onCredentialChange,
+    onSelectAll,
+    onNextStep,
+  } = props
+
+  const isLocalFile = datasourceType === DatasourceType.localFile
+  const showUpgradeCard = isLocalFile && !supportBatchUpload && localFileListLength > 0
+
+  // Only one datasource type can match, so a single dispatch picks the panel.
+  // `datasource` is dereferenced only after a known type has matched.
+  const renderDataSourcePanel = () => {
+    switch (datasourceType) {
+      case DatasourceType.localFile:
+        return (
+          <LocalFile
+            allowedExtensions={datasource!.nodeData.fileExtensions || []}
+            supportBatchUpload={supportBatchUpload}
+          />
+        )
+      case DatasourceType.onlineDocument:
+        return (
+          <OnlineDocuments
+            nodeId={datasource!.nodeId}
+            nodeData={datasource!.nodeData}
+            onCredentialChange={onCredentialChange}
+          />
+        )
+      case DatasourceType.websiteCrawl:
+        return (
+          <WebsiteCrawl
+            nodeId={datasource!.nodeId}
+            nodeData={datasource!.nodeData}
+            onCredentialChange={onCredentialChange}
+          />
+        )
+      case DatasourceType.onlineDrive:
+        return (
+          <OnlineDrive
+            nodeId={datasource!.nodeId}
+            nodeData={datasource!.nodeData}
+            onCredentialChange={onCredentialChange}
+          />
+        )
+      default:
+        return null
+    }
+  }
+
+  return (
+    <div className="flex flex-col gap-y-5 pt-4">
+      <DataSourceOptions
+        datasourceNodeId={datasource?.nodeId || ''}
+        onSelect={onSelectDataSource}
+        pipelineNodes={pipelineNodes}
+      />
+      {renderDataSourcePanel()}
+      {isShowVectorSpaceFull && <VectorSpaceFull />}
+      <Actions
+        showSelect={showSelect}
+        totalOptions={totalOptions}
+        selectedOptions={selectedOptions}
+        onSelectAll={onSelectAll}
+        disabled={nextBtnDisabled}
+        handleNextStep={onNextStep}
+        tip={tip}
+      />
+      {showUpgradeCard && (
+        <>
+          <Divider type="horizontal" className="my-4 h-px bg-divider-subtle" />
+          <UpgradeCard />
+        </>
+      )}
+    </div>
+  )
+}
+
+export default memo(StepOneContent)
--- a/web/app/components/datasets/documents/create-from-pipeline/steps/step-three-content.tsx
+++ b/web/app/components/datasets/documents/create-from-pipeline/steps/step-three-content.tsx
@@ -0,0 +1,23 @@
+'use client'
+import type { InitialDocumentDetail } from '@/models/pipeline'
+import { memo } from 'react'
+import Processing from '../processing'
+
+/** Props for step three; both values are passed straight to `Processing`. */
+type StepThreeContentProps = {
+  batchId: string
+  documents: InitialDocumentDetail[]
+}
+
+/** Step-three content: renders the `Processing` view for the given batch and documents. */
+const StepThreeContent = (props: StepThreeContentProps) => {
+  const { batchId, documents } = props
+  return <Processing batchId={batchId} documents={documents} />
+}
+
+export default memo(StepThreeContent)
--- a/web/app/components/datasets/documents/create-from-pipeline/steps/step-two-content.tsx
+++ b/web/app/components/datasets/documents/create-from-pipeline/steps/step-two-content.tsx
@@ -0,0 +1,38 @@
+'use client'
+import type { RefObject } from 'react'
+import { memo } from 'react'
+import ProcessDocuments from '../process-documents'
+
+/**
+ * Props for step two. `formRef` is attached to `ProcessDocuments` as its `ref`;
+ * the remaining props are passed through under the same names.
+ */
+type StepTwoContentProps = {
+  formRef: RefObject<{ submit: () => void } | null>
+  dataSourceNodeId: string
+  isRunning: boolean
+  onProcess: () => void
+  onPreview: () => void
+  onSubmit: (data: Record<string, unknown>) => void
+  onBack: () => void
+}
+
+/** Step-two content: renders the `ProcessDocuments` form for the selected datasource node. */
+const StepTwoContent = (props: StepTwoContentProps) => {
+  const {
+    formRef,
+    dataSourceNodeId,
+    isRunning,
+    onProcess,
+    onPreview,
+    onSubmit,
+    onBack,
+  } = props
+
+  return (
+    <ProcessDocuments
+      ref={formRef}
+      dataSourceNodeId={dataSourceNodeId}
+      isRunning={isRunning}
+      onProcess={onProcess}
+      onPreview={onPreview}
+      onSubmit={onSubmit}
+      onBack={onBack}
+    />
+  )
+}
+
+export default memo(StepTwoContent)
--- a/web/app/components/datasets/documents/create-from-pipeline/utils/datasource-info-builder.ts
+++ b/web/app/components/datasets/documents/create-from-pipeline/utils/datasource-info-builder.ts
@@ -0,0 +1,63 @@
+import type { NotionPage } from '@/models/common'
+import type { CrawlResultItem, CustomFile as File } from '@/models/datasets'
+import type { OnlineDriveFile } from '@/models/pipeline'
+import { TransferMethod } from '@/types/app'
+
+/**
+ * Build the datasource info record for a local file.
+ *
+ * Copies the file's id (as `related_id`), name, type, size, extension and
+ * mime type, sets an empty `url`, marks the transfer method as local file and
+ * attaches the credential id.
+ */
+export const buildLocalFileDatasourceInfo = (
+  file: File,
+  credentialId: string,
+): Record<string, unknown> => {
+  const { id, name, type, size, extension, mime_type } = file
+  return {
+    related_id: id,
+    name,
+    type,
+    size,
+    extension,
+    mime_type,
+    url: '',
+    transfer_method: TransferMethod.local_file,
+    credential_id: credentialId,
+  }
+}
+
+/**
+ * Build the datasource info record for an online document.
+ *
+ * `workspace_id` is lifted out of the page to the top level; every other page
+ * field is nested under `page`.
+ */
+export const buildOnlineDocumentDatasourceInfo = (
+  page: NotionPage & { workspace_id: string },
+  credentialId: string,
+): Record<string, unknown> => {
+  const { workspace_id: workspaceId, ...pageFields } = page
+  return {
+    workspace_id: workspaceId,
+    page: pageFields,
+    credential_id: credentialId,
+  }
+}
+
+/**
+ * Build the datasource info record for a crawled website page.
+ *
+ * Returns a shallow copy of the page with `credential_id` added (overriding any
+ * `credential_id` already present on the page).
+ */
+export const buildWebsiteCrawlDatasourceInfo = (
+  page: CrawlResultItem,
+  credentialId: string,
+): Record<string, unknown> => {
+  const info: Record<string, unknown> = { ...page, credential_id: credentialId }
+  return info
+}
+
+/**
+ * Build the datasource info record for an online drive file.
+ *
+ * Only the file's id, name and type are included, together with the bucket
+ * and the credential id.
+ */
+export const buildOnlineDriveDatasourceInfo = (
+  file: OnlineDriveFile,
+  bucket: string,
+  credentialId: string,
+): Record<string, unknown> => {
+  const { id, name, type } = file
+  return {
+    bucket,
+    id,
+    name,
+    type,
+    credential_id: credentialId,
+  }
+}
--- a/web/app/components/datasets/documents/detail/completed/index.tsx
+++ b/web/app/components/datasets/documents/detail/completed/index.tsx
@@ -442,6 +442,10 @@ const Completed: FC<ICompletedProps> = ({
    setFullScreen(!fullScreen)
  }, [fullScreen])

+  const toggleCollapsed = useCallback(() => {
+    setIsCollapsed(prev => !prev)
+  }, [])
+
  const viewNewlyAddedChunk = useCallback(async () => {
    const totalPages = segmentListData?.total_pages || 0
    const total = segmentListData?.total || 0
@@ -578,15 +582,16 @@ const Completed: FC<ICompletedProps> = ({
    return selectedStatus ? 1 : 0
  }, [selectedStatus])

+  const contextValue = useMemo<SegmentListContextValue>(() => ({
+    isCollapsed,
+    fullScreen,
+    toggleFullScreen,
+    currSegment,
+    currChildChunk,
+  }), [isCollapsed, fullScreen, toggleFullScreen, currSegment, currChildChunk])
+
  return (
-    <SegmentListContext.Provider value={{
-      isCollapsed,
-      fullScreen,
-      toggleFullScreen,
-      currSegment,
-      currChildChunk,
-    }}
-    >
+    <SegmentListContext.Provider value={contextValue}>
      {/* Menu Bar */}
      {!isFullDocMode && (
        <div className={s.docSearchWrapper}>
@@ -618,7 +623,7 @@ const Completed: FC<ICompletedProps> = ({
            onClear={() => handleInputChange('')}
          />
          <Divider type="vertical" className="mx-3 h-3.5" />
-          <DisplayToggle isCollapsed={isCollapsed} toggleCollapsed={() => setIsCollapsed(!isCollapsed)} />
+          <DisplayToggle isCollapsed={isCollapsed} toggleCollapsed={toggleCollapsed} />
        </div>
      )}
      {/* Segment list */}
--- a/web/app/components/datasets/documents/detail/completed/segment-card/chunk-content.tsx
+++ b/web/app/components/datasets/documents/detail/completed/segment-card/chunk-content.tsx
@@ -1,4 +1,5 @@
 import type { FC } from 'react'
+import type { SegmentListContextValue } from '..'
 import * as React from 'react'
 import { Markdown } from '@/app/components/base/markdown'
 import { cn } from '@/utils/classnames'
@@ -14,13 +15,15 @@ type ChunkContentProps = {
  className?: string
 }

+const selectIsCollapsed = (s: SegmentListContextValue) => s.isCollapsed
+
 const ChunkContent: FC<ChunkContentProps> = ({
  detail,
  isFullDocMode,
  className,
 }) => {
  const { answer, content, sign_content } = detail
-  const isCollapsed = useSegmentListContext(s => s.isCollapsed)
+  const isCollapsed = useSegmentListContext(selectIsCollapsed)

  if (answer) {
    return (
--- a/web/app/components/datasets/documents/detail/index.tsx
+++ b/web/app/components/datasets/documents/detail/index.tsx
@@ -18,7 +18,7 @@ import { useDocumentDetail, useDocumentMetadata, useInvalidDocumentList } from '
 import { useCheckSegmentBatchImportProgress, useChildSegmentListKey, useSegmentBatchImport, useSegmentListKey } from '@/service/knowledge/use-segment'
 import { useInvalid } from '@/service/use-base'
 import { cn } from '@/utils/classnames'
-import Operations from '../operations'
+import Operations from '../components/operations'
 import StatusItem from '../status-item'
 import BatchModal from './batch-modal'
 import Completed from './completed'
--- a/web/app/components/datasets/documents/hooks/use-documents-page-state.ts
+++ b/web/app/components/datasets/documents/hooks/use-documents-page-state.ts
@@ -0,0 +1,197 @@
+import type { DocumentListResponse } from '@/models/datasets'
+import type { SortType } from '@/service/datasets'
+import { useDebounce, useDebounceFn } from 'ahooks'
+import { useCallback, useEffect, useMemo, useState } from 'react'
+import { normalizeStatusForQuery, sanitizeStatusValue } from '../status-filter'
+import useDocumentListQueryState from './use-document-list-query-state'
+
+/**
+ * Custom hook to manage documents page state including:
+ * - Search state (input value, debounced search value)
+ * - Filter state (status filter, sort value)
+ * - Pagination state (current page, limit)
+ * - Selection state (selected document ids)
+ * - Polling state (timer control for auto-refresh)
+ *
+ * Keyword, status, sort, page and limit are read from and written back to the
+ * query object returned by `useDocumentListQueryState`. Pages are 0-based
+ * locally (`currPage`) and 1-based in the query (`query.page`).
+ */
+export function useDocumentsPageState() {
+  const { query, updateQuery } = useDocumentListQueryState()
+
+  // Search state.
+  // Both values are seeded from the query so that `debouncedSearchValue`
+  // (which starts out as the initial `searchValue`) already equals
+  // `query.keyword` on the first render. Seeding with '' made the
+  // "update URL when search changes" effect below see a mismatch on mount and
+  // overwrite a keyword (and page) present in the URL with an empty keyword.
+  const [inputValue, setInputValue] = useState<string>(() => query.keyword || '')
+  const [searchValue, setSearchValue] = useState<string>(() => query.keyword || '')
+  const debouncedSearchValue = useDebounce(searchValue, { wait: 500 })
+
+  // Filter & sort state
+  const [statusFilterValue, setStatusFilterValue] = useState<string>(() => sanitizeStatusValue(query.status))
+  const [sortValue, setSortValue] = useState<SortType>(query.sort)
+  const normalizedStatusFilterValue = useMemo(
+    () => normalizeStatusForQuery(statusFilterValue),
+    [statusFilterValue],
+  )
+
+  // Pagination state
+  const [currPage, setCurrPage] = useState<number>(query.page - 1)
+  const [limit, setLimit] = useState<number>(query.limit)
+
+  // Selection state
+  const [selectedIds, setSelectedIds] = useState<string[]>([])
+
+  // Polling state
+  const [timerCanRun, setTimerCanRun] = useState(true)
+
+  // Sync local state with URL query changes.
+  // Runs only when `query` changes; `searchValue` is read from the render that
+  // scheduled this effect.
+  useEffect(() => {
+    setCurrPage(query.page - 1)
+    setLimit(query.limit)
+    if (query.keyword !== searchValue) {
+      setInputValue(query.keyword)
+      setSearchValue(query.keyword)
+    }
+    setStatusFilterValue((prev) => {
+      const nextValue = sanitizeStatusValue(query.status)
+      return prev === nextValue ? prev : nextValue
+    })
+    setSortValue(query.sort)
+  }, [query])
+
+  // Update URL when search changes (a new keyword always restarts at page 1)
+  useEffect(() => {
+    if (debouncedSearchValue !== query.keyword) {
+      setCurrPage(0)
+      updateQuery({ keyword: debouncedSearchValue, page: 1 })
+    }
+  }, [debouncedSearchValue, query.keyword, updateQuery])
+
+  // Clear selection when search changes
+  useEffect(() => {
+    if (searchValue !== query.keyword)
+      setSelectedIds([])
+  }, [searchValue, query.keyword])
+
+  // Clear selection when status filter changes
+  useEffect(() => {
+    setSelectedIds([])
+  }, [normalizedStatusFilterValue])
+
+  // Page change handler (`newPage` is 0-based; the query stores it 1-based)
+  const handlePageChange = useCallback((newPage: number) => {
+    setCurrPage(newPage)
+    updateQuery({ page: newPage + 1 })
+  }, [updateQuery])
+
+  // Limit change handler (changing the page size restarts at the first page)
+  const handleLimitChange = useCallback((newLimit: number) => {
+    setLimit(newLimit)
+    setCurrPage(0)
+    updateQuery({ limit: newLimit, page: 1 })
+  }, [updateQuery])
+
+  // Debounced search handler: commits the current input text as the search value
+  const { run: handleSearch } = useDebounceFn(() => {
+    setSearchValue(inputValue)
+  }, { wait: 500 })
+
+  // Input change handler
+  const handleInputChange = useCallback((value: string) => {
+    setInputValue(value)
+    handleSearch()
+  }, [handleSearch])
+
+  // Status filter change handler
+  const handleStatusFilterChange = useCallback((value: string) => {
+    const selectedValue = sanitizeStatusValue(value)
+    setStatusFilterValue(selectedValue)
+    setCurrPage(0)
+    updateQuery({ status: selectedValue, page: 1 })
+  }, [updateQuery])
+
+  // Status filter clear handler (no-op when the filter is already 'all')
+  const handleStatusFilterClear = useCallback(() => {
+    if (statusFilterValue === 'all')
+      return
+    setStatusFilterValue('all')
+    setCurrPage(0)
+    updateQuery({ status: 'all', page: 1 })
+  }, [statusFilterValue, updateQuery])
+
+  // Sort change handler (no-op when the sort value is unchanged)
+  const handleSortChange = useCallback((value: string) => {
+    const next = value as SortType
+    if (next === sortValue)
+      return
+    setSortValue(next)
+    setCurrPage(0)
+    updateQuery({ sort: next, page: 1 })
+  }, [sortValue, updateQuery])
+
+  // Update polling state based on documents response
+  const updatePollingState = useCallback((documentsRes: DocumentListResponse | undefined) => {
+    if (!documentsRes?.data)
+      return
+
+    // A document counts as settled once its indexing status is completed,
+    // paused or error; any other status means it is still in progress.
+    const hasIncompleteDocuments = documentsRes.data.some(({ indexing_status }) => (
+      indexing_status !== 'completed'
+      && indexing_status !== 'paused'
+      && indexing_status !== 'error'
+    ))
+
+    // Filtering by one of these statuses keeps the timer running even when
+    // every listed document is settled. 'all' is not in the list, so it never
+    // forces polling.
+    const transientStatuses = ['queuing', 'indexing', 'paused']
+    const shouldForcePolling = transientStatuses.includes(normalizedStatusFilterValue)
+    setTimerCanRun(shouldForcePolling || hasIncompleteDocuments)
+  }, [normalizedStatusFilterValue])
+
+  // Adjust page when total pages change: if the current page is past the last
+  // page, jump back to the last page (or the first when there are no pages).
+  const adjustPageForTotal = useCallback((documentsRes: DocumentListResponse | undefined) => {
+    if (!documentsRes)
+      return
+    const totalPages = Math.ceil(documentsRes.total / limit)
+    if (currPage > 0 && currPage + 1 > totalPages)
+      handlePageChange(totalPages > 0 ? totalPages - 1 : 0)
+  }, [limit, currPage, handlePageChange])
+
+  return {
+    // Search state
+    inputValue,
+    searchValue,
+    debouncedSearchValue,
+    handleInputChange,
+
+    // Filter & sort state
+    statusFilterValue,
+    sortValue,
+    normalizedStatusFilterValue,
+    handleStatusFilterChange,
+    handleStatusFilterClear,
+    handleSortChange,
+
+    // Pagination state
+    currPage,
+    limit,
+    handlePageChange,
+    handleLimitChange,
+
+    // Selection state
+    selectedIds,
+    setSelectedIds,
+
+    // Polling state
+    timerCanRun,
+    updatePollingState,
+    adjustPageForTotal,
+  }
+}
+
+export default useDocumentsPageState
--- a/Show More
+++ b/Show More
Author	SHA1	Message	Date
yyh	a0aa8cdb45	Merge remote-tracking branch 'origin/main' into feature/task-quadrant-view	2026-01-16 18:20:29 +08:00
yyh	ae8618877b	fix(web): quadrant matrix i18n	2026-01-16 18:17:28 +08:00
가은 정	fad6fa141d	chore: improve accessibility for learn more link (#31120 ) Co-authored-by: khmandarrin <jeong-ga-eun@jeong-ga-eun-ui-MacBookAir.local>	2026-01-16 18:12:07 +08:00
Pádraic Slattery	30821fd26c	chore: Update outdated GitHub Actions versions (#31114 )	2026-01-16 17:56:55 +08:00
Xiangxuan Qu	1a9fdd9a65	refactor: migrate tag list API query parameters to Pydantic (#31097 ) Co-authored-by: fghpdf <fghpdf@users.noreply.github.com>	2026-01-16 17:49:52 +08:00
Stream	de610cbf39	fix: call `get_text_content()` instead of casting to `str` (#31121 ) Signed-off-by: Stream <Stream_2@qq.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>	2026-01-16 18:41:00 +09:00
yyh	1c55602445	fix(web): add calendar icon and DDL label to deadline badge in task-item	2026-01-16 17:24:11 +08:00
yyh	a3f1220d23	feat(web): add fullscreen expand mode to quadrant-matrix component - Add expand button in header to open FullScreenModal - Add numbered circles (1-4) to quadrant headers - Add expanded prop to show full content without line-clamp - Reorder grid layout: Q1 top-left, Q2 top-right, Q3 bottom-left, Q4 bottom-right - Remove axis labels for cleaner design	2026-01-16 17:16:13 +08:00
yyh	d62e16b9bb	fix(web): improve quadrant-matrix layout and text overflow handling - Simplify axis label layout with horizontal/vertical arrangement - Add proper text truncation with line-clamp and tooltips - Fix overflow issues by adding min-w-0 on flex children - Move scores inline with task name for compact display - Add task count badge to quadrant headers - Reduce maxDisplay to 3 for better density	2026-01-16 16:58:57 +08:00
yyh	13f2a43ccc	feat(web): add Eisenhower Matrix visualization component for task quadrants Add a new quadrant-matrix component that renders tasks in a 2x2 grid based on importance and urgency scores. Integrate with code-block as a new 'quadrant' language type for markdown rendering.	2026-01-16 16:58:56 +08:00
yyh	6903c31b84	fix(search-input): retain focus after clearing input (#31107 )	2026-01-16 16:22:14 +08:00
盐粒 Yanli	b2cc9b255d	chore: Update coding agent workflow for backend (#31093 )	2026-01-16 14:28:47 +08:00
XiaoBa	e9f0e1e839	fix(web): replace Response.json with legacy Response constructor for pre-Chrome 105 compatibility(#31091 ) (#31095 ) Co-authored-by: Xiaoba Yu <xb1823725853@gmail.com>	2026-01-16 14:26:23 +08:00
pavior	cd497a8c52	fix(web): use portal for variable picker in code editor (Fixes #31063 ) (#31066 )	2026-01-16 13:31:57 +08:00
Stephen Zhou	7aab4529e6	chore: lint for state hooks (#31088 )	2026-01-16 11:58:28 +08:00
E.G	4bff0cd0ab	fix: resolve 'Expand all chunks' button not working (#31074 ) Co-authored-by: GlobalStar117 <GlobalStar117@users.noreply.github.com> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: crazywoola <427733928@qq.com>	2026-01-16 11:34:42 +08:00
byteforge	c98870c3f4	refactor: always preserve marketplace search state in URL (#31069 ) Co-authored-by: Stephen Zhou <38493346+hyoban@users.noreply.github.com>	2026-01-16 08:52:53 +09:00
Stephen Zhou	b06c7c8f33	ci: disable limit annotation (#31072 )	2026-01-15 23:04:26 +08:00
Stephen Zhou	1a2fce7055	ci: eslint annotation (#31056 )	2026-01-15 21:49:46 +08:00
lif	2b021e8752	fix: remove hardcoded 48-character limit from text inputs (#30156 ) Signed-off-by: majiayu000 <1835304752@qq.com> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>	2026-01-15 17:43:00 +08:00
wangxiaolei	4a197b9458	fix: fix log updated_at is refreshed (#31045 )	2026-01-15 15:42:46 +08:00
Xiyuan Chen	772ff636ec	feat: credential sync fix for enterprise edition (#30626 )	2026-01-14 23:33:24 -08:00
Stephen Zhou	ab1c5a2027	refactor: remove manual set query logic (#31039 )	2026-01-15 15:25:43 +08:00
hj24	33e99f069b	fix: message clean service ut (#31038 )	2026-01-15 15:13:25 +08:00
hj24	52af829f1f	refactor: enhance clean messages task (#29638 ) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: 非法操作 <hjlarry@163.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>	2026-01-15 14:03:17 +08:00
-LAN-	0ef8b5a0ca	chore: bump version to 1.11.4 (#30961 )	2026-01-15 11:36:15 +08:00
wangxiaolei	2bfc54314e	feat: single run add opentelemetry (#31020 )	2026-01-15 11:10:55 +08:00
Coding On Star	bdd8d5b470	test: add unit tests for PluginPage and related components (#30908 ) Co-authored-by: CodingOnStar <hanxujiang@dify.ai>	2026-01-15 10:56:02 +08:00
Joseph Adams	4955de5905	fix: validation error when uploading images with None URL values (#31012 ) Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com>	2026-01-15 10:54:10 +08:00
yyh	3bee2ee067	refactor(contract): restructure console contracts with nested billing module (#30999 )	2026-01-15 10:41:18 +08:00
Stephen Zhou	328897f81c	build: require node 24.13.0 (#30945 )	2026-01-15 10:38:55 +08:00
Coding On Star	ab078380a3	feat(web): refactor documents component structure and enhance functionality (#30854 ) Co-authored-by: CodingOnStar <hanxujiang@dify.ai>	2026-01-15 10:33:58 +08:00
Coding On Star	a33ac77a22	feat: implement document creation pipeline with multi-step wizard and datasource management (#30843 ) Co-authored-by: CodingOnStar <hanxujiang@dify.ai>	2026-01-15 10:33:48 +08:00
Asuka Minato	d3923e7b56	refactor: port AppAnnotationHitHistory (#30922 ) Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com>	2026-01-15 10:14:55 +08:00
Asuka Minato	2f633de45e	refactor: port TenantCreditPool (#30926 ) Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com>	2026-01-15 10:14:15 +08:00
wangxiaolei	98c88cec34	refactor: delete_endpoint should be idempotent (#30954 )	2026-01-15 10:10:10 +08:00
wangxiaolei	c6999fb5be	fix: fix plugin edit endpoint app disappear (#30951 )	2026-01-15 10:09:57 +08:00
Asuka Minato	f7f9a08fa5	refactor: port TidbAuthBinding( (#31006 ) Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>	2026-01-15 10:07:02 +08:00
wangxiaolei	5008f5e89b	fix: Use raw SQL UPDATE to set read status without triggering updated… (#31015 )	2026-01-15 09:51:44 +08:00
wangxiaolei	1dd89a02ea	fix: fix missing id and message_id (#31008 )	2026-01-14 23:26:17 +09:00
盐粒 Yanli	5bf4114d6f	fix: increase name length limit in ExternalDatasetCreatePayload (#31000 ) Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Asuka Minato <i@asukaminato.eu.org>	2026-01-14 22:13:53 +09:00
yyh	a56e94ba8e	feat: add .agent/skills symlink and orpc-contract-first skill (#30968 )	2026-01-14 21:13:14 +08:00
Milad Rashidikhah	11f1782df0	fix: correct API Extension documentation link (#30962 )	2026-01-14 21:21:15 +09:00
wangxiaolei	8cf5d9a6a1	fix: fix Cannot destructure property 'name' of 'value' as it is undef… (#30991 )	2026-01-14 19:30:47 +08:00
wangxiaolei	0ec2b12e65	feat: allow pass hostname in docker env (#30975 )	2026-01-14 19:30:37 +08:00
Stephen Zhou	f33b1a3332	fix: redirect after login (#30985 )	2026-01-14 17:20:49 +08:00
kenwoodjw	08026f7399	fix(deps): security updates for pdfminer.six, authlib, werkzeug, aiohttp and others (#30976 ) Signed-off-by: kenwoodjw <blackxin55+@gmail.com>	2026-01-14 17:03:46 +08:00
yyh	18e051bd66	chore(web): remove unused demo service component (#30979 )	2026-01-14 17:03:35 +08:00
yyh	42f991dbef	chore(web): disable Serwist dev logs (#30980 )	2026-01-14 16:23:58 +08:00
yyh	b1b2c9636f	fix(web): preserve HTTP method in ORPC fetchCompat mode (#30971 ) Co-authored-by: Stephen Zhou <38493346+hyoban@users.noreply.github.com>	2026-01-14 16:18:12 +08:00
				`@@ -0,0 +1 @@`
				"""Unit tests for `controllers.console.datasets` controllers."""
@@ -1 +1 @@
 .21.1