mirror of
https://github.com/langgenius/dify.git
synced 2026-02-09 23:20:12 -05:00
feat: knowledge pipeline (#25360)
Signed-off-by: -LAN- <laipz8200@outlook.com> Co-authored-by: twwu <twwu@dify.ai> Co-authored-by: crazywoola <100913391+crazywoola@users.noreply.github.com> Co-authored-by: jyong <718720800@qq.com> Co-authored-by: Wu Tianwei <30284043+WTW0313@users.noreply.github.com> Co-authored-by: QuantumGhost <obelisk.reg+git@gmail.com> Co-authored-by: lyzno1 <yuanyouhuilyz@gmail.com> Co-authored-by: quicksand <quicksandzn@gmail.com> Co-authored-by: Jyong <76649700+JohnJyong@users.noreply.github.com> Co-authored-by: lyzno1 <92089059+lyzno1@users.noreply.github.com> Co-authored-by: zxhlyh <jasonapring2015@outlook.com> Co-authored-by: Yongtao Huang <yongtaoh2022@gmail.com> Co-authored-by: autofix-ci[bot] <114827586+autofix-ci[bot]@users.noreply.github.com> Co-authored-by: Joel <iamjoel007@gmail.com> Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com> Co-authored-by: nite-knite <nkCoding@gmail.com> Co-authored-by: Hanqing Zhao <sherry9277@gmail.com> Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> Co-authored-by: Harry <xh001x@hotmail.com>
This commit is contained in:
@@ -1,9 +1,12 @@
|
||||
import type { DataSourceNotionPage, DataSourceProvider } from './common'
|
||||
import type { AppIconType, AppMode, RetrievalConfig } from '@/types/app'
|
||||
import type { AppIconType, AppMode, RetrievalConfig, TransferMethod } from '@/types/app'
|
||||
import type { Tag } from '@/app/components/base/tag-management/constant'
|
||||
import type { IndexingType } from '@/app/components/datasets/create/step-two'
|
||||
import type { MetadataFilteringVariableType } from '@/app/components/workflow/nodes/knowledge-retrieval/types'
|
||||
import type { MetadataItemWithValue } from '@/app/components/datasets/metadata/types'
|
||||
import { ExternalKnowledgeBase, General, ParentChild, Qa } from '@/app/components/base/icons/src/public/knowledge/dataset-card'
|
||||
import { GeneralChunk, ParentChildChunk, QuestionAndAnswer } from '@/app/components/base/icons/src/vender/knowledge'
|
||||
import type { DatasourceType } from './pipeline'
|
||||
|
||||
export enum DataSourceType {
|
||||
FILE = 'upload_file',
|
||||
@@ -21,6 +24,7 @@ export enum ChunkingMode {
|
||||
text = 'text_model', // General text
|
||||
qa = 'qa_model', // General QA
|
||||
parentChild = 'hierarchical_model', // Parent-Child
|
||||
// graph = 'graph', // todo: Graph RAG
|
||||
}
|
||||
|
||||
export type MetadataInDoc = {
|
||||
@@ -30,11 +34,18 @@ export type MetadataInDoc = {
|
||||
name: string
|
||||
}
|
||||
|
||||
export type IconInfo = {
|
||||
icon: string
|
||||
icon_background?: string
|
||||
icon_type: AppIconType
|
||||
icon_url?: string
|
||||
}
|
||||
|
||||
export type DataSet = {
|
||||
id: string
|
||||
name: string
|
||||
icon: string
|
||||
icon_background: string
|
||||
indexing_status: DocumentIndexingStatus
|
||||
icon_info: IconInfo
|
||||
description: string
|
||||
permission: DatasetPermission
|
||||
data_source_type: DataSourceType
|
||||
@@ -45,6 +56,8 @@ export type DataSet = {
|
||||
app_count: number
|
||||
doc_form: ChunkingMode
|
||||
document_count: number
|
||||
total_document_count: number
|
||||
total_available_documents?: number
|
||||
word_count: number
|
||||
provider: string
|
||||
embedding_model: string
|
||||
@@ -67,6 +80,11 @@ export type DataSet = {
|
||||
}
|
||||
built_in_field_enabled: boolean
|
||||
doc_metadata?: MetadataInDoc[]
|
||||
keyword_number?: number
|
||||
pipeline_id?: string
|
||||
is_published?: boolean // Indicates if the pipeline is published
|
||||
runtime_mode: 'rag_pipeline' | 'general'
|
||||
enable_api: boolean
|
||||
}
|
||||
|
||||
export type ExternalAPIItem = {
|
||||
@@ -136,11 +154,22 @@ export type CrawlOptions = {
|
||||
|
||||
export type CrawlResultItem = {
|
||||
title: string
|
||||
markdown: string
|
||||
content: string
|
||||
description: string
|
||||
source_url: string
|
||||
}
|
||||
|
||||
export type CrawlResult = {
|
||||
data: CrawlResultItem[]
|
||||
time_consuming: number | string
|
||||
}
|
||||
|
||||
// Lifecycle phases of a website-crawl job, used to drive the crawl UI.
export enum CrawlStep {
  init = 'init',
  running = 'running',
  finished = 'finished',
}
|
||||
|
||||
export type FileItem = {
|
||||
fileID: string
|
||||
file: CustomFile
|
||||
@@ -159,6 +188,14 @@ export type FetchDatasetsParams = {
|
||||
}
|
||||
}
|
||||
|
||||
export type DatasetListRequest = {
|
||||
initialPage: number
|
||||
tag_ids?: string[]
|
||||
limit: number
|
||||
include_all?: boolean
|
||||
keyword?: string
|
||||
}
|
||||
|
||||
export type DataSetListResponse = {
|
||||
data: DataSet[]
|
||||
has_more: boolean
|
||||
@@ -272,7 +309,7 @@ export const DisplayStatusList = [
|
||||
|
||||
// One of the display-status literals enumerated in DisplayStatusList.
export type DocumentDisplayStatus = typeof DisplayStatusList[number]
|
||||
|
||||
export type DataSourceInfo = {
|
||||
export type LegacyDataSourceInfo = {
|
||||
upload_file: {
|
||||
id: string
|
||||
name: string
|
||||
@@ -288,18 +325,60 @@ export type DataSourceInfo = {
|
||||
provider?: DataSourceProvider
|
||||
job_id: string
|
||||
url: string
|
||||
credential_id?: string
|
||||
}
|
||||
|
||||
export type LocalFileInfo = {
|
||||
extension: string
|
||||
mime_type: string
|
||||
name: string
|
||||
related_id: string
|
||||
size: number
|
||||
transfer_method: TransferMethod
|
||||
url: string
|
||||
}
|
||||
|
||||
export type WebsiteCrawlInfo = {
|
||||
content: string
|
||||
credential_id: string
|
||||
description: string
|
||||
source_url: string
|
||||
title: string
|
||||
}
|
||||
|
||||
export type OnlineDocumentInfo = {
|
||||
credential_id: string
|
||||
workspace_id: string
|
||||
page: {
|
||||
last_edited_time: string
|
||||
page_icon: DataSourceNotionPage['page_icon']
|
||||
page_id: string
|
||||
page_name: string
|
||||
parent_id: string
|
||||
type: string
|
||||
},
|
||||
}
|
||||
|
||||
export type OnlineDriveInfo = {
|
||||
bucket: string
|
||||
credential_id: string
|
||||
id: string
|
||||
name: string
|
||||
type: 'file' | 'folder'
|
||||
}
|
||||
|
||||
export type DataSourceInfo = LegacyDataSourceInfo | LocalFileInfo | OnlineDocumentInfo | WebsiteCrawlInfo
|
||||
|
||||
export type InitialDocumentDetail = {
|
||||
id: string
|
||||
batch: string
|
||||
position: number
|
||||
dataset_id: string
|
||||
data_source_type: DataSourceType
|
||||
data_source_type: DataSourceType | DatasourceType
|
||||
data_source_info: DataSourceInfo
|
||||
dataset_process_rule_id: string
|
||||
name: string
|
||||
created_from: 'api' | 'web'
|
||||
created_from: 'rag-pipeline' | 'api' | 'web'
|
||||
created_by: string
|
||||
created_at: number
|
||||
indexing_status: DocumentIndexingStatus
|
||||
@@ -313,7 +392,6 @@ export type InitialDocumentDetail = {
|
||||
export type SimpleDocumentDetail = InitialDocumentDetail & {
|
||||
enabled: boolean
|
||||
word_count: number
|
||||
is_qa: boolean // TODO waiting for backend to add this field
|
||||
error?: string | null
|
||||
archived: boolean
|
||||
updated_at: number
|
||||
@@ -338,7 +416,7 @@ export type DocumentListResponse = {
|
||||
|
||||
export type DocumentReq = {
|
||||
original_document_id?: string
|
||||
indexing_technique?: string
|
||||
indexing_technique?: IndexingType
|
||||
doc_form: ChunkingMode
|
||||
doc_language: string
|
||||
process_rule: ProcessRule
|
||||
@@ -374,6 +452,7 @@ export type DataSource = {
|
||||
export type NotionInfo = {
|
||||
workspace_id: string
|
||||
pages: DataSourceNotionPage[]
|
||||
credential_id: string
|
||||
}
|
||||
export type NotionPage = {
|
||||
page_id: string
|
||||
@@ -688,3 +767,47 @@ export type BatchImportResponse = {
|
||||
job_id: string
|
||||
job_status: string
|
||||
}
|
||||
|
||||
// Maps each chunking mode (plus the pseudo-mode 'external' for external
// knowledge bases) to its card-style icon component (icon with background).
export const DOC_FORM_ICON_WITH_BG: Record<ChunkingMode | 'external', React.ComponentType<{ className: string }>> = {
  [ChunkingMode.text]: General,
  [ChunkingMode.qa]: Qa,
  [ChunkingMode.parentChild]: ParentChild,
  // [ChunkingMode.graph]: Graph, // todo: Graph RAG
  external: ExternalKnowledgeBase,
}
|
||||
|
||||
// Plain (no-background) chunk icons, keyed by the three supported modes.
// Key type is narrowed to exclude any future graph mode.
export const DOC_FORM_ICON: Record<ChunkingMode.text | ChunkingMode.qa | ChunkingMode.parentChild, React.ComponentType<{ className: string }>> = {
  [ChunkingMode.text]: GeneralChunk,
  [ChunkingMode.qa]: QuestionAndAnswer,
  [ChunkingMode.parentChild]: ParentChildChunk,
}
|
||||
|
||||
// Short text key used for each chunking mode (e.g. for i18n lookups).
export const DOC_FORM_TEXT: Record<ChunkingMode, string> = {
  [ChunkingMode.text]: 'general',
  [ChunkingMode.qa]: 'qa',
  [ChunkingMode.parentChild]: 'parentChild',
  // [ChunkingMode.graph]: 'graph', // todo: Graph RAG
}
|
||||
|
||||
export type CreateDatasetReq = {
|
||||
yaml_content?: string
|
||||
}
|
||||
|
||||
export type CreateDatasetResponse = {
|
||||
id: string
|
||||
name: string
|
||||
description: string
|
||||
permission: DatasetPermission
|
||||
indexing_technique: IndexingType
|
||||
created_by: string
|
||||
created_at: number
|
||||
updated_by: string
|
||||
updated_at: number
|
||||
pipeline_id: string
|
||||
dataset_id: string
|
||||
}
|
||||
|
||||
export type IndexingStatusBatchRequest = {
|
||||
datasetId: string
|
||||
batchId: string
|
||||
}
|
||||
|
||||
@@ -285,8 +285,13 @@ export type WorkflowRunDetailResponse = {
|
||||
viewport?: Viewport
|
||||
}
|
||||
inputs: string
|
||||
inputs_truncated: boolean
|
||||
status: 'running' | 'succeeded' | 'failed' | 'stopped'
|
||||
outputs?: string
|
||||
outputs_truncated: boolean
|
||||
outputs_full_content?: {
|
||||
download_url: string
|
||||
}
|
||||
error?: string
|
||||
elapsed_time?: number
|
||||
total_tokens?: number
|
||||
|
||||
301
web/models/pipeline.ts
Normal file
301
web/models/pipeline.ts
Normal file
@@ -0,0 +1,301 @@
|
||||
import type { Edge, EnvironmentVariable, Node, SupportUploadFileTypes } from '@/app/components/workflow/types'
|
||||
import type { DSLImportMode, DSLImportStatus } from './app'
|
||||
import type { ChunkingMode, DatasetPermission, DocumentIndexingStatus, FileIndexingEstimateResponse, IconInfo } from './datasets'
|
||||
import type { Dependency } from '@/app/components/plugins/types'
|
||||
import type { AppIconSelection } from '@/app/components/base/app-icon-picker'
|
||||
import type { Viewport } from 'reactflow'
|
||||
import type { TransferMethod } from '@/types/app'
|
||||
import { BaseFieldType } from '@/app/components/base/form/form-scenarios/base/types'
|
||||
import type { NodeRunResult } from '@/types/workflow'
|
||||
|
||||
// Kinds of data source a knowledge-pipeline run can ingest from.
// Values are the serialized identifiers exchanged with the backend.
export enum DatasourceType {
  localFile = 'local_file',
  onlineDocument = 'online_document',
  websiteCrawl = 'website_crawl',
  onlineDrive = 'online_drive',
}
|
||||
|
||||
export type PipelineTemplateListParams = {
|
||||
type: 'built-in' | 'customized'
|
||||
}
|
||||
|
||||
export type PipelineTemplate = {
|
||||
id: string
|
||||
name: string
|
||||
icon: IconInfo
|
||||
description: string
|
||||
position: number
|
||||
chunk_structure: ChunkingMode
|
||||
}
|
||||
|
||||
export type PipelineTemplateListResponse = {
|
||||
pipeline_templates: PipelineTemplate[]
|
||||
}
|
||||
|
||||
export type PipelineTemplateByIdRequest = {
|
||||
template_id: string
|
||||
type: 'built-in' | 'customized'
|
||||
}
|
||||
|
||||
export type PipelineTemplateByIdResponse = {
|
||||
id: string
|
||||
name: string
|
||||
icon_info: IconInfo
|
||||
description: string
|
||||
chunk_structure: ChunkingMode
|
||||
export_data: string // DSL content
|
||||
graph: {
|
||||
nodes: Node[]
|
||||
edges: Edge[]
|
||||
viewport: Viewport
|
||||
}
|
||||
created_by: string
|
||||
}
|
||||
|
||||
export type CreateFormData = {
|
||||
name: string
|
||||
appIcon: AppIconSelection
|
||||
description: string
|
||||
permission: DatasetPermission
|
||||
selectedMemberIDs: string[]
|
||||
}
|
||||
|
||||
export type UpdateTemplateInfoRequest = {
|
||||
template_id: string
|
||||
name: string
|
||||
icon_info: IconInfo
|
||||
description: string
|
||||
}
|
||||
|
||||
export type UpdateTemplateInfoResponse = {
|
||||
pipeline_id: string
|
||||
name: string
|
||||
icon: IconInfo
|
||||
description: string
|
||||
position: number
|
||||
}
|
||||
|
||||
export type DeleteTemplateResponse = {
|
||||
code: number
|
||||
}
|
||||
|
||||
export type ExportTemplateDSLResponse = {
|
||||
data: string
|
||||
}
|
||||
|
||||
export type ImportPipelineDSLRequest = {
|
||||
mode: DSLImportMode
|
||||
yaml_content?: string
|
||||
yaml_url?: string
|
||||
pipeline_id?: string
|
||||
}
|
||||
|
||||
export type ImportPipelineDSLResponse = {
|
||||
id: string
|
||||
status: DSLImportStatus
|
||||
pipeline_id: string
|
||||
dataset_id: string
|
||||
current_dsl_version: string
|
||||
imported_dsl_version: string
|
||||
}
|
||||
|
||||
export type ImportPipelineDSLConfirmResponse = {
|
||||
status: DSLImportStatus
|
||||
pipeline_id: string
|
||||
dataset_id: string
|
||||
current_dsl_version: string
|
||||
imported_dsl_version: string
|
||||
error: string
|
||||
}
|
||||
|
||||
export type PipelineCheckDependenciesResponse = {
|
||||
leaked_dependencies: Dependency[]
|
||||
}
|
||||
|
||||
// Input-control kinds a pipeline variable can use; values are the
// serialized identifiers stored in the pipeline DSL.
export enum PipelineInputVarType {
  textInput = 'text-input',
  paragraph = 'paragraph',
  select = 'select',
  number = 'number',
  singleFile = 'file',
  multiFiles = 'file-list',
  checkbox = 'checkbox',
}
|
||||
|
||||
// Bridges pipeline input-var types to the base form-field types used to
// render them. Note the non-identity pairs: 'number' -> numberInput,
// 'file' -> file, 'file-list' -> fileList.
export const VAR_TYPE_MAP: Record<PipelineInputVarType, BaseFieldType> = {
  [PipelineInputVarType.textInput]: BaseFieldType.textInput,
  [PipelineInputVarType.paragraph]: BaseFieldType.paragraph,
  [PipelineInputVarType.select]: BaseFieldType.select,
  [PipelineInputVarType.singleFile]: BaseFieldType.file,
  [PipelineInputVarType.multiFiles]: BaseFieldType.fileList,
  [PipelineInputVarType.number]: BaseFieldType.numberInput,
  [PipelineInputVarType.checkbox]: BaseFieldType.checkbox,
}
|
||||
|
||||
export type RAGPipelineVariable = {
|
||||
belong_to_node_id: string // indicates belong to which node or 'shared'
|
||||
type: PipelineInputVarType
|
||||
label: string
|
||||
variable: string
|
||||
max_length?: number
|
||||
default_value?: string
|
||||
placeholder?: string
|
||||
unit?: string
|
||||
required: boolean
|
||||
tooltips?: string
|
||||
options?: string[]
|
||||
allowed_file_upload_methods?: TransferMethod[]
|
||||
allowed_file_types?: SupportUploadFileTypes[]
|
||||
allowed_file_extensions?: string[]
|
||||
}
|
||||
|
||||
export type InputVar = Omit<RAGPipelineVariable, 'belong_to_node_id'>
|
||||
export type RAGPipelineVariables = RAGPipelineVariable[]
|
||||
|
||||
export type PipelineProcessingParamsRequest = {
|
||||
pipeline_id: string
|
||||
node_id: string
|
||||
}
|
||||
|
||||
export type PipelineProcessingParamsResponse = {
|
||||
variables: RAGPipelineVariables
|
||||
}
|
||||
|
||||
export type PipelinePreProcessingParamsRequest = {
|
||||
pipeline_id: string
|
||||
node_id: string
|
||||
}
|
||||
|
||||
export type PipelinePreProcessingParamsResponse = {
|
||||
variables: RAGPipelineVariables
|
||||
}
|
||||
|
||||
// Detail of a published pipeline version: its workflow graph, authorship,
// and the variables it exposes. Mirrors the backend response payload.
export type PublishedPipelineInfoResponse = {
  id: string
  // The editable workflow graph of this version.
  graph: {
    nodes: Node[]
    edges: Edge[]
    viewport: Viewport
  }
  created_at: number
  created_by: {
    id: string
    name: string
    email: string
  }
  // Content hash of the published version.
  hash: string
  updated_at: number
  updated_by: {
    id: string
    name: string
    email: string
  },
  environment_variables?: EnvironmentVariable[]
  rag_pipeline_variables?: RAGPipelineVariables
  version: string
  // User-assigned name/comment attached when marking this version.
  marked_name: string
  marked_comment: string
}
|
||||
|
||||
export type PublishedPipelineRunRequest = {
|
||||
pipeline_id: string
|
||||
inputs: Record<string, any>
|
||||
start_node_id: string
|
||||
datasource_type: DatasourceType
|
||||
datasource_info_list: Array<Record<string, any>>
|
||||
original_document_id?: string
|
||||
is_preview: boolean
|
||||
}
|
||||
|
||||
export type PublishedPipelineRunPreviewResponse = {
|
||||
task_iod: string
|
||||
workflow_run_id: string
|
||||
data: {
|
||||
id: string
|
||||
status: string
|
||||
created_at: number
|
||||
elapsed_time: number
|
||||
error: string
|
||||
finished_at: number
|
||||
outputs: FileIndexingEstimateResponse
|
||||
total_steps: number
|
||||
total_tokens: number
|
||||
workflow_id: string
|
||||
}
|
||||
}
|
||||
|
||||
export type PublishedPipelineRunResponse = {
|
||||
batch: string
|
||||
dataset: {
|
||||
chunk_structure: ChunkingMode
|
||||
description: string
|
||||
id: string
|
||||
name: string
|
||||
}
|
||||
documents: InitialDocumentDetail[]
|
||||
}
|
||||
|
||||
export type InitialDocumentDetail = {
|
||||
data_source_info: Record<string, any>
|
||||
data_source_type: DatasourceType
|
||||
enable: boolean
|
||||
error: string
|
||||
id: string
|
||||
indexing_status: DocumentIndexingStatus
|
||||
name: string
|
||||
position: number
|
||||
}
|
||||
|
||||
export type PipelineExecutionLogRequest = {
|
||||
dataset_id: string
|
||||
document_id: string
|
||||
}
|
||||
|
||||
export type PipelineExecutionLogResponse = {
|
||||
datasource_info: Record<string, any>
|
||||
datasource_type: DatasourceType
|
||||
input_data: Record<string, any>
|
||||
datasource_node_id: string
|
||||
}
|
||||
|
||||
export type OnlineDocumentPreviewRequest = {
|
||||
workspaceID: string
|
||||
pageID: string
|
||||
pageType: string
|
||||
pipelineId: string
|
||||
datasourceNodeId: string
|
||||
credentialId: string
|
||||
}
|
||||
|
||||
export type OnlineDocumentPreviewResponse = {
|
||||
content: string
|
||||
}
|
||||
|
||||
export type ConversionResponse = {
|
||||
pipeline_id: string
|
||||
dataset_id: string
|
||||
status: 'success' | 'failed'
|
||||
}
|
||||
|
||||
export enum OnlineDriveFileType {
|
||||
file = 'file',
|
||||
folder = 'folder',
|
||||
bucket = 'bucket',
|
||||
}
|
||||
|
||||
export type OnlineDriveFile = {
|
||||
id: string
|
||||
name: string
|
||||
size?: number
|
||||
type: OnlineDriveFileType
|
||||
}
|
||||
|
||||
export type DatasourceNodeSingleRunRequest = {
|
||||
pipeline_id: string
|
||||
start_node_id: string
|
||||
start_node_title: string
|
||||
datasource_type: DatasourceType
|
||||
datasource_info: Record<string, any>
|
||||
}
|
||||
|
||||
export type DatasourceNodeSingleRunResponse = NodeRunResult
|
||||
Reference in New Issue
Block a user