mirror of
https://github.com/langgenius/dify.git
synced 2026-02-09 15:10:13 -05:00
chore: fix review issues
This commit is contained in:
@@ -21,6 +21,7 @@ from core.model_runtime.model_providers.__base.speech2text_model import Speech2T
|
||||
from core.model_runtime.model_providers.__base.text_embedding_model import TextEmbeddingModel
|
||||
from core.model_runtime.model_providers.__base.tts_model import TTSModel
|
||||
from core.provider_manager import ProviderManager
|
||||
from core.workflow.utils.generator_timeout import with_first_token_timeout
|
||||
from extensions.ext_redis import redis_client
|
||||
from models.provider import ProviderType
|
||||
from services.enterprise.plugin_manager_service import PluginCredentialType
|
||||
@@ -180,8 +181,6 @@ class ModelInstance:
|
||||
|
||||
# Apply first token timeout wrapper for streaming responses
|
||||
if stream and first_token_timeout and first_token_timeout > 0 and isinstance(result, Generator):
|
||||
from core.workflow.utils.generator_timeout import with_first_token_timeout
|
||||
|
||||
result = with_first_token_timeout(result, first_token_timeout)
|
||||
|
||||
return cast(Union[LLMResult, Generator], result)
|
||||
|
||||
@@ -43,11 +43,3 @@ class FileTypeNotSupportError(LLMNodeError):
|
||||
class UnsupportedPromptContentTypeError(LLMNodeError):
|
||||
def __init__(self, *, type_name: str):
|
||||
super().__init__(f"Prompt content type {type_name} is not supported.")
|
||||
|
||||
|
||||
class LLMFirstTokenTimeoutError(LLMNodeError):
|
||||
"""Raised when LLM request fails to receive first token within configured timeout."""
|
||||
|
||||
def __init__(self, timeout_ms: int):
|
||||
self.timeout_ms = timeout_ms
|
||||
super().__init__(f"LLM request timed out after {timeout_ms}ms without receiving first token")
|
||||
|
||||
@@ -42,13 +42,15 @@ def with_first_token_timeout(
|
||||
FirstTokenTimeoutError: If first item doesn't arrive within timeout
|
||||
"""
|
||||
start_time = time.monotonic()
|
||||
first_token_received = False
|
||||
|
||||
for item in generator:
|
||||
if not first_token_received:
|
||||
current_time = time.monotonic()
|
||||
if current_time - start_time > timeout_seconds:
|
||||
raise FirstTokenTimeoutError(int(timeout_seconds * 1000))
|
||||
first_token_received = True
|
||||
# Handle first item separately to check timeout only once
|
||||
try:
|
||||
first_item = next(generator)
|
||||
if time.monotonic() - start_time > timeout_seconds:
|
||||
raise FirstTokenTimeoutError(int(timeout_seconds * 1000))
|
||||
yield first_item
|
||||
except StopIteration:
|
||||
return
|
||||
|
||||
yield item
|
||||
# Yield remaining items without timeout checks
|
||||
yield from generator
|
||||
|
||||
@@ -9,7 +9,6 @@ import pytest
|
||||
from core.model_runtime.entities.llm_entities import LLMResultChunk, LLMResultChunkDelta
|
||||
from core.model_runtime.entities.message_entities import AssistantPromptMessage
|
||||
from core.workflow.nodes.base.entities import RetryConfig
|
||||
from core.workflow.nodes.llm.exc import LLMFirstTokenTimeoutError
|
||||
from core.workflow.utils.generator_timeout import FirstTokenTimeoutError, with_first_token_timeout
|
||||
|
||||
|
||||
@@ -87,26 +86,6 @@ class TestRetryConfigFirstTokenTimeout:
|
||||
assert restored_config.has_first_token_timeout is True
|
||||
|
||||
|
||||
class TestLLMFirstTokenTimeoutError:
|
||||
"""Test cases for LLMFirstTokenTimeoutError exception."""
|
||||
|
||||
def test_error_message_format(self):
|
||||
"""Test that error message contains timeout value in milliseconds."""
|
||||
error = LLMFirstTokenTimeoutError(timeout_ms=3000)
|
||||
|
||||
assert "3000ms" in str(error)
|
||||
assert "first token" in str(error).lower()
|
||||
|
||||
def test_inherits_from_llm_node_error(self):
|
||||
"""Test that LLMFirstTokenTimeoutError inherits from LLMNodeError."""
|
||||
from core.workflow.nodes.llm.exc import LLMNodeError
|
||||
|
||||
error = LLMFirstTokenTimeoutError(timeout_ms=3000)
|
||||
|
||||
assert isinstance(error, LLMNodeError)
|
||||
assert isinstance(error, ValueError)
|
||||
|
||||
|
||||
class TestWithFirstTokenTimeout:
|
||||
"""Test cases for with_first_token_timeout function."""
|
||||
|
||||
|
||||
Reference in New Issue
Block a user