diff --git a/deepsearcher/config.yaml b/deepsearcher/config.yaml index 292a7de..22201a1 100644 --- a/deepsearcher/config.yaml +++ b/deepsearcher/config.yaml @@ -2,16 +2,16 @@ provide_settings: llm: provider: "OpenAILLM" config: - model: "Qwen/Qwen3-8B-FP8" - api_key: "empty" - base_url: "http://localhost:8000/v1" + model: "Qwen/Qwen3-30B-A3B" + api_key: "YOUR_SILICONFLOW_API_KEY" + base_url: "https://api.siliconflow.cn/v1" embedding: provider: "OpenAIEmbedding" - config: + config: model: "Qwen/Qwen3-Embedding-0.6B" - api_key: "empty" - base_url: "http://localhost:8001/v1" + api_key: "YOUR_SILICONFLOW_API_KEY" + base_url: "https://api.siliconflow.cn/v1" dimension: 1024 dim_change: false diff --git a/deepsearcher/embedding/base.py b/deepsearcher/embedding/base.py index d9412ea..4053418 100644 --- a/deepsearcher/embedding/base.py +++ b/deepsearcher/embedding/base.py @@ -1,5 +1,3 @@ -from typing import List - from tqdm import tqdm from deepsearcher.loader.splitter import Chunk @@ -14,7 +12,7 @@ class BaseEmbedding: for the dimensionality of the embeddings. """ - def embed_query(self, text: str) -> List[float]: + def embed_query(self, text: str) -> list[float]: """ Embed a single query text. @@ -26,7 +24,7 @@ class BaseEmbedding: """ pass - def embed_documents(self, texts: List[str]) -> List[List[float]]: + def embed_documents(self, texts: list[str]) -> list[list[float]]: """ Embed a list of document texts. @@ -41,7 +39,7 @@ class BaseEmbedding: """ return [self.embed_query(text) for text in texts] - def embed_chunks(self, chunks: List[Chunk], batch_size: int = 256) -> List[Chunk]: + def embed_chunks(self, chunks: list[Chunk], batch_size: int = 256) -> list[Chunk]: """ Embed a list of Chunk objects. 
diff --git a/deepsearcher/embedding/openai_embedding.py b/deepsearcher/embedding/openai_embedding.py index 88d37e5..f33df42 100644 --- a/deepsearcher/embedding/openai_embedding.py +++ b/deepsearcher/embedding/openai_embedding.py @@ -1,5 +1,4 @@ import os -from typing import List from openai import OpenAI from openai._types import NOT_GIVEN @@ -45,20 +44,20 @@ class OpenAIEmbedding(BaseEmbedding): model = kwargs.pop("model_name") if "dimension" in kwargs: - dimension = kwargs.pop("dimension") + dimension = kwargs.pop("dimension") else: dimension = NOT_GIVEN - + if "dim_change" in kwargs: dim_change = kwargs.pop("dim_change") - + self.dim = dimension self.dim_change = dim_change self.model = model self.client = OpenAI(api_key=api_key, base_url=base_url, **kwargs) - def embed_query(self, text: str) -> List[float]: + def embed_query(self, text: str) -> list[float]: """ Embed a single query text. @@ -75,7 +74,7 @@ class OpenAIEmbedding(BaseEmbedding): return response.data[0].embedding - def embed_documents(self, texts: List[str]) -> List[List[float]]: + def embed_documents(self, texts: list[str]) -> list[list[float]]: """ Embed a list of document texts. 
diff --git a/deepsearcher/llm/openai_llm.py b/deepsearcher/llm/openai_llm.py index fddfeac..ea94ca5 100644 --- a/deepsearcher/llm/openai_llm.py +++ b/deepsearcher/llm/openai_llm.py @@ -59,7 +59,9 @@ class OpenAILLM(BaseLLM): for chunk in completion: stream_response = chunk.choices[0].delta.content if stream_response: + print(stream_response, end="", flush=True) response += stream_response if stream_callback: stream_callback(stream_response) + print("\n") return response diff --git a/deepsearcher/offline_loading.py b/deepsearcher/offline_loading.py index 9e2a2a2..01598b8 100644 --- a/deepsearcher/offline_loading.py +++ b/deepsearcher/offline_loading.py @@ -1,6 +1,5 @@ import hashlib import os -from typing import List, Union from tqdm import tqdm @@ -10,7 +9,7 @@ from deepsearcher.loader.splitter import split_docs_to_chunks def load_from_local_files( - paths_or_directory: Union[str, List[str]], + paths_or_directory: str | list[str], collection_name: str = None, collection_description: str = None, force_new_collection: bool = False, @@ -50,7 +49,7 @@ def load_from_local_files( description=collection_description, force_new_collection=force_new_collection, ) - + # 如果force_rebuild为True,则强制重建集合 if force_rebuild: vector_db.init_collection( @@ -59,7 +58,7 @@ def load_from_local_files( description=collection_description, force_new_collection=True, ) - + if isinstance(paths_or_directory, str): paths_or_directory = [paths_or_directory] all_docs = [] @@ -92,7 +91,7 @@ def load_from_local_files( def load_from_website( - urls: Union[str, List[str]], + urls: str | list[str], collection_name: str = None, collection_description: str = None, force_new_collection: bool = False, diff --git a/deepsearcher/online_query.py b/deepsearcher/online_query.py index dffd3ed..92b1acb 100644 --- a/deepsearcher/online_query.py +++ b/deepsearcher/online_query.py @@ -1,11 +1,9 @@ -from typing import List, Tuple - # from deepsearcher.configuration import vector_db, embedding_model, llm from 
deepsearcher import configuration from deepsearcher.vector_db.base import RetrievalResult -def query(original_query: str, max_iter: int = 3) -> Tuple[str, List[RetrievalResult]]: +def query(original_query: str, max_iter: int = 3) -> tuple[str, list[RetrievalResult]]: """ Query the knowledge base with a question and get an answer. @@ -27,7 +25,7 @@ def query(original_query: str, max_iter: int = 3) -> Tuple[str, List[RetrievalRe def retrieve( original_query: str, max_iter: int = 3 -) -> Tuple[List[RetrievalResult], List[str]]: +) -> tuple[list[RetrievalResult], list[str]]: """ Retrieve relevant information from the knowledge base without generating an answer. @@ -48,47 +46,3 @@ def retrieve( original_query, max_iter=max_iter ) return retrieved_results, [] - - -def naive_retrieve(query: str, collection: str = None, top_k=10) -> List[RetrievalResult]: - """ - Perform a simple retrieval from the knowledge base using the naive RAG approach. - - This function uses the naive RAG agent to retrieve information from the knowledge base - without any advanced techniques like iterative refinement. - - Args: - query: The question or query to search for. - collection: The name of the collection to search in. If None, searches in all collections. - top_k: The maximum number of results to return. - - Returns: - A list of retrieval results. - """ - naive_rag = configuration.naive_rag - all_retrieved_results, _ = naive_rag.retrieve(query) - return all_retrieved_results - - -def naive_rag_query( - query: str, collection: str = None, top_k=10 -) -> Tuple[str, List[RetrievalResult]]: - """ - Query the knowledge base using the naive RAG approach and get an answer. - - This function uses the naive RAG agent to query the knowledge base and generate - an answer based on the retrieved information, without any advanced techniques. - - Args: - query: The question or query to search for. - collection: The name of the collection to search in. If None, searches in all collections. 
- top_k: The maximum number of results to consider. - - Returns: - A tuple containing: - - The generated answer as a string - - A list of retrieval results that were used to generate the answer - """ - naive_rag = configuration.naive_rag - answer, retrieved_results = naive_rag.query(query) - return answer, retrieved_results diff --git a/main.py b/main.py index 78916c4..69b7cd6 100644 --- a/main.py +++ b/main.py @@ -1,5 +1,4 @@ import argparse -from typing import Dict, List, Union import uvicorn from fastapi import Body, FastAPI, HTTPException, Query @@ -31,7 +30,7 @@ class ProviderConfigRequest(BaseModel): feature: str provider: str - config: Dict + config: dict @app.get("/", response_class=HTMLResponse) @@ -43,15 +42,15 @@ async def read_root(): current_dir = os.path.dirname(os.path.abspath(__file__)) # 构建模板文件路径 - 修复路径问题 template_path = os.path.join(current_dir, "deepsearcher", "backend", "templates", "index.html") - + # 读取HTML文件内容 try: - with open(template_path, "r", encoding="utf-8") as file: + with open(template_path, encoding="utf-8") as file: html_content = file.read() return HTMLResponse(content=html_content, status_code=200) except FileNotFoundError: # 如果找不到文件,提供一个简单的默认页面 - default_html = """ + default_html = f"""
@@ -71,7 +70,7 @@ async def read_root():欢迎使用 DeepSearcher 智能搜索系统!
系统正在运行,但未找到前端模板文件。
-请确认文件是否存在: {}
+请确认文件是否存在: {template_path}