diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
deleted file mode 100644
index cd60e2a..0000000
--- a/.github/ISSUE_TEMPLATE/bug_report.md
+++ /dev/null
@@ -1,32 +0,0 @@
----
-name: Bug report
-about: Create a report to help us improve
-title: ''
-labels: ''
-assignees: ''
-
----
-
-Please describe your issue **in English**
-
-*Note: Small LLMs cannot perform well at prompt following, and are prone to hallucinations. Please make sure your LLM is cutting-edge, preferably a reasoning model, e.g. OpenAI o-series, DeepSeek R1, Claude 3.7 Sonnet etc.*
-
-**Describe the bug**
-A clear and concise description of what the bug is.
-
-**To Reproduce**
-Steps to reproduce the behavior:
-
-**Expected behavior**
-A clear and concise description of what you expected to happen.
-
-**Screenshots**
-If applicable, add screenshots to help explain your problem.
-
-**Environment (please complete the following information):**
- - OS: [e.g. MacOS]
- - pip dependencies
- - Version [e.g. 0.0.1]
-
-**Additional context**
-Add any other context about the problem here.
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
deleted file mode 100644
index ec75cea..0000000
--- a/.github/ISSUE_TEMPLATE/feature_request.md
+++ /dev/null
@@ -1,22 +0,0 @@
----
-name: Feature request
-about: Suggest an idea for this project
-title: ''
-labels: ''
-assignees: ''
-
----
-
-Please describe your suggestion **in English**.
-
-**Is your feature request related to a problem? Please describe.**
-A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
-
-**Describe the solution you'd like**
-A clear and concise description of what you want to happen.
-
-**Describe alternatives you've considered**
-A clear and concise description of any alternative solutions or features you've considered.
-
-**Additional context**
-Add any other context or screenshots about the feature request here.
diff --git a/.github/mergify.yml b/.github/mergify.yml
deleted file mode 100644
index d5f7cc8..0000000
--- a/.github/mergify.yml
+++ /dev/null
@@ -1,34 +0,0 @@
-misc:
-  - branch: &BRANCHES
-      # In this pull request, the changes are based on the main branch
-      - &MASTER_BRANCH base=main
-
-  - name: Label bug fix PRs
-    conditions:
-      # branch condition: in this pull request, the changes are based on any branch referenced by BRANCHES
-      - or: *BRANCHES
-      - 'title~=^fix:'
-    actions:
-      label:
-        add:
-          - kind/bug
-
-  - name: Label feature PRs
-    conditions:
-      # branch condition: in this pull request, the changes are based on any branch referenced by BRANCHES
-      - or: *BRANCHES
-      - 'title~=^feat:'
-    actions:
-      label:
-        add:
-          - kind/feature
-
-  - name: Label enhancement PRs
-    conditions:
-      # branch condition: in this pull request, the changes are based on any branch referenced by BRANCHES
-      - or: *BRANCHES
-      - 'title~=^enhance:'
-    actions:
-      label:
-        add:
-          - kind/enhancement
diff --git a/.github/workflows/cd-docs.yml b/.github/workflows/cd-docs.yml
deleted file mode 100644
index 925c0d3..0000000
--- a/.github/workflows/cd-docs.yml
+++ /dev/null
@@ -1,20 +0,0 @@
-name: "Run Docs CD with UV"
-
-on:
-  push:
-    branches:
-      - "main"
-      - "master"
-    paths:
-      - 'docs/**'
-      - 'mkdocs.yml'
-      - '.github/workflows/docs.yml'
-
-jobs:
-  build-deploy-docs:
-    if: github.repository == 'zilliztech/deep-searcher'
-    uses: ./.github/workflows/docs.yml
-    with:
-      deploy: true
-    permissions:
-      contents: write
diff --git a/.github/workflows/ci-docs.yml b/.github/workflows/ci-docs.yml
deleted file mode 100644
index 8676ffa..0000000
--- a/.github/workflows/ci-docs.yml
+++ /dev/null
@@ -1,24 +0,0 @@
-name: "Run Docs CI with UV"
-
-on:
-  pull_request:
-    types: [opened, reopened, synchronize]
-    paths:
-      - 'docs/**'
-      - 'mkdocs.yml'
-      - '.github/workflows/docs.yml'
-  push:
-    branches:
-      - "**"
-      - "!gh-pages"
-    paths:
-      - 'docs/**'
-      - 'mkdocs.yml'
-      - '.github/workflows/docs.yml'
-
-jobs:
-  build-docs:
-    if: ${{ github.event_name == 'push' || (github.event.pull_request.head.repo.full_name != 'zilliztech/deep-searcher') }}
-    uses: ./.github/workflows/docs.yml
-    with:
-      deploy: false
diff --git a/.github/workflows/docs.yml b/.github/workflows/docs.yml
deleted file mode 100644
index 9d6a9f8..0000000
--- a/.github/workflows/docs.yml
+++ /dev/null
@@ -1,27 +0,0 @@
-on:
-  workflow_call:
-    inputs:
-      deploy:
-        type: boolean
-        description: "If true, the docs will be deployed."
-        default: false
-
-jobs:
-  run-docs:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Install uv
-        uses: astral-sh/setup-uv@v5
-      - name: Install dependencies
-        run: |
-          uv sync --all-extras --dev
-          source .venv/bin/activate
-
-      - name: Build docs
-        run: uv run mkdocs build --verbose --clean
-
-      - name: Build and push docs
-        if: inputs.deploy
-        run: uv run mkdocs gh-deploy --force
diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
deleted file mode 100644
index ab00425..0000000
--- a/.github/workflows/release.yml
+++ /dev/null
@@ -1,37 +0,0 @@
-#git tag v0.x.x # Must be same as the version in pyproject.toml
-#git push --tags
-
-name: Publish Python Package to PyPI
-
-on:
-  push:
-    tags:
-      - "v*"
-
-jobs:
-  publish:
-    name: Publish to PyPI
-    runs-on: ubuntu-latest
-    environment: pypi
-
-    permissions:
-      id-token: write
-      contents: read
-
-    steps:
-      - name: Checkout code
-        uses: actions/checkout@v4
-
-      - name: Set up Python
-        uses: actions/setup-python@v5
-        with:
-          python-version: "3.10"
-
-      - name: Install build tools
-        run: python -m pip install build
-
-      - name: Build package
-        run: python -m build
-
-      - name: Publish to PyPI
-        uses: pypa/gh-action-pypi-publish@release/v1
\ No newline at end of file
diff --git a/.github/workflows/ruff.yml b/.github/workflows/ruff.yml
deleted file mode 100644
index 1a4541e..0000000
--- a/.github/workflows/ruff.yml
+++ /dev/null
@@ -1,25 +0,0 @@
-name: Ruff
-on:
-  push:
-    branches: [ main, master ]
-  pull_request:
-jobs:
-  build:
-    runs-on: ubuntu-latest
-    steps:
-      - uses: actions/checkout@v4
-
-      - name: Install uv
-        uses: astral-sh/setup-uv@v5
-      - name: Install the project
-        run: |
-          uv sync --all-extras --dev
-          source .venv/bin/activate
-
-      - name: Run Ruff
-        run: |
-          uv run ruff format --diff
-          uv run ruff check
-
-      # - name: Run tests
-      #   run: uv run pytest tests
\ No newline at end of file
diff --git a/deepsearcher/agent/deep_search.py b/deepsearcher/agent/deep_search.py
index c1623d4..b11c81c 100644
--- a/deepsearcher/agent/deep_search.py
+++ b/deepsearcher/agent/deep_search.py
@@ -5,6 +5,7 @@
 from deepsearcher.utils import log
 from deepsearcher.vector_db import RetrievalResult
 from deepsearcher.vector_db.base import BaseVectorDB, deduplicate
+
 
 COLLECTION_ROUTE_PROMPT = """
 I provide you with collection_name(s) and corresponding collection_description(s). Please select the collection names that may be related to the question and return a python list of str.
@@ -17,6 +18,7 @@
 When you return, you can ONLY return a json convertable python list of str, WITHOUT any other additional content. Your selected collection name list is:
 """
 
+
 SUB_QUERY_PROMPT = """
 To answer this question more comprehensively, please break down the original question into few numbers of sub-questions (more if necessary).
 If this is a very simple question and no decomposition is necessary, then keep the only one original question.
@@ -41,6 +43,7 @@ Example output:
 Provide your response in a python code list of str format:
 """
 
+
 RERANK_PROMPT = """
 Based on the query questions and the retrieved chunks, determine whether each chunk is helpful in answering any of the query questions.
 For each chunk, you must return "YES" or "NO" without any other information.
@@ -107,7 +110,7 @@ class DeepSearch(BaseAgent):
         embedding_model: BaseEmbedding,
         vector_db: BaseVectorDB,
         max_iter: int = 3,
-        route_collection: bool = True,
+        route_collection: bool = False,
         text_window_splitter: bool = True,
         **kwargs,
     ):
@@ -398,5 +401,5 @@ class DeepSearch(BaseAgent):
     def _format_chunk_texts(self, chunk_texts: list[str]) -> str:
         chunk_str = ""
         for i, chunk in enumerate(chunk_texts):
-            chunk_str += f"""<chunk_{i}>\n{chunk}\n</chunk_{i}>\n\n"""
+            chunk_str += f"""\n{chunk}\n\n"""
         return chunk_str
diff --git a/deepsearcher/backend/templates/index.html b/deepsearcher/backend/templates/index.html
index 465eaad..902801f 100644
--- a/deepsearcher/backend/templates/index.html
+++ b/deepsearcher/backend/templates/index.html
@@ -297,7 +297,6 @@
 [hunk body lost in extraction: context lines around the "查询结果:" ("query results") section of the page, plus one removed line — presumably the element that displayed the consumed-token count, whose JavaScript update is deleted in the next hunk]
@@ -515,7 +514,6 @@
                 if (response.ok) {
                     showStatus('queryStatus', '查询完成', 'success');
                     document.getElementById('resultText').textContent = data.result;
-                    document.getElementById('tokenInfo').textContent = `消耗Token数: ${data.consume_token}`;
                     showResult();
 
                     // 显示进度日志
diff --git a/deepsearcher/configuration.py b/deepsearcher/configuration.py
index 09b741c..6a6f44c 100644
--- a/deepsearcher/configuration.py
+++ b/deepsearcher/configuration.py
@@ -210,6 +210,6 @@ def init_config(config: Configuration):
         embedding_model=embedding_model,
         vector_db=vector_db,
         max_iter=config.query_settings["max_iter"],
-        route_collection=True,
+        route_collection=False,
         text_window_splitter=True,
     )
diff --git a/deepsearcher/llm/openai_llm.py b/deepsearcher/llm/openai_llm.py
index 93336b0..e66231f 100644
--- a/deepsearcher/llm/openai_llm.py
+++ b/deepsearcher/llm/openai_llm.py
@@ -32,7 +32,7 @@ class OpenAILLM(BaseLLM):
             base_url = kwargs.pop("base_url")
         self.client = OpenAI(api_key=api_key, base_url=base_url, **kwargs)
 
-    def chat(self, messages: list[dict], stream_callback = None) -> str:
+    def chat(self, messages: list[dict]) -> str:
         """
         Send a chat message to the OpenAI model and get a response.
 
@@ -47,7 +47,7 @@ class OpenAILLM(BaseLLM):
         with self.client.chat.completions.create(
             model=self.model,
             messages=messages,
-            stream=True,
+            stream=True
         ) as stream:
             content = ""
             reasoning_content = ""
@@ -59,12 +59,8 @@ class OpenAILLM(BaseLLM):
                 if hasattr(delta, 'reasoning_content') and delta.reasoning_content is not None:
                     print(delta.reasoning_content, end='', flush=True)
                     reasoning_content += delta.reasoning_content
-                    if stream_callback:
-                        stream_callback(delta.reasoning_content)
                 if hasattr(delta, 'content') and delta.content is not None:
                     print(delta.content, end="", flush=True)
                     content += delta.content
-                    if stream_callback:
-                        stream_callback(delta.content)
         print("\n")
         return content
diff --git a/deepsearcher/online_query.py b/deepsearcher/online_query.py
index eb0a321..e5c3533 100644
--- a/deepsearcher/online_query.py
+++ b/deepsearcher/online_query.py
@@ -42,7 +42,7 @@ def retrieve(
         - A list of strings representing consumed tokens
     """
     default_searcher = configuration.default_searcher
-    retrieved_results, consume_tokens, metadata = default_searcher.retrieve(
+    retrieved_results, metadata = default_searcher.retrieve(
         original_query, max_iter=max_iter
     )
-    return retrieved_results, consume_tokens
+    return retrieved_results
diff --git a/main.py b/main.py
index 69b7cd6..d0ea344 100644
--- a/main.py
+++ b/main.py
@@ -38,55 +38,15 @@ async def read_root():
     """
     Serve the main HTML page.
     """
-    # 获取当前文件所在目录
     current_dir = os.path.dirname(os.path.abspath(__file__))
-    # 构建模板文件路径 - 修复路径问题
     template_path = os.path.join(current_dir, "deepsearcher", "backend", "templates", "index.html")
-    # 读取HTML文件内容
     try:
         with open(template_path, encoding="utf-8") as file:
             html_content = file.read()
         return HTMLResponse(content=html_content, status_code=200)
     except FileNotFoundError:
-        # 如果找不到文件,提供一个简单的默认页面
-        default_html = f"""
-        [removed fallback page; markup lost in extraction. The deleted block built a static HTML page titled "DeepSearcher" with a welcome banner ("欢迎使用 DeepSearcher 智能搜索系统!" — "Welcome to the DeepSearcher intelligent search system!"), a notice that the system is running but the frontend template file was not found, the hint "请确认文件是否存在: {template_path}" ("please check that the file exists"), a list of the available API endpoints, and a link to the API documentation.]
-        """
-        return HTMLResponse(content=default_html, status_code=200)
+        raise HTTPException(status_code=404, detail="Template file not found")
 
 
 @app.post("/set-provider-config/")
@@ -154,12 +114,11 @@ def load_files(
         HTTPException: If loading files fails.
     """
     try:
-        # 修复batch_size为None时的问题
         load_from_local_files(
             paths_or_directory=paths,
             collection_name=collection_name,
             collection_description=collection_description,
-            batch_size=batch_size if batch_size is not None else 8,  # 提供默认值
+            batch_size=batch_size if batch_size is not None else 8,
         )
         return {"message": "Files loaded successfully."}
     except Exception as e:
@@ -205,12 +164,11 @@ def load_website(
         HTTPException: If loading website content fails.
     """
     try:
-        # 修复batch_size为None时的问题
         load_from_website(
             urls=urls,
             collection_name=collection_name,
             collection_description=collection_description,
-            batch_size=batch_size if batch_size is not None else 256,  # 提供默认值
+            batch_size=batch_size if batch_size is not None else 8,
         )
         return {"message": "Website loaded successfully."}
     except Exception as e:
@@ -249,7 +207,7 @@ def perform_query(
         from deepsearcher.utils.log import clear_progress_messages
         clear_progress_messages()
 
-        result_text, _, consume_token = query(original_query, max_iter)
+        result_text, _ = query(original_query, max_iter)
 
         # 获取进度消息
         from deepsearcher.utils.log import get_progress_messages
@@ -257,7 +215,6 @@ def perform_query(
 
         return {
             "result": result_text,
-            "consume_token": consume_token,
             "progress_messages": progress_messages
         }
     except Exception as e:
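
Taken together, the online_query.py and main.py hunks change the caller-facing API: retrieve() now returns only the retrieved results (the consume_tokens value is gone) and query(), as unpacked in perform_query(), returns two values instead of three, so the token count is no longer surfaced anywhere. A minimal caller-side sketch under stated assumptions — Configuration()'s defaults and the query import path follow the upstream deep-searcher layout and are not shown in this diff:

```python
# Caller-side sketch of the API after this patch.
# Assumptions (not shown in the diff): Configuration() and init_config() behave as in the
# upstream deep-searcher project, and query() lives next to retrieve() in deepsearcher.online_query.
from deepsearcher.configuration import Configuration, init_config
from deepsearcher.online_query import query, retrieve

init_config(Configuration())  # builds the LLM, embedding model, vector DB and the DeepSearch agent

# retrieve() used to return (results, consume_tokens); it now returns only the results.
results = retrieve("How does DeepSearcher split a question into sub-queries?", max_iter=2)

# query() is unpacked into two values in perform_query(); the second value (assumed to be the
# retrieved chunks) is ignored there, and the token count is no longer available.
answer, _ = query("How does DeepSearcher split a question into sub-queries?", max_iter=2)
print(answer)
```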
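Two quieter behavior changes in the hunks above are easy to miss: OpenAILLM.chat() no longer accepts a stream_callback, so the returned string is the only programmatic way to consume the output (tokens are still printed to stdout while streaming), and both DeepSearch and init_config now default to route_collection=False, which skips the LLM-based collection routing. The sketch below is illustrative only; the constructor arguments are assumptions inferred from the hunks, with placeholder credentials:

```python
# Sketch only: the model name, API key and base_url are placeholders, and the OpenAILLM
# constructor signature is assumed from the kwargs handling visible in the hunk above.
from deepsearcher.llm.openai_llm import OpenAILLM

llm = OpenAILLM(
    model="deepseek-reasoner",               # placeholder: a model that emits reasoning_content deltas
    api_key="sk-...",                        # placeholder credential
    base_url="https://api.deepseek.com/v1",  # placeholder endpoint
)

messages = [{"role": "user", "content": "Summarize what DeepSearcher does."}]

# chat() still streams reasoning and answer tokens to stdout, but the stream_callback hook is
# gone; callers only receive the final concatenated answer string.
answer = llm.chat(messages)

# To restore the old routing behavior, pass route_collection=True explicitly when building the
# agent (the new default in DeepSearch and init_config is False), e.g.:
#   DeepSearch(llm=llm, embedding_model=..., vector_db=..., max_iter=3, route_collection=True)
```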