From 3d2ee4512221871054b21e1cbf6187067af3938a Mon Sep 17 00:00:00 2001 From: tanxing Date: Tue, 12 Aug 2025 00:37:50 +0800 Subject: [PATCH] =?UTF-8?q?=E6=96=B0=E5=A2=9Ereference=E7=94=9F=E6=88=90?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- deepsearcher/agent/deep_search.py | 16 ++++++++++++---- deepsearcher/config.yaml | 2 +- deepsearcher/offline_loading.py | 6 ------ 3 files changed, 13 insertions(+), 11 deletions(-) diff --git a/deepsearcher/agent/deep_search.py b/deepsearcher/agent/deep_search.py index 9b6e30c..c018827 100644 --- a/deepsearcher/agent/deep_search.py +++ b/deepsearcher/agent/deep_search.py @@ -83,7 +83,16 @@ Please generate a long, specific and detailed answer or report based on the prev If the chunks are not enough to answer the query or additional information is needed to enhance the content, you should answer with your own knowledge. In this case, mark the part(s) that generated by your own with your knowledge here (Don't place part(s) individually into one paragraph, but insert it the proper place of the report) -Plus, you should give references in the report where you quote from the chunks using markdown links, and give a list of references at the end of the report. +Plus, you should quote chunk references and give a list of references at the end of the report. 
+ +Here is an example: + +Quote example (an in-text quote anchor; strictly apply the format below): +XGBoost is a powerful ensemble learning method[[2]](#2) + +Reference list example (each entry should use exactly the same number as its quote anchor in the report): +[[2]](#2) Title of the source chunk or document + + Original Query: {original_query} @@ -111,7 +120,7 @@ class DeepSearch(BaseAgent): llm: BaseLLM, embedding_model: BaseEmbedding, vector_db: BaseVectorDB, - max_iter: int = 3, + max_iter: int, route_collection: bool = False, text_window_splitter: bool = True, **kwargs, @@ -338,7 +347,6 @@ class DeepSearch(BaseAgent): for it in range(max_iter): log.color_print(f">> Iteration: {it + 1}\n") - # Execute all search tasks sequentially for query in sub_queries: result = self._search_chunks_from_vectordb(query) @@ -352,7 +360,7 @@ ) # search_res_from_internet = deduplicate_results(search_res_from_internet) # all_search_res.extend(search_res_from_vectordb + search_res_from_internet) - if it == max_iter - 1: + if it + 1 >= max_iter: log.color_print(" Exceeded maximum iterations. Exiting. ") break ### REFLECTION & GET MORE SUB QUERIES ### diff --git a/deepsearcher/config.yaml b/deepsearcher/config.yaml index a86cc50..c3d1c2d 100644 --- a/deepsearcher/config.yaml +++ b/deepsearcher/config.yaml @@ -80,7 +80,7 @@ provide_settings: # port: 6333 query_settings: - max_iter: 2 + max_iter: 1 load_settings: chunk_size: 2048 diff --git a/deepsearcher/offline_loading.py b/deepsearcher/offline_loading.py index f460420..57c1506 100644 --- a/deepsearcher/offline_loading.py +++ b/deepsearcher/offline_loading.py @@ -42,12 +42,6 @@ def load_from_local_files( collection_name = collection_name.replace(" ", "_").replace("-", "_") embedding_model = configuration.embedding_model file_loader = configuration.file_loader - vector_db.init_collection( - dim=embedding_model.dimension, - collection=collection_name, - description=collection_description, - force_rebuild=force_rebuild, - ) # 如果force_rebuild为True,则强制重建集合 if force_rebuild: