diff --git a/deepsearcher/agent/deep_search.py b/deepsearcher/agent/deep_search.py index 853a7c0..d34f75e 100644 --- a/deepsearcher/agent/deep_search.py +++ b/deepsearcher/agent/deep_search.py @@ -91,13 +91,13 @@ SUMMARY_PROMPT = """ 例子: -文内引用示例(使用markdown和html的混合语法,格式必须和例子一致): -"XGBoost是非常强大的集成学习模型[[2]](#2)" -(必须使用 "[[index]](#index)" 而不是 "[index]",注意不要遗漏"(#index)",这里的index是chunk对应的reference的id) +文内引用示例(使用markdown脚注): +"XGBoost是非常强大的集成学习模型[^2]" +(必须使用 "[^index]",这里的index是对应的的id) -文末引用示例 (需要href与前文参考中的一致,不需要对每个chunk分配一个引用,而是每一个referecen共用一个引用): -
{original_query} \n") all_search_results = [] @@ -342,7 +341,7 @@ class DeepSearch(BaseAgent): log.color_print(f" Break down the original query into new sub queries: {sub_queries} ") all_sub_queries.extend(sub_queries) - for it in range(max_iter): + for it in range(self.max_iter): log.color_print(f">> Iteration: {it + 1}\n") # Execute all search tasks sequentially @@ -358,7 +357,7 @@ class DeepSearch(BaseAgent): ) # search_res_from_internet = deduplicate_results(search_res_from_internet) # all_search_res.extend(search_res_from_vectordb + search_res_from_internet) - if it + 1 >= max_iter: + if it + 1 >= self.max_iter: log.color_print(" Exceeded maximum iterations. Exiting. ") break ### REFLECTION & GET MORE SUB QUERIES ### @@ -421,10 +420,11 @@ class DeepSearch(BaseAgent): for i, reference in enumerate(references): formated = f"\n" + "".join( [ - f"\n{chunk}\n\n" + f"\n{chunk}\n\n" for j, chunk in enumerate(references[reference]) ] - ) + "\n" + ) + f"\n" + print(formated) chunks.append(formated) chunk_count += len(references[reference]) return "".join(chunks) diff --git a/deepsearcher/config.yaml b/deepsearcher/config.yaml index 2addc92..29e1b8e 100644 --- a/deepsearcher/config.yaml +++ b/deepsearcher/config.yaml @@ -84,4 +84,4 @@ query_settings: load_settings: chunk_size: 1024 - chunk_overlap: 256 + chunk_overlap: 512 diff --git a/deepsearcher/llm/openai_llm.py b/deepsearcher/llm/openai_llm.py index 02a21c9..dd4f378 100644 --- a/deepsearcher/llm/openai_llm.py +++ b/deepsearcher/llm/openai_llm.py @@ -48,7 +48,9 @@ class OpenAILLM(BaseLLM): model=self.model, messages=messages, stream=True, - temperature=0.6 + temperature=0.6, + top_p=0.8, + presence_penalty=1.2 ) as stream: # stream仅做测试,不需要发送到前端 content = "" diff --git a/deepsearcher/vector_db/milvus.py b/deepsearcher/vector_db/milvus.py index 6ebe08c..98acb05 100644 --- a/deepsearcher/vector_db/milvus.py +++ b/deepsearcher/vector_db/milvus.py @@ -149,7 +149,7 @@ class Milvus(BaseVectorDB): self, collection: str, vector: np.ndarray | list[float], - top_k: int = 4, + top_k: int = 3, query_text: str = None, *args, **kwargs,