
chore: change footnote style

main
tanxing 5 days ago
commit 1edc9bf1ee
Changed files:
  1. deepsearcher/agent/deep_search.py (20 changed lines)
  2. deepsearcher/config.yaml (2 changed lines)
  3. deepsearcher/llm/openai_llm.py (4 changed lines)
  4. deepsearcher/vector_db/milvus.py (2 changed lines)

deepsearcher/agent/deep_search.py (20 changed lines)

@@ -91,13 +91,13 @@ SUMMARY_PROMPT = """
Example:
<EXAMPLE>
-In-text citation example (mixed markdown and html syntax; the format must match the example exactly):
-"XGBoost is a very powerful ensemble learning model[<sup>[2]</sup>](#2)"
-(You must use "[<sup>[index]</sup>](#index)" rather than "[index]"; do not omit "(#index)". Here index is the id of the reference the chunk belongs to.)
+In-text citation example (markdown footnote syntax):
+"XGBoost is a very powerful ensemble learning model[^2]"
+(You must use "[^index]"; here index is the id of the corresponding <reference>.)
-End-of-text citation example (the href must match the reference cited earlier; do not assign a citation to every chunk, each reference shares one citation):
-<div id="2"><a href="files/docs/chap_001_003_models.md" style="text-decoration: none;>[2] chap_001_models.md</a></div>
+End-of-text citation example (must match the href of the reference cited earlier; do not assign a citation to every chunk, each reference shares one citation):
+[^2]: files/docs/chap_001_003_models.md
</EXAMPLE>
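
The new prompt switches in-text citations to plain markdown footnotes. Below is a minimal sketch of how an answer in this format could be sanity-checked, assuming the answer is a single string; the helper name and regexes are illustrative and not part of this commit.

```python
import re

# Illustrative helper (not in the repository): find footnote ids that are
# cited in the body of the answer but never defined at the end.
def undefined_footnotes(answer: str) -> set[str]:
    cited = set(re.findall(r"\[\^(\d+)\](?!:)", answer))               # "[^2]" in the text
    defined = set(re.findall(r"^\[\^(\d+)\]:", answer, re.MULTILINE))  # "[^2]: path" definitions
    return cited - defined

answer = (
    "XGBoost is a very powerful ensemble learning model[^2]\n\n"
    "[^2]: files/docs/chap_001_003_models.md"
)
print(undefined_footnotes(answer))  # set() -> every citation has a definition
```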
@@ -328,7 +328,6 @@ class DeepSearch(BaseAgent):
- A list of retrieved document results
- Additional information about the retrieval process
"""
-max_iter = kwargs.pop("max_iter", self.max_iter)
### SUB QUERIES ###
log.color_print(f"<query> {original_query} </query>\n")
all_search_results = []
@@ -342,7 +341,7 @@
log.color_print(f"</think> Break down the original query into new sub queries: {sub_queries} ")
all_sub_queries.extend(sub_queries)
-for it in range(max_iter):
+for it in range(self.max_iter):
log.color_print(f">> Iteration: {it + 1}\n")
# Execute all search tasks sequentially
@@ -358,7 +357,7 @@
)
# search_res_from_internet = deduplicate_results(search_res_from_internet)
# all_search_res.extend(search_res_from_vectordb + search_res_from_internet)
-if it + 1 >= max_iter:
+if it + 1 >= self.max_iter:
log.color_print("</think> Exceeded maximum iterations. Exiting. ")
break
### REFLECTION & GET MORE SUB QUERIES ###
@@ -421,10 +420,11 @@
for i, reference in enumerate(references):
formated = f"<reference id='{i + 1}' href='{reference}'>\n" + "".join(
[
f"<chunk id='{j + 1 + chunk_count}'>\n{chunk}\n</chunk>\n"
f"<chunk id='{j + 1 + chunk_count}'>\n{chunk}\n</chunk id='{j + 1 + chunk_count}'>\n"
for j, chunk in enumerate(references[reference])
]
) + "</reference>\n"
) + f"</reference id='{i + 1}'>\n"
+print(formated)
chunks.append(formated)
chunk_count += len(references[reference])
return "".join(chunks)

deepsearcher/config.yaml (2 changed lines)

@@ -84,4 +84,4 @@ query_settings:
load_settings:
chunk_size: 1024
-chunk_overlap: 256
+chunk_overlap: 512
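
Raising chunk_overlap from 256 to 512 makes consecutive chunks share more text at their boundaries. A toy sliding-window splitter, not the project's loader, illustrates what the two settings control:

```python
# Toy splitter (not the project's loader): the window is chunk_size characters
# wide and advances by chunk_size - chunk_overlap characters each step.
def split_text(text: str, chunk_size: int = 1024, chunk_overlap: int = 512) -> list[str]:
    step = chunk_size - chunk_overlap
    return [text[i:i + chunk_size] for i in range(0, len(text), step)]

chunks = split_text("x" * 3000)
print(len(chunks), [len(c) for c in chunks])  # 6 chunks instead of 4 with overlap 256
```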

deepsearcher/llm/openai_llm.py (4 changed lines)

@@ -48,7 +48,9 @@ class OpenAILLM(BaseLLM):
model=self.model,
messages=messages,
stream=True,
-temperature=0.6
+temperature=0.6,
+top_p=0.8,
+presence_penalty=1.2
) as stream:
# stream is for testing only; it does not need to be sent to the frontend
content = ""

deepsearcher/vector_db/milvus.py (2 changed lines)

@@ -149,7 +149,7 @@ class Milvus(BaseVectorDB):
self,
collection: str,
vector: np.ndarray | list[float],
-top_k: int = 4,
+top_k: int = 3,
query_text: str = None,
*args,
**kwargs,
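
Lowering the default top_k from 4 to 3 trims one chunk from every retrieval. A hedged sketch of an equivalent pymilvus MilvusClient search with the new default; the URI, collection name, embedding dimension, and output fields are assumptions, not taken from the repository:

```python
from pymilvus import MilvusClient

client = MilvusClient(uri="http://localhost:19530")  # assumed local Milvus instance
results = client.search(
    collection_name="deepsearcher",   # placeholder collection name
    data=[[0.1] * 768],               # one query embedding, dimension assumed
    limit=3,                          # top_k: was 4, now defaults to 3
    output_fields=["text", "reference"],
)
for hit in results[0]:
    print(hit["distance"], hit["entity"])
```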
