Browse Source

新增reference生成

main
tanxing 6 days ago
parent
commit
3d2ee45122
  1. 16
      deepsearcher/agent/deep_search.py
  2. 2
      deepsearcher/config.yaml
  3. 6
      deepsearcher/offline_loading.py

16
deepsearcher/agent/deep_search.py

@ -83,7 +83,16 @@ Please generate a long, specific and detailed answer or report based on the prev
If the chunks are not enough to answer the query or additional information is needed to enhance the content, you should answer with your own knowledge.
In this case, mark the part(s) generated from your own knowledge with <unref>your knowledge here</unref>
(Don't place <unref></unref> part(s) individually into one paragraph, but insert them in the proper place of the report)
Plus, you should give references in the report where you quote from the chunks using markdown links, and give a list of references at the end of the report.
Plus, you should quote chunk references and give a list of references at the end of the report.
Here is an example:
<EXAMPLE>
Quote example (an upper quote anchor, strictly apply the format below):
XGBoost is a powerful ensemble learning method[<sup>[2]</sup>](#2)
Reference list example (should be exactly the same as the <reference></reference>):
<div id="2"><a href="MachineLearning.pdf">[2] MachineLearning.pdf</a></div>
</EXAMPLE>
Original Query: {original_query}
@ -111,7 +120,7 @@ class DeepSearch(BaseAgent):
llm: BaseLLM,
embedding_model: BaseEmbedding,
vector_db: BaseVectorDB,
max_iter: int = 3,
max_iter: int,
route_collection: bool = False,
text_window_splitter: bool = True,
**kwargs,
@ -338,7 +347,6 @@ class DeepSearch(BaseAgent):
for it in range(max_iter):
log.color_print(f">> Iteration: {it + 1}\n")
# Execute all search tasks sequentially
for query in sub_queries:
result = self._search_chunks_from_vectordb(query)
@ -352,7 +360,7 @@ class DeepSearch(BaseAgent):
)
# search_res_from_internet = deduplicate_results(search_res_from_internet)
# all_search_res.extend(search_res_from_vectordb + search_res_from_internet)
if it == max_iter - 1:
if it + 1 >= max_iter:
log.color_print("</think> Exceeded maximum iterations. Exiting. ")
break
### REFLECTION & GET MORE SUB QUERIES ###

2
deepsearcher/config.yaml

@ -80,7 +80,7 @@ provide_settings:
# port: 6333
query_settings:
max_iter: 2
max_iter: 1
load_settings:
chunk_size: 2048

6
deepsearcher/offline_loading.py

@ -42,12 +42,6 @@ def load_from_local_files(
collection_name = collection_name.replace(" ", "_").replace("-", "_")
embedding_model = configuration.embedding_model
file_loader = configuration.file_loader
vector_db.init_collection(
dim=embedding_model.dimension,
collection=collection_name,
description=collection_description,
force_rebuild=force_rebuild,
)
# 如果force_rebuild为True,则强制重建集合
if force_rebuild:

Loading…
Cancel
Save