
Temporarily remove async retrieval

Branch: main · tanxing committed 7 days ago · commit a52310748d
Changed files:
  1. deepsearcher/agent/deep_search.py (45 changes)
  2. deepsearcher/online_query.py (4 changes)

deepsearcher/agent/deep_search.py (45 changes)

@@ -1,5 +1,3 @@
-import asyncio
-
 from deepsearcher.agent.base import BaseAgent, describe_class
 from deepsearcher.embedding.base import BaseEmbedding
 from deepsearcher.llm.base import BaseLLM
@@ -52,7 +50,8 @@ Query Questions: {query}
 Retrieved Chunks:
 {retrieved_chunks}
-Respond with a list of "YES" or "NO" values, one for each chunk, in the same order as the chunks are listed. For example a list of chunks of three: ["YES", "NO", "YES"]
+Respond with a list of "YES" or "NO" values, one for each chunk, in the same order as the chunks are listed.
+For example, if there is a list of four chunks, the answer could be: ["YES", "NO", "YES", "YES"]
 """
@@ -60,7 +59,7 @@ REFLECT_PROMPT = """
 Determine whether additional search queries are needed based on the original query, previous sub queries, and all retrieved document chunks.
 If returned chunks does not cover all previous sub-queries, this means that there are no related documents can be retrieved.
 In this case, try generate simliar but slightly different queries to the previous sub-queries.
-And if further research is needed based on the new information, provide a Python list of more queries.
+And if further research is needed based on the new information which those chunks provided, give more queries on the basis of them.
 (which is prefered, even if the previous sub-queries can be well answered by retrieved chunks, but ultimately according to your judge)
 If no further research is needed, return an empty list.
@@ -79,7 +78,7 @@ You are a AI content analysis expert.
 Please generate a long, specific and detailed answer or report based on the previous queries and the retrieved document chunks.
 If the chunks are not enough to answer the query or additional information is needed to enhance the content, you should answer with your own knowledge.
 In this case, mark the part(s) that generated by your own with <unref>your knowledge here</unref>
-(Don't place <unref></unref> part(s) individually into one paragraph, but insert it the proper place of the context)
+(Don't place <unref></unref> part(s) individually into one paragraph, but insert it the proper place of the report)
 Original Query: {question}
@@ -198,7 +197,7 @@ class DeepSearch(BaseAgent):
         content = self.llm.remove_think(content)
         return self.llm.literal_eval(content)

-    async def _search_chunks_from_vectordb(self, query: str):
+    def _search_chunks_from_vectordb(self, query: str):
         if self.route_collection:
             selected_collections = self.invoke(
                 query=query, dim=self.embedding_model.dimension
@@ -222,7 +221,7 @@
         # Format all chunks for batch processing
         formatted_chunks = ""
         for i, retrieved_result in enumerate(retrieved_results):
-            formatted_chunks += f"<chunk_{i}>\n{retrieved_result.text}\n</chunk_{i}>\n"
+            formatted_chunks += f"<chunk_{i + 1}>\n{retrieved_result.text}\n</chunk_{i + 1}>\n"

         # Batch process all chunks with a single LLM call
         content = self.llm.chat(
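The renumbering makes the chunk tags 1-based, matching the updated prompt's "list of four chunks" example. An equivalent formulation, noted purely as an idiomatic alternative, is enumerate's start parameter:

    # Same 1-based tags without the explicit "+ 1":
    for i, retrieved_result in enumerate(retrieved_results, start=1):
        formatted_chunks += f"<chunk_{i}>\n{retrieved_result.text}\n</chunk_{i}>\n"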
@@ -308,11 +307,6 @@
         - A list of retrieved document results
         - Additional information about the retrieval process
         """
-        return asyncio.run(self.async_retrieve(original_query, **kwargs))
-
-    async def async_retrieve(
-        self, original_query: str, **kwargs
-    ) -> tuple[list[RetrievalResult], dict]:
         max_iter = kwargs.pop("max_iter", self.max_iter)
         ### SUB QUERIES ###
         log.color_print(f"<query> {original_query} </query>\n")
@@ -325,8 +319,7 @@
             return [], {}
         else:
             log.color_print(
-                f"<think> Break down the original query into new sub queries: {sub_queries}</think>\n"
-            )
+                f"</think> Break down the original query into new sub queries: {sub_queries} ")

         all_sub_queries.extend(sub_queries)
         sub_gap_queries = sub_queries
@@ -335,36 +328,28 @@
             search_res_from_vectordb = []
             search_res_from_internet = []  # TODO

-            # Create all search tasks
-            search_tasks = [
-                self._search_chunks_from_vectordb(query)
-                for query in sub_gap_queries
-            ]
-            # Execute all tasks in parallel and wait for results
-            search_results = await asyncio.gather(*search_tasks)
-            # Merge all results
-            for result in search_results:
-                search_res = result
-                search_res_from_vectordb.extend(search_res)
+            # Execute all search tasks sequentially
+            for query in sub_gap_queries:
+                result = self._search_chunks_from_vectordb(query)
+                search_res_from_vectordb.extend(result)

             search_res_from_vectordb = deduplicate(search_res_from_vectordb)
             # search_res_from_internet = deduplicate_results(search_res_from_internet)
             all_search_res.extend(search_res_from_vectordb + search_res_from_internet)
             if iter == max_iter - 1:
-                log.color_print("<think> Exceeded maximum iterations. Exiting. </think>\n")
+                log.color_print("</think> Exceeded maximum iterations. Exiting. ")
                 break
             ### REFLECTION & GET GAP QUERIES ###
-            log.color_print("<think> Reflecting on the search results... </think>\n")
+            log.color_print("</think> Reflecting on the search results... ")
             sub_gap_queries = self._generate_gap_queries(
                 original_query, all_sub_queries, all_search_res
             )
             if not sub_gap_queries or len(sub_gap_queries) == 0:
-                log.color_print("<think> No new search queries were generated. Exiting. </think>\n")
+                log.color_print("</think> No new search queries were generated. Exiting. ")
                 break
             else:
-                log.color_print(
-                    f"<think> New search queries for next iteration: {sub_gap_queries} </think>\n"
-                )
+                log.color_print(
+                    f"</think> New search queries for next iteration: {sub_gap_queries} ")
             all_sub_queries.extend(sub_gap_queries)

         all_search_res = deduplicate(all_search_res)
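The replacement trades latency for simplicity: asyncio.gather overlaps the vector DB calls, while the new loop issues them one at a time. A self-contained sketch of both patterns (toy search functions, not the project's real calls):

    import asyncio
    import time

    async def async_search(query: str) -> list[str]:
        await asyncio.sleep(0.1)  # stand-in for vector DB I/O
        return [f"chunk for {query!r}"]

    def sync_search(query: str) -> list[str]:
        time.sleep(0.1)  # same I/O, but blocking
        return [f"chunk for {query!r}"]

    async def gather_all(queries: list[str]) -> list[str]:
        # Removed behaviour: fan out one task per query; wall time ~0.1 s total.
        results = await asyncio.gather(*(async_search(q) for q in queries))
        return [c for r in results for c in r]

    def run_sequentially(queries: list[str]) -> list[str]:
        # New behaviour: one blocking call per query; wall time ~0.1 s * len(queries).
        out: list[str] = []
        for q in queries:
            out.extend(sync_search(q))
        return out

    queries = ["a", "b", "c"]
    assert asyncio.run(gather_all(queries)) == run_sequentially(queries)

Both strategies return the same chunks in the same order; only the scheduling differs.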

deepsearcher/online_query.py (4 changes)

@@ -39,10 +39,10 @@ def retrieve(
     Returns:
         A tuple containing:
             - A list of retrieval results
-            - An empty list (placeholder for future use)
+            - A list of strings representing consumed tokens
     """
     default_searcher = configuration.default_searcher
     retrieved_results, consume_tokens, metadata = default_searcher.retrieve(
         original_query, max_iter=max_iter
     )
-    return retrieved_results, []
+    return retrieved_results, consume_tokens
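With this change the second element of the returned tuple is meaningful again instead of a placeholder. A hypothetical caller, assuming the global configuration has already been initialized elsewhere (this diff does not show that setup):

    from deepsearcher.online_query import retrieve

    # max_iter bounds the reflection loop shown in deep_search.py above.
    results, consume_tokens = retrieve(
        "What is retrieval-augmented generation?", max_iter=2
    )

    for r in results:
        print(r.text[:120])
    print("consumed tokens:", consume_tokens)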
