From 21ea200b7cc7830eb4f1ac4d0f38ee113890f64b Mon Sep 17 00:00:00 2001
From: tanxing
Date: Tue, 5 Aug 2025 15:32:09 +0800
Subject: [PATCH] Merge multiple chunks into a single usefulness judgment
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The rerank step used to issue one LLM call per retrieved chunk. Format all
chunks of a collection into a single RERANK_PROMPT instead, ask the LLM for
one "YES"/"NO" list covering every chunk, and fall back to accepting all
chunks when the list cannot be parsed.

---
 deepsearcher/agent/deep_search.py | 71 +++++++++++++++++++++++--------
 tests/agent/test_base.py          |  7 +++++++
 tests/agent/test_deep_search.py   |  2 +-
 3 files changed, 60 insertions(+), 20 deletions(-)

diff --git a/deepsearcher/agent/deep_search.py b/deepsearcher/agent/deep_search.py
index ab60daa..3e3c53e 100644
--- a/deepsearcher/agent/deep_search.py
+++ b/deepsearcher/agent/deep_search.py
@@ -30,13 +30,14 @@ Example output:
 Provide your response in a python code list of str format:
 """
 
-RERANK_PROMPT = """Based on the query questions and the retrieved chunk, to determine whether the chunk is helpful in answering any of the query question, you can only return "YES" or "NO", without any other information.
+RERANK_PROMPT = """Based on the query questions and the retrieved chunks, determine whether each chunk is helpful in answering any of the query questions. For each chunk, you must return "YES" or "NO" without any other information.
 
 Query Questions: {query}
-Retrieved Chunk: {retrieved_chunk}
 
-Is the chunk helpful in answering the any of the questions?
-"""
+Retrieved Chunks:
+{retrieved_chunks}
+
+Respond with a list of "YES" or "NO" values, one for each chunk, in the same order as the chunks are listed. For example, for three chunks: ["YES", "NO", "YES"]"""
 
 REFLECT_PROMPT = """Determine whether additional search queries are needed based on the original query, previous sub queries, and all retrieved document chunks. If further research is required, provide a Python list of up to 3 search queries. If no further research is required, return an empty list.
 
@@ -140,26 +141,58 @@ class DeepSearch(RAGAgent):
                     f"<search> No relevant document chunks found in '{collection}'! </search>\n"
                 )
                 continue
+
+            # Format all chunks for batch processing
+            formatted_chunks = ""
+            for i, retrieved_result in enumerate(retrieved_results):
+                formatted_chunks += f"<chunk_{i}>\n{retrieved_result.text}\n</chunk_{i}>\n\n"
+
+            # Batch process all chunks with a single LLM call
+            chat_response = self.llm.chat(
+                messages=[
+                    {
+                        "role": "user",
+                        "content": RERANK_PROMPT.format(
+                            query=[query] + sub_queries,
+                            retrieved_chunks=formatted_chunks,
+                        ),
+                    }
+                ]
+            )
+            consume_tokens += chat_response.total_tokens
+            response_content = self.llm.remove_think(chat_response.content).strip()
+
+            # Parse the response to determine which chunks are relevant
+            try:
+                relevance_list = self.llm.literal_eval(response_content)
+                if not isinstance(relevance_list, list):
+                    raise ValueError("Response is not a list")
+            except Exception:
+                # Fallback: if parsing fails, treat all chunks as relevant
+                log.color_print(
+                    "Warning: Failed to parse relevance response. Treating all chunks as relevant. "
+                    f"Response was: {response_content}"
+                )
+                relevance_list = ["YES"] * len(retrieved_results)
+
+            # Ensure we have enough relevance judgments for all chunks
+            while len(relevance_list) < len(retrieved_results):
+                relevance_list.append("YES")  # Default to relevant if no judgment provided
+
+            # Filter relevant chunks based on LLM response
             accepted_chunk_num = 0
             references = set()
-            for retrieved_result in retrieved_results:
-                chat_response = self.llm.chat(
-                    messages=[
-                        {
-                            "role": "user",
-                            "content": RERANK_PROMPT.format(
-                                query=[query] + sub_queries,
-                                retrieved_chunk=f"<chunk>{retrieved_result.text}</chunk>",
-                            ),
-                        }
-                    ]
-                )
-                consume_tokens += chat_response.total_tokens
-                response_content = self.llm.remove_think(chat_response.content).strip()
-                if "YES" in response_content and "NO" not in response_content:
+            for i, retrieved_result in enumerate(retrieved_results):
+                # The padding above guarantees a judgment for every chunk
+                is_relevant = (
+                    "YES" in relevance_list[i].upper()
+                    and "NO" not in relevance_list[i].upper()
+                )
+                if is_relevant:
                     all_retrieved_results.append(retrieved_result)
                     accepted_chunk_num += 1
                     references.add(retrieved_result.reference)
+
             if accepted_chunk_num > 0:
                 log.color_print(
                     f"<search> Accept {accepted_chunk_num} document chunk(s) from references: {list(references)} </search>\n"
                 )

diff --git a/tests/agent/test_base.py b/tests/agent/test_base.py
index 6ea59f9..33fb7bd 100644
--- a/tests/agent/test_base.py
+++ b/tests/agent/test_base.py
@@ -32,6 +32,13 @@ class MockLLM(BaseLLM):
             if key in message_content:
                 return ChatResponse(content=response, total_tokens=10)
 
+
+        # Default response for RERANK_PROMPT - treat all chunks as relevant
+        if "Based on the query questions and the retrieved chunks" in message_content:
+            # Count the number of chunks in the message
+            chunk_count = message_content.count("<chunk_")
+            return ChatResponse(content=str(["YES"] * chunk_count), total_tokens=10)
+
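
Reviewer note: the following is a minimal standalone sketch of the
parse-and-fallback behavior this patch introduces, for trying it outside the
agent. parse_relevance is a hypothetical helper written for illustration, and
ast.literal_eval stands in for the repo's self.llm.literal_eval wrapper.

    import ast

    def parse_relevance(response_content: str, num_chunks: int) -> list:
        # Parse an LLM reply like '["YES", "NO"]' into one judgment per chunk.
        try:
            relevance_list = ast.literal_eval(response_content)
            if not isinstance(relevance_list, list):
                raise ValueError("Response is not a list")
        except Exception:
            # Same fallback as the patch: treat every chunk as relevant.
            relevance_list = ["YES"] * num_chunks
        # Pad short replies so every chunk gets a judgment, defaulting to "YES".
        relevance_list += ["YES"] * (num_chunks - len(relevance_list))
        return relevance_list

    # A three-chunk query where the reply only covers two chunks:
    assert parse_relevance('["YES", "NO"]', 3) == ["YES", "NO", "YES"]
    # An unparseable reply keeps everything:
    assert parse_relevance("no idea", 2) == ["YES", "YES"]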