@@ -30,13 +30,14 @@ Example output:
Provide your response in a python code list of str format:
"""
RERANK_PROMPT = """Based on the query questions and the retrieved chunk, to determine whether the chunk is helpful in answering any of the query question, you can only return "YES" or "NO", without any other information.
RERANK_PROMPT = """Based on the query questions and the retrieved chunks, determine whether each chunk is helpful in answering any of the query questions. For each chunk, you must return "YES" or "NO" without any other information.
Query Questions: {query}
Retrieved Chunk: {retrieved_chunk}
Is the chunk helpful in answering the any of the questions?
"""
Retrieved Chunks:
{retrieved_chunks}
Respond with a list of "YES" or "NO" values, one for each chunk, in the same order as the chunks are listed. For example, for a list of three chunks: ["YES", "NO", "YES"]
"""
REFLECT_PROMPT = """Determine whether additional search queries are needed based on the original query, previous sub queries, and all retrieved document chunks. If further research is required, provide a Python list of up to 3 search queries. If no further research is required, return an empty list.
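
The reworded RERANK_PROMPT replaces the single-chunk YES/NO question with one batched verdict list per call. As a rough illustration of the exchange it now expects (the query questions and chunk texts below are invented, not from the PR; the snippet reuses the RERANK_PROMPT defined in the hunk above), the prompt is filled once with every chunk and the model is supposed to answer with one verdict per chunk, in order:

# Illustrative round-trip only; the questions and chunk text are made up.
questions = ["What is Milvus?", "How does Milvus index vectors?"]
formatted_chunks = (
    "<chunk_0>\nMilvus is an open-source vector database.\n</chunk_0>\n"
    "<chunk_1>\nA recipe for sourdough bread.\n</chunk_1>\n"
)
prompt = RERANK_PROMPT.format(query=questions, retrieved_chunks=formatted_chunks)
expected_reply = '["YES", "NO"]'  # one verdict per chunk, in the same order as the chunks
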
@@ -140,26 +141,55 @@ class DeepSearch(RAGAgent):
    f"<search> No relevant document chunks found in '{collection}'! </search>\n"
)
continue
# Format all chunks for batch processing
formatted_chunks = ""
for i, retrieved_result in enumerate(retrieved_results):
    formatted_chunks += f"<chunk_{i}>\n{retrieved_result.text}\n</chunk_{i}>\n"
# Batch process all chunks with a single LLM call
chat_response = self.llm.chat(
    messages=[
        {
            "role": "user",
            "content": RERANK_PROMPT.format(
                query=[query] + sub_queries,
                retrieved_chunks=formatted_chunks,
            ),
        }
    ]
)
consume_tokens += chat_response.total_tokens
response_content = self.llm.remove_think(chat_response.content).strip()
# Parse the response to determine which chunks are relevant
try:
    relevance_list = self.llm.literal_eval(response_content)
    if not isinstance(relevance_list, list):
        raise ValueError("Response is not a list")
except:
    # Fallback: if parsing fails, treat all chunks as relevant
    log.color_print(f"Warning: Failed to parse relevance response. Treating all chunks as relevant. Response was: {response_content}")
    relevance_list = ["YES"] * len(retrieved_results)
# Ensure we have enough relevance judgments for all chunks
while len(relevance_list) < len(retrieved_results):
    relevance_list.append("YES")  # Default to relevant if no judgment provided
# Filter relevant chunks based on LLM response
accepted_chunk_num = 0
references = set()
for retrieved_result in retrieved_results:
    chat_response = self.llm.chat(
        messages=[
            {
                "role": "user",
                "content": RERANK_PROMPT.format(
                    query=[query] + sub_queries,
                    retrieved_chunk=f"<chunk>{retrieved_result.text}</chunk>",
                ),
            }
        ]
    )
    consume_tokens += chat_response.total_tokens
    response_content = self.llm.remove_think(chat_response.content).strip()
    if "YES" in response_content and "NO" not in response_content:
for i, retrieved_result in enumerate(retrieved_results):
    # Check if we have a relevance judgment for this chunk
    is_relevant = (
        i < len(relevance_list)
        and "YES" in relevance_list[i].upper()
        and "NO" not in relevance_list[i].upper()
    ) if i < len(relevance_list) else True
    if is_relevant:
        all_retrieved_results.append(retrieved_result)
        accepted_chunk_num += 1
        references.add(retrieved_result.reference)
if accepted_chunk_num > 0:
    log.color_print(
        f"<search> Accept {accepted_chunk_num} document chunk(s) from references: {list(references)} </search>\n"
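
Taken together, the second hunk swaps the old per-chunk rerank loop (one self.llm.chat call per retrieved chunk) for a single batched call whose list response is parsed, padded, and used to filter the chunks. A minimal standalone sketch of that flow, assuming a generic call_llm(prompt) -> str helper and stdlib ast.literal_eval in place of the project's self.llm.chat and self.llm.literal_eval (illustrative only, not the PR's actual code):

import ast
from typing import Callable, List

def batch_rerank(questions: List[str], chunks: List[str],
                 call_llm: Callable[[str], str]) -> List[str]:
    # One prompt carrying every chunk, instead of one LLM call per chunk.
    formatted = "".join(
        f"<chunk_{i}>\n{text}\n</chunk_{i}>\n" for i, text in enumerate(chunks)
    )
    reply = call_llm(
        RERANK_PROMPT.format(query=questions, retrieved_chunks=formatted)
    ).strip()
    try:
        verdicts = ast.literal_eval(reply)
        if not isinstance(verdicts, list):
            raise ValueError("Response is not a list")
    except (ValueError, SyntaxError):
        verdicts = ["YES"] * len(chunks)  # parsing failed: keep everything
    # Pad short responses, then keep only chunks judged "YES".
    verdicts += ["YES"] * (len(chunks) - len(verdicts))
    return [
        c
        for c, v in zip(chunks, verdicts)
        if "YES" in str(v).upper() and "NO" not in str(v).upper()
    ]

For n retrieved chunks this costs one chat call instead of n, which is the point of the change; the YES-on-failure fallback mirrors the diff's behavior of keeping chunks rather than silently dropping them when the response cannot be parsed.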