@@ -30,13 +30,14 @@ Example output:
Provide your response in a python code list of str format:
"""
RERANK_PROMPT = """Based on the query questions and the retrieved chunk, to determine whether the chunk is helpful in answering any of the query question, you can only return "YES" or "NO", without any other information.
RERANK_PROMPT = """Based on the query questions and the retrieved chunks, determine whether each chunk is helpful in answering any of the query questions. For each chunk, you must return "YES" or "NO" without any other information.
Query Questions: {query}
Retrieved Chunk: {retrieved_chunk}
Is the chunk helpful in answering the any of the questions?
"""
Retrieved Chunks:
{retrieved_chunks}
Respond with a list of "YES" or "NO" values, one for each chunk, in the same order as the chunks are listed. For example, for a list of three chunks: ["YES", "NO", "YES"]
"""
REFLECT_PROMPT = """Determine whether additional search queries are needed based on the original query, previous sub queries, and all retrieved document chunks. If further research is required, provide a Python list of up to 3 search queries. If no further research is required, return an empty list.
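
The reworded RERANK_PROMPT replaces the single-chunk YES/NO question with one batched verdict list per call. As a rough illustration of the exchange it now expects (the query questions and chunk texts below are invented, not from the PR; the snippet reuses the RERANK_PROMPT defined in the hunk above), the prompt is filled once with every chunk and the model is supposed to answer with one verdict per chunk, in order:

# Illustrative round-trip only; the questions and chunk text are made up.
questions = ["What is Milvus?", "How does Milvus index vectors?"]
formatted_chunks = (
    "<chunk_0>\nMilvus is an open-source vector database.\n</chunk_0>\n"
    "<chunk_1>\nA recipe for sourdough bread.\n</chunk_1>\n"
)
prompt = RERANK_PROMPT.format(query=questions, retrieved_chunks=formatted_chunks)
expected_reply = '["YES", "NO"]'  # one verdict per chunk, in the same order as the chunks
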
@@ -140,26 +141,55 @@ class DeepSearch(RAGAgent):
    f"<search> No relevant document chunks found in '{collection}'! </search>\n"
)
continue
# Format all chunks for batch processing
formatted_chunks = ""
for i, retrieved_result in enumerate(retrieved_results):
    formatted_chunks += f"<chunk_{i}>\n{retrieved_result.text}\n</chunk_{i}>\n"
# Batch process all chunks with a single LLM call
chat_response = self.llm.chat(
    messages=[
        {
            "role": "user",
            "content": RERANK_PROMPT.format(
                query=[query] + sub_queries,
                retrieved_chunks=formatted_chunks,
            ),
        }
    ]
)
consume_tokens += chat_response.total_tokens
response_content = self.llm.remove_think(chat_response.content).strip()
# Parse the response to determine which chunks are relevant
try:
    relevance_list = self.llm.literal_eval(response_content)
    if not isinstance(relevance_list, list):
        raise ValueError("Response is not a list")
except:
    # Fallback: if parsing fails, treat all chunks as relevant
    log.color_print(f"Warning: Failed to parse relevance response. Treating all chunks as relevant. Response was: {response_content}")
    relevance_list = ["YES"] * len(retrieved_results)
# Ensure we have enough relevance judgments for all chunks
while len(relevance_list) < len(retrieved_results):
    relevance_list.append("YES")  # Default to relevant if no judgment provided
# Filter relevant chunks based on LLM response
accepted_chunk_num = 0
references = set()
for retrieved_result in retrieved_results:
    chat_response = self.llm.chat(
        messages=[
            {
                "role": "user",
                "content": RERANK_PROMPT.format(
                    query=[query] + sub_queries,
                    retrieved_chunk=f"<chunk>{retrieved_result.text}</chunk>",
                ),
            }
        ]
    )
    consume_tokens += chat_response.total_tokens
    response_content = self.llm.remove_think(chat_response.content).strip()
    if "YES" in response_content and "NO" not in response_content:
for i, retrieved_result in enumerate(retrieved_results):
    # Check if we have a relevance judgment for this chunk
    is_relevant = (
        i < len(relevance_list)
        and "YES" in relevance_list[i].upper()
        and "NO" not in relevance_list[i].upper()
    ) if i < len(relevance_list) else True
    if is_relevant:
        all_retrieved_results.append(retrieved_result)
        accepted_chunk_num += 1
        references.add(retrieved_result.reference)
if accepted_chunk_num > 0:
    log.color_print(
        f"<search> Accept {accepted_chunk_num} document chunk(s) from references: {list(references)} </search>\n"
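
Taken together, the second hunk swaps the old per-chunk rerank loop (one self.llm.chat call per retrieved chunk) for a single batched call whose list response is parsed, padded, and used to filter the chunks. A minimal standalone sketch of that flow, assuming a generic call_llm(prompt) -> str helper and stdlib ast.literal_eval in place of the project's self.llm.chat and self.llm.literal_eval (illustrative only, not the PR's actual code):

import ast
from typing import Callable, List

def batch_rerank(questions: List[str], chunks: List[str],
                 call_llm: Callable[[str], str]) -> List[str]:
    # One prompt carrying every chunk, instead of one LLM call per chunk.
    formatted = "".join(
        f"<chunk_{i}>\n{text}\n</chunk_{i}>\n" for i, text in enumerate(chunks)
    )
    reply = call_llm(
        RERANK_PROMPT.format(query=questions, retrieved_chunks=formatted)
    ).strip()
    try:
        verdicts = ast.literal_eval(reply)
        if not isinstance(verdicts, list):
            raise ValueError("Response is not a list")
    except (ValueError, SyntaxError):
        verdicts = ["YES"] * len(chunks)  # parsing failed: keep everything
    # Pad short responses, then keep only chunks judged "YES".
    verdicts += ["YES"] * (len(chunks) - len(verdicts))
    return [
        c
        for c, v in zip(chunks, verdicts)
        if "YES" in str(v).upper() and "NO" not in str(v).upper()
    ]

For n retrieved chunks this costs one chat call instead of n, which is the point of the change; the YES-on-failure fallback mirrors the diff's behavior of keeping chunks rather than silently dropping them when the response cannot be parsed.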