diff --git a/deepsearcher/agent/deep_search.py b/deepsearcher/agent/deep_search.py index 3b6afc1..c059af5 100644 --- a/deepsearcher/agent/deep_search.py +++ b/deepsearcher/agent/deep_search.py @@ -6,6 +6,9 @@ from deepsearcher.utils.message_stream import send_info, send_answer from deepsearcher.vector_db import RetrievalResult from deepsearcher.vector_db.base import BaseVectorDB, deduplicate from collections import defaultdict +import os +import urllib.parse + COLLECTION_ROUTE_PROMPT = """ @@ -22,7 +25,7 @@ COLLECTION_ROUTE_PROMPT = """ SUB_QUERY_PROMPT = """ -为了能够全面的回答这个问题,请你尝试把原本的问题拆分或扩展为几个子问题 +为了能够全面的回答这个问题,请你尝试把原本的问题拆分或扩展为几个子问题(三五个即可) 请你使用自顶向下和自底向上两种方向来思考如何拆分问题 子问题的数量不可以太多,但是也不可以太少,应当保证问题的回答全面性,请根据问题复杂程度来决定子问题的数量 如果原问题本身非常简单,没有必要进行拆分,则保留输出原问题本身 @@ -95,6 +98,7 @@ SUMMARY_PROMPT = """ 来自的引用序号从[^index]从index=1开始,来源需要与前文中的"id"一致。 不需要对每个分配一个引用,而是相同共用引用,并确保每一个都被引用。 另外,如果回答的内容文内引用需要引用多个,请添加多个[^index]到句尾。 +注意,不需要你生成文末脚注,即不需要参考列表,只需要在回答中添加文内引用即可。 @@ -431,7 +435,7 @@ class DeepSearch(BaseAgent): print(formated_chunk) formated_chunks.append(formated_chunk) chunk_count += len(ref_dict[reference]) - formated_refs.append(f"[{i + 1}]: " + str(reference) + "\n") + formated_refs.append(f"[^{i + 1}]: " + str(urllib.parse.quote_plus('file://' + str(os.path.abspath(reference)))) + "\n") formated_chunks = "".join(formated_chunks) formated_refs = "".join(formated_refs) return formated_chunks, formated_refs diff --git a/pyproject.toml b/pyproject.toml index a608a00..7931aeb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,6 +11,7 @@ dependencies = [ "langchain-text-splitters>=0.3.8", "numpy>=1.26.4", "openai>=1.77.0", + "pathlib>=1.0.1", "pdfplumber>=0.11.6", "pymilvus>=2.5.8", "requests>=2.32.3", diff --git a/uv.lock b/uv.lock index cd867a7..9f8fcdc 100644 --- a/uv.lock +++ b/uv.lock @@ -880,6 +880,7 @@ dependencies = [ { name = "langchain-text-splitters" }, { name = "numpy" }, { name = "openai" }, + { name = "pathlib" }, { name = "pdfplumber" }, { name = "pymilvus" }, { name = "requests" }, @@ -1001,6 +1002,7 @@ requires-dist = [ { name = "openai", specifier = ">=1.77.0" }, { name = "oracledb", marker = "extra == 'all'", specifier = ">=3.1.0" }, { name = "oracledb", marker = "extra == 'oracledb'", specifier = ">=3.1.0" }, + { name = "pathlib", specifier = ">=1.0.1" }, { name = "pdfplumber", specifier = ">=0.11.6" }, { name = "pymilvus", specifier = ">=2.5.8" }, { name = "qdrant-client", marker = "extra == 'all'", specifier = ">=1.14.2" }, @@ -4045,6 +4047,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/c6/ac/dac4a63f978e4dcb3c6d3a78c4d8e0192a113d288502a1216950c41b1027/parso-0.8.4-py2.py3-none-any.whl", hash = "sha256:a418670a20291dacd2dddc80c377c5c3791378ee1e8d12bffc35420643d43f18", size = 103650, upload-time = "2024-04-05T09:43:53.299Z" }, ] +[[package]] +name = "pathlib" +version = "1.0.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ac/aa/9b065a76b9af472437a0059f77e8f962fe350438b927cb80184c32f075eb/pathlib-1.0.1.tar.gz", hash = "sha256:6940718dfc3eff4258203ad5021090933e5c04707d5ca8cc9e73c94a7894ea9f", size = 49298, upload-time = "2014-09-03T15:41:57.18Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/78/f9/690a8600b93c332de3ab4a344a4ac34f00c8f104917061f779db6a918ed6/pathlib-1.0.1-py3-none-any.whl", hash = "sha256:f35f95ab8b0f59e6d354090350b44a80a80635d22efdedfa84c7ad1cf0a74147", size = 14363, upload-time = "2022-05-04T13:37:20.585Z" }, +] + [[package]] name = "pathspec" version = "0.12.1"