diff --git a/deepsearcher/agent/deep_search.py b/deepsearcher/agent/deep_search.py
index c018827..40eacda 100644
--- a/deepsearcher/agent/deep_search.py
+++ b/deepsearcher/agent/deep_search.py
@@ -21,7 +21,7 @@ Your selected collection name list is:
SUB_QUERY_PROMPT = """
To answer this question more comprehensively, please break down the original question into few numbers of sub-questions
-(the less the better, but more if nesscessary to ensure answering the original question).
+(the fewer the better, but you can give more if necessary to ensure the original question is answered).
If this is a very simple question and no decomposition is necessary, then keep the only one original question.
Make sure each sub-question is clear, concise and atomic.
Return as list of str in python style and json convertable.
@@ -36,6 +36,7 @@ Example input:
Example output:
[
"What is deep learning?",
+ "What is deep learning's purpose?",
"What is the difference between deep learning and machine learning?",
"What is the history of deep learning?"
]
@@ -52,7 +53,7 @@ For each chunk, you must return "YES" or "NO" python style list without any othe
Query: {query}
Retrieved Chunks:
-{retrieved_chunks}
+{chunks}
Respond with a list of "YES" or "NO" values, one for each chunk, in the same order as the chunks are listed.
For example, if there is a list of four chunks, the answer could be: ["YES", "NO", "YES", "YES"]
@@ -82,16 +83,33 @@ You are a AI content analysis expert.
Please generate a long, specific and detailed answer or report based on the previous queries and the retrieved document chunks.
If the chunks are not enough to answer the query or additional information is needed to enhance the content, you should answer with your own knowledge.
In this case, mark the part(s) that generated by your own with your knowledge here
-(Don't place part(s) individually into one paragraph, but insert it the proper place of the report)
+(unref blocks don't need to be indexed and multiple unref blocks are supposed to be split into multiple paragraphs)
Plus, you should quote chunk references and give a list of references at the end of the report.
Here is an example:
-Quote example (an upper quote anchor, strictly apply the format below):
-XGBoost is a powerful ensemble learning method[[2]](#2)
-Reference list example (should be exact the same as the ):
-
+Quote example (an upper quote anchor, strictly apply the html format below):
+
+XGBoost is a powerful ensemble learning method [[2]](#2)
+(It must be quoted using "[[2]](#2)" instead of "[2]")
+
+Reference list example:
+(The href should be exact the same as the what is quoted inside , full path is required and no simplification is allowed)
+
+given chunks and references:
+Random Foreset uses ensembling learning model using bagging strategy
+lab/EnsembleLearning.pdf
+XGBoost is a powerful ensemble learning method that is widely used in machine learning
+DB/data/MachineLearning.pdf
+Gradient Boost requires a fake residual to optimize the parameters
+files/chap_001_models.md
+
+the reference at the end:
+
+
+
+
Original Query: {original_query}
@@ -233,12 +251,7 @@ class DeepSearch(BaseAgent):
continue
# Format all chunks for batch processing
- formatted_chunks = ""
- for i, retrieved_result in enumerate(retrieved_results):
- formatted_chunks += f'''
- \n{retrieved_result.text}\n\n
- \n{retrieved_result.reference}\n
- '''
+ chunks = self._format_chunks(retrieved_results)
# Batch process all chunks with a single LLM call
content = self.llm.chat(
@@ -247,7 +260,7 @@ class DeepSearch(BaseAgent):
"role": "user",
"content": RERANK_PROMPT.format(
query=query,
- retrieved_chunks=formatted_chunks,
+ chunks=chunks,
),
}
]
@@ -297,12 +310,7 @@ class DeepSearch(BaseAgent):
def _generate_more_sub_queries(
self, original_query: str, all_sub_queries: list[str], all_retrieved_results: list[RetrievalResult]
) -> list[str]:
- chunks = []
- for i, chunk in enumerate(all_retrieved_results):
- if self.text_window_splitter and "wider_text" in chunk.metadata:
- chunks.append(chunk.metadata["wider_text"])
- else:
- chunks.append(f'''{chunk.text}{chunk.reference}''')
+ chunks = self._format_chunks(all_retrieved_results)
reflect_prompt = REFLECT_PROMPT.format(
original_query=original_query,
all_sub_queries=all_sub_queries,
@@ -356,7 +364,7 @@ class DeepSearch(BaseAgent):
deduped_len = len(all_search_results)
if undeduped_len - deduped_len != 0:
log.color_print(
- f" Removed {undeduped_len - deduped_len} duplicates "
+ f" Remove {undeduped_len - deduped_len} duplicates "
)
# search_res_from_internet = deduplicate_results(search_res_from_internet)
# all_search_res.extend(search_res_from_vectordb + search_res_from_internet)
@@ -397,13 +405,9 @@ class DeepSearch(BaseAgent):
"""
all_retrieved_results, all_sub_queries = self.retrieve(original_query, **kwargs)
if not all_retrieved_results or len(all_retrieved_results) == 0:
- return f"No relevant information found for query '{original_query}'.", []
- chunks = [] # type: list[str]
- for i, chunk in enumerate(all_retrieved_results):
- if self.text_window_splitter and "wider_text" in chunk.metadata:
- chunks.append(chunk.metadata["wider_text"])
- else:
- chunks.append(f'''{chunk.text}{chunk.reference}''')
+ log.color_print(f"No relevant information found for query '{original_query}'.")
+ return "", []
+ chunks = self._format_chunks(all_retrieved_results)
log.color_print(
f" Summarize answer from all {len(all_retrieved_results)} retrieved chunks... \n"
)
@@ -416,3 +420,14 @@ class DeepSearch(BaseAgent):
log.color_print("\n==== FINAL ANSWER====\n")
log.color_print(self.llm.remove_think(response))
return self.llm.remove_think(response), all_retrieved_results
+
+ def _format_chunks(self, retrieved_results: list[RetrievalResult]):  # Flatten retrieval results into a single string.
+ chunks = [] # type: list[str]
+ for i, chunk in enumerate(retrieved_results):  # one formatted block per result, in input order
+ chunks.append(  # the triple-quoted f-string keeps its literal line breaks and indentation
+ f'''
+ \n{chunk.text}\n\n
+ \n{chunk.reference}\n\n
+ '''
+ )
+ return "".join(chunks)  # blocks are concatenated with no separator; an empty input yields ""
diff --git a/deepsearcher/config.yaml b/deepsearcher/config.yaml
index c3d1c2d..e4fb97d 100644
--- a/deepsearcher/config.yaml
+++ b/deepsearcher/config.yaml
@@ -2,7 +2,7 @@ provide_settings:
llm:
provider: "OpenAILLM"
config:
- model: "Qwen/Qwen3-235B-A22B"
+ model: "Qwen/Qwen3-30B-A3B-Thinking-2507"
api_key: "sk-fpzwvagjkhwysjsozfybvtjzongatcwqdihdxzuijnfdrjzt"
base_url: "https://api.siliconflow.cn/v1"
@@ -83,5 +83,5 @@ query_settings:
max_iter: 1
load_settings:
- chunk_size: 2048
- chunk_overlap: 128
+ chunk_size: 1024
+ chunk_overlap: 256
diff --git a/deepsearcher/online_query.py b/deepsearcher/online_query.py
index e5c3533..77ed6e6 100644
--- a/deepsearcher/online_query.py
+++ b/deepsearcher/online_query.py
@@ -3,7 +3,7 @@ from deepsearcher import configuration
from deepsearcher.vector_db.base import RetrievalResult
-def query(original_query: str, max_iter: int = 3) -> tuple[str, list[RetrievalResult]]:
+def query(original_query: str) -> tuple[str, list[RetrievalResult]]:
"""
Query the knowledge base with a question and get an answer.
@@ -20,12 +20,10 @@ def query(original_query: str, max_iter: int = 3) -> tuple[str, list[RetrievalRe
- A list of retrieval results that were used to generate the answer
"""
default_searcher = configuration.default_searcher
- return default_searcher.query(original_query, max_iter=max_iter)
+ return default_searcher.query(original_query)
-def retrieve(
- original_query: str, max_iter: int = 3
-) -> tuple[list[RetrievalResult], list[str]]:
+def retrieve(original_query: str) -> tuple[list[RetrievalResult], list[str]]:
"""
Retrieve relevant information from the knowledge base without generating an answer.
@@ -42,7 +40,5 @@ def retrieve(
- A list of strings representing consumed tokens
"""
default_searcher = configuration.default_searcher
- retrieved_results, metadata = default_searcher.retrieve(
- original_query, max_iter=max_iter
- )
+ retrieved_results, metadata = default_searcher.retrieve(original_query)
return retrieved_results
diff --git a/examples/data/docs/design_docs/00000000-MEP-Template.md b/docs/design_docs/00000000-MEP-Template.md
similarity index 100%
rename from examples/data/docs/design_docs/00000000-MEP-Template.md
rename to docs/design_docs/00000000-MEP-Template.md
diff --git a/examples/data/docs/design_docs/20210521-datanode_recovery_design.md b/docs/design_docs/20210521-datanode_recovery_design.md
similarity index 100%
rename from examples/data/docs/design_docs/20210521-datanode_recovery_design.md
rename to docs/design_docs/20210521-datanode_recovery_design.md
diff --git a/examples/data/docs/design_docs/20210604-datanode_flowgraph_recovery_design.md b/docs/design_docs/20210604-datanode_flowgraph_recovery_design.md
similarity index 100%
rename from examples/data/docs/design_docs/20210604-datanode_flowgraph_recovery_design.md
rename to docs/design_docs/20210604-datanode_flowgraph_recovery_design.md
diff --git a/examples/data/docs/design_docs/20210731-index_design.md b/docs/design_docs/20210731-index_design.md
similarity index 100%
rename from examples/data/docs/design_docs/20210731-index_design.md
rename to docs/design_docs/20210731-index_design.md
diff --git a/examples/data/docs/design_docs/20211109-milvus_flush_collections.md b/docs/design_docs/20211109-milvus_flush_collections.md
similarity index 100%
rename from examples/data/docs/design_docs/20211109-milvus_flush_collections.md
rename to docs/design_docs/20211109-milvus_flush_collections.md
diff --git a/examples/data/docs/design_docs/20211115-milvus_drop_collection.md b/docs/design_docs/20211115-milvus_drop_collection.md
similarity index 100%
rename from examples/data/docs/design_docs/20211115-milvus_drop_collection.md
rename to docs/design_docs/20211115-milvus_drop_collection.md
diff --git a/examples/data/docs/design_docs/20211214-milvus_hybrid_ts.md b/docs/design_docs/20211214-milvus_hybrid_ts.md
similarity index 100%
rename from examples/data/docs/design_docs/20211214-milvus_hybrid_ts.md
rename to docs/design_docs/20211214-milvus_hybrid_ts.md
diff --git a/examples/data/docs/design_docs/20211215-milvus_timesync.md b/docs/design_docs/20211215-milvus_timesync.md
similarity index 100%
rename from examples/data/docs/design_docs/20211215-milvus_timesync.md
rename to docs/design_docs/20211215-milvus_timesync.md
diff --git a/examples/data/docs/design_docs/20211217-milvus_create_collection.md b/docs/design_docs/20211217-milvus_create_collection.md
similarity index 100%
rename from examples/data/docs/design_docs/20211217-milvus_create_collection.md
rename to docs/design_docs/20211217-milvus_create_collection.md
diff --git a/examples/data/docs/design_docs/20211221-retrieve_entity.md b/docs/design_docs/20211221-retrieve_entity.md
similarity index 100%
rename from examples/data/docs/design_docs/20211221-retrieve_entity.md
rename to docs/design_docs/20211221-retrieve_entity.md
diff --git a/examples/data/docs/design_docs/20211223-knowhere_design.md b/docs/design_docs/20211223-knowhere_design.md
similarity index 100%
rename from examples/data/docs/design_docs/20211223-knowhere_design.md
rename to docs/design_docs/20211223-knowhere_design.md
diff --git a/examples/data/docs/design_docs/20211224-drop_collection_release_resources.md b/docs/design_docs/20211224-drop_collection_release_resources.md
similarity index 100%
rename from examples/data/docs/design_docs/20211224-drop_collection_release_resources.md
rename to docs/design_docs/20211224-drop_collection_release_resources.md
diff --git a/examples/data/docs/design_docs/20211227-milvus_create_index.md b/docs/design_docs/20211227-milvus_create_index.md
similarity index 100%
rename from examples/data/docs/design_docs/20211227-milvus_create_index.md
rename to docs/design_docs/20211227-milvus_create_index.md
diff --git a/examples/data/docs/design_docs/20220105-proxy.md b/docs/design_docs/20220105-proxy.md
similarity index 100%
rename from examples/data/docs/design_docs/20220105-proxy.md
rename to docs/design_docs/20220105-proxy.md
diff --git a/examples/data/docs/design_docs/20220105-query_boolean_expr.md b/docs/design_docs/20220105-query_boolean_expr.md
similarity index 100%
rename from examples/data/docs/design_docs/20220105-query_boolean_expr.md
rename to docs/design_docs/20220105-query_boolean_expr.md
diff --git a/examples/data/docs/design_docs/20220105-root_coordinator_recovery_on_power_failure.md b/docs/design_docs/20220105-root_coordinator_recovery_on_power_failure.md
similarity index 100%
rename from examples/data/docs/design_docs/20220105-root_coordinator_recovery_on_power_failure.md
rename to docs/design_docs/20220105-root_coordinator_recovery_on_power_failure.md
diff --git a/examples/data/docs/design_docs/20220725-dynamic-config.md b/docs/design_docs/20220725-dynamic-config.md
similarity index 100%
rename from examples/data/docs/design_docs/20220725-dynamic-config.md
rename to docs/design_docs/20220725-dynamic-config.md
diff --git a/examples/data/docs/design_docs/20230403-search_by_pk.md b/docs/design_docs/20230403-search_by_pk.md
similarity index 100%
rename from examples/data/docs/design_docs/20230403-search_by_pk.md
rename to docs/design_docs/20230403-search_by_pk.md
diff --git a/examples/data/docs/design_docs/20230405-default_value.md b/docs/design_docs/20230405-default_value.md
similarity index 100%
rename from examples/data/docs/design_docs/20230405-default_value.md
rename to docs/design_docs/20230405-default_value.md
diff --git a/examples/data/docs/design_docs/20230418-querynode_v2.md b/docs/design_docs/20230418-querynode_v2.md
similarity index 100%
rename from examples/data/docs/design_docs/20230418-querynode_v2.md
rename to docs/design_docs/20230418-querynode_v2.md
diff --git a/examples/data/docs/design_docs/20230511-collection_level_autocompaction_switch.md b/docs/design_docs/20230511-collection_level_autocompaction_switch.md
similarity index 100%
rename from examples/data/docs/design_docs/20230511-collection_level_autocompaction_switch.md
rename to docs/design_docs/20230511-collection_level_autocompaction_switch.md
diff --git a/examples/data/docs/design_docs/20230918-datanode_remove_datacoord_dependency.md b/docs/design_docs/20230918-datanode_remove_datacoord_dependency.md
similarity index 100%
rename from examples/data/docs/design_docs/20230918-datanode_remove_datacoord_dependency.md
rename to docs/design_docs/20230918-datanode_remove_datacoord_dependency.md
diff --git a/examples/data/docs/design_docs/20250610-rls_designmd b/docs/design_docs/20250610-rls_designmd
similarity index 100%
rename from examples/data/docs/design_docs/20250610-rls_designmd
rename to docs/design_docs/20250610-rls_designmd
diff --git a/examples/data/WhatisMilvus.pdf b/docs/design_docs/WhatisMilvus.pdf
similarity index 100%
rename from examples/data/WhatisMilvus.pdf
rename to docs/design_docs/WhatisMilvus.pdf
diff --git a/examples/data/docs/design_docs/json_storage.md b/docs/design_docs/json_storage.md
similarity index 100%
rename from examples/data/docs/design_docs/json_storage.md
rename to docs/design_docs/json_storage.md
diff --git a/examples/data/docs/design_docs/primarykey_index.md b/docs/design_docs/primarykey_index.md
similarity index 100%
rename from examples/data/docs/design_docs/primarykey_index.md
rename to docs/design_docs/primarykey_index.md
diff --git a/examples/data/docs/design_docs/segcore/Search.md b/docs/design_docs/segcore/Search.md
similarity index 100%
rename from examples/data/docs/design_docs/segcore/Search.md
rename to docs/design_docs/segcore/Search.md
diff --git a/examples/data/docs/design_docs/segcore/basic_types.md b/docs/design_docs/segcore/basic_types.md
similarity index 100%
rename from examples/data/docs/design_docs/segcore/basic_types.md
rename to docs/design_docs/segcore/basic_types.md
diff --git a/examples/data/docs/design_docs/segcore/scripts_and_tools.md b/docs/design_docs/segcore/scripts_and_tools.md
similarity index 100%
rename from examples/data/docs/design_docs/segcore/scripts_and_tools.md
rename to docs/design_docs/segcore/scripts_and_tools.md
diff --git a/examples/data/docs/design_docs/segcore/segment_growing.md b/docs/design_docs/segcore/segment_growing.md
similarity index 100%
rename from examples/data/docs/design_docs/segcore/segment_growing.md
rename to docs/design_docs/segcore/segment_growing.md
diff --git a/examples/data/docs/design_docs/segcore/segment_interface.md b/docs/design_docs/segcore/segment_interface.md
similarity index 100%
rename from examples/data/docs/design_docs/segcore/segment_interface.md
rename to docs/design_docs/segcore/segment_interface.md
diff --git a/examples/data/docs/design_docs/segcore/segment_overview.md b/docs/design_docs/segcore/segment_overview.md
similarity index 100%
rename from examples/data/docs/design_docs/segcore/segment_overview.md
rename to docs/design_docs/segcore/segment_overview.md
diff --git a/examples/data/docs/design_docs/segcore/segment_sealed.md b/docs/design_docs/segcore/segment_sealed.md
similarity index 100%
rename from examples/data/docs/design_docs/segcore/segment_sealed.md
rename to docs/design_docs/segcore/segment_sealed.md
diff --git a/examples/data/docs/design_docs/segcore/timetravel.md b/docs/design_docs/segcore/timetravel.md
similarity index 100%
rename from examples/data/docs/design_docs/segcore/timetravel.md
rename to docs/design_docs/segcore/timetravel.md
diff --git a/examples/data/docs/design_docs/segcore/visitor.md b/docs/design_docs/segcore/visitor.md
similarity index 100%
rename from examples/data/docs/design_docs/segcore/visitor.md
rename to docs/design_docs/segcore/visitor.md
diff --git a/examples/data/docs/developer_guides/appendix_a_basic_components.md b/docs/developer_guides/appendix_a_basic_components.md
similarity index 100%
rename from examples/data/docs/developer_guides/appendix_a_basic_components.md
rename to docs/developer_guides/appendix_a_basic_components.md
diff --git a/examples/data/docs/developer_guides/appendix_b_api_reference.md b/docs/developer_guides/appendix_b_api_reference.md
similarity index 100%
rename from examples/data/docs/developer_guides/appendix_b_api_reference.md
rename to docs/developer_guides/appendix_b_api_reference.md
diff --git a/examples/data/docs/developer_guides/appendix_c_system_configurations.md b/docs/developer_guides/appendix_c_system_configurations.md
similarity index 100%
rename from examples/data/docs/developer_guides/appendix_c_system_configurations.md
rename to docs/developer_guides/appendix_c_system_configurations.md
diff --git a/examples/data/docs/developer_guides/appendix_d_error_code.md b/docs/developer_guides/appendix_d_error_code.md
similarity index 100%
rename from examples/data/docs/developer_guides/appendix_d_error_code.md
rename to docs/developer_guides/appendix_d_error_code.md
diff --git a/examples/data/docs/developer_guides/appendix_e_statistics.md b/docs/developer_guides/appendix_e_statistics.md
similarity index 100%
rename from examples/data/docs/developer_guides/appendix_e_statistics.md
rename to docs/developer_guides/appendix_e_statistics.md
diff --git a/examples/data/docs/developer_guides/chap01_system_overview.md b/docs/developer_guides/chap01_system_overview.md
similarity index 100%
rename from examples/data/docs/developer_guides/chap01_system_overview.md
rename to docs/developer_guides/chap01_system_overview.md
diff --git a/examples/data/docs/developer_guides/chap02_schema.md b/docs/developer_guides/chap02_schema.md
similarity index 100%
rename from examples/data/docs/developer_guides/chap02_schema.md
rename to docs/developer_guides/chap02_schema.md
diff --git a/examples/data/docs/developer_guides/chap03_index_service.md b/docs/developer_guides/chap03_index_service.md
similarity index 100%
rename from examples/data/docs/developer_guides/chap03_index_service.md
rename to docs/developer_guides/chap03_index_service.md
diff --git a/examples/data/docs/developer_guides/chap04_message_stream.md b/docs/developer_guides/chap04_message_stream.md
similarity index 100%
rename from examples/data/docs/developer_guides/chap04_message_stream.md
rename to docs/developer_guides/chap04_message_stream.md
diff --git a/examples/data/docs/developer_guides/chap05_proxy.md b/docs/developer_guides/chap05_proxy.md
similarity index 100%
rename from examples/data/docs/developer_guides/chap05_proxy.md
rename to docs/developer_guides/chap05_proxy.md
diff --git a/examples/data/docs/developer_guides/chap06_root_coordinator.md b/docs/developer_guides/chap06_root_coordinator.md
similarity index 100%
rename from examples/data/docs/developer_guides/chap06_root_coordinator.md
rename to docs/developer_guides/chap06_root_coordinator.md
diff --git a/examples/data/docs/developer_guides/chap07_query_coordinator.md b/docs/developer_guides/chap07_query_coordinator.md
similarity index 100%
rename from examples/data/docs/developer_guides/chap07_query_coordinator.md
rename to docs/developer_guides/chap07_query_coordinator.md
diff --git a/examples/data/docs/developer_guides/chap08_binlog.md b/docs/developer_guides/chap08_binlog.md
similarity index 100%
rename from examples/data/docs/developer_guides/chap08_binlog.md
rename to docs/developer_guides/chap08_binlog.md
diff --git a/examples/data/docs/developer_guides/chap09_data_coord.md b/docs/developer_guides/chap09_data_coord.md
similarity index 100%
rename from examples/data/docs/developer_guides/chap09_data_coord.md
rename to docs/developer_guides/chap09_data_coord.md
diff --git a/examples/data/docs/developer_guides/developer_guides.md b/docs/developer_guides/developer_guides.md
similarity index 100%
rename from examples/data/docs/developer_guides/developer_guides.md
rename to docs/developer_guides/developer_guides.md
diff --git a/examples/data/docs/developer_guides/how-guarantee-ts-works-cn.md b/docs/developer_guides/how-guarantee-ts-works-cn.md
similarity index 100%
rename from examples/data/docs/developer_guides/how-guarantee-ts-works-cn.md
rename to docs/developer_guides/how-guarantee-ts-works-cn.md
diff --git a/examples/data/docs/developer_guides/how-guarantee-ts-works.md b/docs/developer_guides/how-guarantee-ts-works.md
similarity index 100%
rename from examples/data/docs/developer_guides/how-guarantee-ts-works.md
rename to docs/developer_guides/how-guarantee-ts-works.md
diff --git a/examples/data/docs/developer_guides/how_to_develop_with_local_milvus_proto.md b/docs/developer_guides/how_to_develop_with_local_milvus_proto.md
similarity index 100%
rename from examples/data/docs/developer_guides/how_to_develop_with_local_milvus_proto.md
rename to docs/developer_guides/how_to_develop_with_local_milvus_proto.md
diff --git a/examples/data/docs/developer_guides/proxy-reduce-cn.md b/docs/developer_guides/proxy-reduce-cn.md
similarity index 100%
rename from examples/data/docs/developer_guides/proxy-reduce-cn.md
rename to docs/developer_guides/proxy-reduce-cn.md
diff --git a/examples/data/docs/developer_guides/proxy-reduce.md b/docs/developer_guides/proxy-reduce.md
similarity index 100%
rename from examples/data/docs/developer_guides/proxy-reduce.md
rename to docs/developer_guides/proxy-reduce.md
diff --git a/examples/data/docs/jaeger_guides/opentracing_user_guide.md b/docs/jaeger_guides/opentracing_user_guide.md
similarity index 100%
rename from examples/data/docs/jaeger_guides/opentracing_user_guide.md
rename to docs/jaeger_guides/opentracing_user_guide.md
diff --git a/examples/data/docs/user_guides/clustering_compaction.md b/docs/user_guides/clustering_compaction.md
similarity index 100%
rename from examples/data/docs/user_guides/clustering_compaction.md
rename to docs/user_guides/clustering_compaction.md
diff --git a/examples/data/docs/user_guides/collection_ttl.md b/docs/user_guides/collection_ttl.md
similarity index 100%
rename from examples/data/docs/user_guides/collection_ttl.md
rename to docs/user_guides/collection_ttl.md
diff --git a/examples/data/docs/user_guides/tls_proxy.md b/docs/user_guides/tls_proxy.md
similarity index 100%
rename from examples/data/docs/user_guides/tls_proxy.md
rename to docs/user_guides/tls_proxy.md
diff --git a/examples/basic_example.py b/examples/basic_example.py
deleted file mode 100644
index 59dee0b..0000000
--- a/examples/basic_example.py
+++ /dev/null
@@ -1,35 +0,0 @@
-import logging
-import os
-
-from deepsearcher.offline_loading import load_from_local_files
-from deepsearcher.online_query import query
-from deepsearcher.configuration import Configuration, init_config
-
-httpx_logger = logging.getLogger("httpx") # disable openai's logger output
-httpx_logger.setLevel(logging.WARNING)
-
-current_dir = os.path.dirname(os.path.abspath(__file__))
-
-config = Configuration() # Customize your config here
-init_config(config=config)
-
-
-# You should clone the milvus docs repo to your local machine first, execute:
-# git clone https://github.com/milvus-io/milvus-docs.git
-# Then replace the path below with the path to the milvus-docs repo on your local machine
-# import glob
-# all_md_files = glob.glob('xxx/milvus-docs/site/en/**/*.md', recursive=True)
-# load_from_local_files(paths_or_directory=all_md_files, collection_name="milvus_docs", collection_description="All Milvus Documents")
-
-# Hint: You can also load a single file, please execute it in the root directory of the deep searcher project
-load_from_local_files(
- paths_or_directory=os.path.join(current_dir, "data/WhatisMilvus.pdf"),
- collection_name="milvus_docs",
- collection_description="All Milvus Documents",
- # force_new_collection=True, # If you want to drop origin collection and create a new collection every time, set force_new_collection to True
-)
-
-question = "Write a report comparing Milvus with other vector databases."
-
-_, _, consumed_token = query(question, max_iter=1)
-print(f"Consumed tokens: {consumed_token}")
diff --git a/examples/basic_example_azuresearch.py b/examples/basic_example_azuresearch.py
deleted file mode 100644
index 05001b6..0000000
--- a/examples/basic_example_azuresearch.py
+++ /dev/null
@@ -1,68 +0,0 @@
-import logging
-import os
-import time
-
-from deepsearcher.configuration import Configuration, init_config
-from deepsearcher.online_query import query
-
-# Configure logging
-logging.basicConfig(
- level=logging.INFO,
- format='%(asctime)s - %(levelname)s - %(message)s',
- datefmt='%Y-%m-%d %H:%M:%S'
-)
-logger = logging.getLogger(__name__)
-
-
-
-logger.info("Initializing DeepSearcher configuration")
-config = Configuration()
-config.set_provider_config("llm", "AzureOpenAI", {
- "model": "gpt-4.1",
- "api_key": "",
- "base_url": "https://.openai.azure.com/openai/",
- "api_version": "2024-12-01-preview"
-})
-config.set_provider_config("embedding", "OpenAIEmbedding", {
- "model": "text-embedding-ada-002",
- "api_key": "",
- "azure_endpoint": "https://.openai.azure.com/",
- "api_version": "2023-05-15"
- # Remove api_version and other Azure-specific parameters
-})
-config.set_provider_config("vector_db", "AzureSearch", {
- "endpoint": "https://.search.windows.net",
- "index_name": "",
- "api_key": "",
- "vector_field": "content_vector"
-})
-
-logger.info("Configuration initialized successfully")
-
-try:
- logger.info("Applying global configuration")
- init_config(config)
- logger.info("Configuration applied globally")
-
- # Example question
- question = "Create a detailed report about what Python is all about"
- logger.info(f"Processing query: '{question}'")
-
- start_time = time.time()
- result = query(question)
- query_time = time.time() - start_time
- logger.info(f"Query processed in {query_time:.2f} seconds")
-
- logger.info("Retrieved result successfully")
- print(result[0]) # Print the first element of the tuple
-
- # Check if there's a second element in the tuple that contains source documents
- if len(result) > 1 and hasattr(result[1], "__len__"):
- logger.info(f"Found {len(result[1])} source documents")
- for i, doc in enumerate(result[1]):
- if hasattr(doc, "metadata") and "source" in doc.metadata:
- logger.info(f"Source {i+1}: {doc.metadata['source']}")
-except Exception as e:
- logger.error(f"Error executing query: {str(e)}")
- import traceback
- logger.error(traceback.format_exc())
\ No newline at end of file
diff --git a/examples/basic_example_oracle.py b/examples/basic_example_oracle.py
deleted file mode 100644
index 2d96494..0000000
--- a/examples/basic_example_oracle.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import sys, os
-from pathlib import Path
-script_directory = Path(__file__).resolve().parent.parent
-sys.path.append(os.path.abspath(script_directory))
-
-import logging
-
-httpx_logger = logging.getLogger("httpx") # disable openai's logger output
-httpx_logger.setLevel(logging.WARNING)
-
-current_dir = os.path.dirname(os.path.abspath(__file__))
-
-# Customize your config here
-from deepsearcher.configuration import Configuration, init_config
-
-config = Configuration()
-init_config(config=config)
-
-# # Load your local data
-# # Hint: You can load from a directory or a single file, please execute it in the root directory of the deep searcher project
-
-from deepsearcher.offline_loading import load_from_local_files
-
-load_from_local_files(
- paths_or_directory=os.path.join(current_dir, "data/WhatisMilvus.pdf"),
- collection_name="milvus_docs",
- collection_description="All Milvus Documents",
- # force_new_collection=True, # If you want to drop origin collection and create a new collection every time, set force_new_collection to True
-)
-
-# Query
-from deepsearcher.online_query import query
-
-question = 'Write a report comparing Milvus with other vector databases.'
-answer, retrieved_results, consumed_token = query(question)
-print(answer)
-
-# # get consumed tokens, about: 2.5~3w tokens when using openai gpt-4o model
-# print(f"Consumed tokens: {consumed_token}")
-
diff --git a/examples/basic_watsonx_example.py b/examples/basic_watsonx_example.py
deleted file mode 100644
index 78b5490..0000000
--- a/examples/basic_watsonx_example.py
+++ /dev/null
@@ -1,126 +0,0 @@
-"""
-Example usage of WatsonX embedding and LLM in DeepSearcher.
-
-This example demonstrates how to configure and use IBM WatsonX
-embedding models and language models with DeepSearcher.
-"""
-
-import os
-from deepsearcher.configuration import Configuration
-
-def main():
- """Example of using WatsonX with DeepSearcher."""
-
- # Initialize configuration
- config = Configuration()
-
- # Set up environment variables (alternatively, set these in your shell)
- # os.environ["WATSONX_APIKEY"] = "your-watsonx-api-key"
- # os.environ["WATSONX_URL"] = "https://your-watsonx-instance.com"
- # os.environ["WATSONX_PROJECT_ID"] = "your-project-id"
-
- # Example 1: Configure WatsonX Embedding
- print("=== WatsonX Embedding Configuration ===")
-
- # Basic configuration with default model
- config.set_provider_config("embedding", "WatsonXEmbedding", {})
-
- # Configuration with custom model
- config.set_provider_config("embedding", "WatsonXEmbedding", {
- "model": "ibm/slate-125m-english-rtrvr-v2"
- })
-
- # Configuration with explicit credentials
- # config.set_provider_config("embedding", "WatsonXEmbedding", {
- # "model": "sentence-transformers/all-minilm-l6-v2",
- # "api_key": "your-api-key",
- # "url": "https://your-watsonx-instance.com",
- # "project_id": "your-project-id"
- # })
-
- print("WatsonX Embedding configured successfully!")
-
- # Example 2: Configure WatsonX LLM
- print("\n=== WatsonX LLM Configuration ===")
-
- # Basic configuration with default model
- config.set_provider_config("llm", "WatsonX", {})
-
- # Configuration with custom model and parameters
- config.set_provider_config("llm", "WatsonX", {
- "model": "ibm/granite-3-3-8b-instruct",
- "max_new_tokens": 1000,
- "temperature": 0.7,
- "top_p": 0.9,
- "top_k": 50
- })
-
- # Configuration with IBM Granite model
- config.set_provider_config("llm", "WatsonX", {
- "model": "ibm/granite-3-3-8b-instruct",
- "max_new_tokens": 512,
- "temperature": 0.1
- })
-
- print("WatsonX LLM configured successfully!")
-
- # Example 3: Test embedding functionality
- print("\n=== Testing WatsonX Embedding ===")
- try:
- from deepsearcher.embedding.watsonx_embedding import WatsonXEmbedding
-
- # Check if environment variables are set
- if all(os.getenv(var) for var in ["WATSONX_APIKEY", "WATSONX_URL", "WATSONX_PROJECT_ID"]):
- embedding = WatsonXEmbedding()
-
- # Test single query embedding
- query = "What is artificial intelligence?"
- query_embedding = embedding.embed_query(query)
- print(f"Query embedding dimension: {len(query_embedding)}")
-
- # Test document embeddings
- documents = [
- "Artificial intelligence is a branch of computer science.",
- "Machine learning is a subset of AI.",
- "Deep learning uses neural networks."
- ]
- doc_embeddings = embedding.embed_documents(documents)
- print(f"Document embeddings: {len(doc_embeddings)} vectors of dimension {len(doc_embeddings[0])}")
-
- else:
- print("Environment variables not set. Skipping embedding test.")
-
- except ImportError:
- print("WatsonX dependencies not installed. Run: pip install ibm-watsonx-ai")
- except Exception as e:
- print(f"Error testing embedding: {e}")
-
- # Example 4: Test LLM functionality
- print("\n=== Testing WatsonX LLM ===")
- try:
- from deepsearcher.llm.watsonx import WatsonX
-
- # Check if environment variables are set
- if all(os.getenv(var) for var in ["WATSONX_APIKEY", "WATSONX_URL", "WATSONX_PROJECT_ID"]):
- llm = WatsonX()
-
- # Test chat functionality
- messages = [
- {"role": "system", "content": "You are a helpful AI assistant."},
- {"role": "user", "content": "Explain what artificial intelligence is in one sentence."}
- ]
-
- response = llm.chat(messages)
- print(f"LLM Response: {response.content}")
- print(f"Tokens used: {response.total_tokens}")
-
- else:
- print("Environment variables not set. Skipping LLM test.")
-
- except ImportError:
- print("WatsonX dependencies not installed. Run: pip install ibm-watsonx-ai")
- except Exception as e:
- print(f"Error testing LLM: {e}")
-
-if __name__ == "__main__":
- main()
diff --git a/examples/data/docs/OWNERS b/examples/data/docs/OWNERS
deleted file mode 100644
index a89b070..0000000
--- a/examples/data/docs/OWNERS
+++ /dev/null
@@ -1,7 +0,0 @@
-reviewers:
- - czs007
- - xiaofan-luan
- - scsven
-
-approvers:
- - maintainers
diff --git a/examples/data/docs/design_docs/bucket.png b/examples/data/docs/design_docs/bucket.png
deleted file mode 100644
index bdb1b7d..0000000
Binary files a/examples/data/docs/design_docs/bucket.png and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/IndexState.png b/examples/data/docs/design_docs/graphs/IndexState.png
deleted file mode 100644
index 3cd935a..0000000
Binary files a/examples/data/docs/design_docs/graphs/IndexState.png and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/collection_dm_channels.png b/examples/data/docs/design_docs/graphs/collection_dm_channels.png
deleted file mode 100644
index b66319b..0000000
Binary files a/examples/data/docs/design_docs/graphs/collection_dm_channels.png and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/collection_flowgraph_1_1.jpg b/examples/data/docs/design_docs/graphs/collection_flowgraph_1_1.jpg
deleted file mode 100644
index a4f06ac..0000000
Binary files a/examples/data/docs/design_docs/graphs/collection_flowgraph_1_1.jpg and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/collection_flowgraph_1_n.png b/examples/data/docs/design_docs/graphs/collection_flowgraph_1_n.png
deleted file mode 100644
index 1b27d70..0000000
Binary files a/examples/data/docs/design_docs/graphs/collection_flowgraph_1_n.png and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/collection_flowgraph_n_1.jpg b/examples/data/docs/design_docs/graphs/collection_flowgraph_n_1.jpg
deleted file mode 100644
index 169ce6b..0000000
Binary files a/examples/data/docs/design_docs/graphs/collection_flowgraph_n_1.jpg and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/collection_flowgraph_n_n.jpg b/examples/data/docs/design_docs/graphs/collection_flowgraph_n_n.jpg
deleted file mode 100644
index fbe1a93..0000000
Binary files a/examples/data/docs/design_docs/graphs/collection_flowgraph_n_n.jpg and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/collection_flowgraph_relation.png b/examples/data/docs/design_docs/graphs/collection_flowgraph_relation.png
deleted file mode 100644
index 5f187e5..0000000
Binary files a/examples/data/docs/design_docs/graphs/collection_flowgraph_relation.png and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/create_index.png b/examples/data/docs/design_docs/graphs/create_index.png
deleted file mode 100644
index 2f6b3a5..0000000
Binary files a/examples/data/docs/design_docs/graphs/create_index.png and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/datanode_design_01.jpg b/examples/data/docs/design_docs/graphs/datanode_design_01.jpg
deleted file mode 100644
index 4866b83..0000000
Binary files a/examples/data/docs/design_docs/graphs/datanode_design_01.jpg and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/decouple.jpeg b/examples/data/docs/design_docs/graphs/decouple.jpeg
deleted file mode 100644
index 772b389..0000000
Binary files a/examples/data/docs/design_docs/graphs/decouple.jpeg and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/dml_create_collection.png b/examples/data/docs/design_docs/graphs/dml_create_collection.png
deleted file mode 100644
index a0aa919..0000000
Binary files a/examples/data/docs/design_docs/graphs/dml_create_collection.png and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/dml_drop_collection.png b/examples/data/docs/design_docs/graphs/dml_drop_collection.png
deleted file mode 100644
index 7b9d54a..0000000
Binary files a/examples/data/docs/design_docs/graphs/dml_drop_collection.png and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/dml_release_collection.png b/examples/data/docs/design_docs/graphs/dml_release_collection.png
deleted file mode 100644
index 501aea8..0000000
Binary files a/examples/data/docs/design_docs/graphs/dml_release_collection.png and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/dml_release_flow_graph_on_data_node.png b/examples/data/docs/design_docs/graphs/dml_release_flow_graph_on_data_node.png
deleted file mode 100644
index 9c9cd8e..0000000
Binary files a/examples/data/docs/design_docs/graphs/dml_release_flow_graph_on_data_node.png and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/dynamic_config_flowchart.jpg b/examples/data/docs/design_docs/graphs/dynamic_config_flowchart.jpg
deleted file mode 100644
index b288f64..0000000
Binary files a/examples/data/docs/design_docs/graphs/dynamic_config_flowchart.jpg and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/flowgraph_recovery_design.png b/examples/data/docs/design_docs/graphs/flowgraph_recovery_design.png
deleted file mode 100644
index c151465..0000000
Binary files a/examples/data/docs/design_docs/graphs/flowgraph_recovery_design.png and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/flush_data_coord.png b/examples/data/docs/design_docs/graphs/flush_data_coord.png
deleted file mode 100644
index 2bf98c4..0000000
Binary files a/examples/data/docs/design_docs/graphs/flush_data_coord.png and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/indexcoord_design.png b/examples/data/docs/design_docs/graphs/indexcoord_design.png
deleted file mode 100644
index ec2e580..0000000
Binary files a/examples/data/docs/design_docs/graphs/indexcoord_design.png and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/knn_query.png b/examples/data/docs/design_docs/graphs/knn_query.png
deleted file mode 100644
index 4ac9a5b..0000000
Binary files a/examples/data/docs/design_docs/graphs/knn_query.png and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/knowhere_framework.png b/examples/data/docs/design_docs/graphs/knowhere_framework.png
deleted file mode 100644
index 2f5e81c..0000000
Binary files a/examples/data/docs/design_docs/graphs/knowhere_framework.png and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/milvus_create_index.png b/examples/data/docs/design_docs/graphs/milvus_create_index.png
deleted file mode 100644
index 3efe6a2..0000000
Binary files a/examples/data/docs/design_docs/graphs/milvus_create_index.png and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/milvus_create_index_data_coord_flushed.png b/examples/data/docs/design_docs/graphs/milvus_create_index_data_coord_flushed.png
deleted file mode 100644
index 0330152..0000000
Binary files a/examples/data/docs/design_docs/graphs/milvus_create_index_data_coord_flushed.png and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/milvus_create_index_index_coord.png b/examples/data/docs/design_docs/graphs/milvus_create_index_index_coord.png
deleted file mode 100644
index 11f3a31..0000000
Binary files a/examples/data/docs/design_docs/graphs/milvus_create_index_index_coord.png and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/milvus_create_index_root_coord_check.png b/examples/data/docs/design_docs/graphs/milvus_create_index_root_coord_check.png
deleted file mode 100644
index 182253d..0000000
Binary files a/examples/data/docs/design_docs/graphs/milvus_create_index_root_coord_check.png and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/pk_oracle.png b/examples/data/docs/design_docs/graphs/pk_oracle.png
deleted file mode 100644
index da0d655..0000000
Binary files a/examples/data/docs/design_docs/graphs/pk_oracle.png and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/proxy.png b/examples/data/docs/design_docs/graphs/proxy.png
deleted file mode 100644
index e4a1804..0000000
Binary files a/examples/data/docs/design_docs/graphs/proxy.png and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/proxy_channels.png b/examples/data/docs/design_docs/graphs/proxy_channels.png
deleted file mode 100644
index 4b8a59a..0000000
Binary files a/examples/data/docs/design_docs/graphs/proxy_channels.png and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/segments.png b/examples/data/docs/design_docs/graphs/segments.png
deleted file mode 100644
index e6c5c98..0000000
Binary files a/examples/data/docs/design_docs/graphs/segments.png and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/snapshot_1.png b/examples/data/docs/design_docs/graphs/snapshot_1.png
deleted file mode 100644
index 6d1d7e4..0000000
Binary files a/examples/data/docs/design_docs/graphs/snapshot_1.png and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/snapshot_2.png b/examples/data/docs/design_docs/graphs/snapshot_2.png
deleted file mode 100644
index bebc5bb..0000000
Binary files a/examples/data/docs/design_docs/graphs/snapshot_2.png and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/task_scheduler_1.png b/examples/data/docs/design_docs/graphs/task_scheduler_1.png
deleted file mode 100644
index 2b49631..0000000
Binary files a/examples/data/docs/design_docs/graphs/task_scheduler_1.png and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/task_scheduler_2.png b/examples/data/docs/design_docs/graphs/task_scheduler_2.png
deleted file mode 100644
index d08455d..0000000
Binary files a/examples/data/docs/design_docs/graphs/task_scheduler_2.png and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/time_stamp_struct.jpg b/examples/data/docs/design_docs/graphs/time_stamp_struct.jpg
deleted file mode 100644
index 2226fb7..0000000
Binary files a/examples/data/docs/design_docs/graphs/time_stamp_struct.jpg and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/timesync_msgstream.png b/examples/data/docs/design_docs/graphs/timesync_msgstream.png
deleted file mode 100644
index 1f1a304..0000000
Binary files a/examples/data/docs/design_docs/graphs/timesync_msgstream.png and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/timesync_msgtream_timetick.png b/examples/data/docs/design_docs/graphs/timesync_msgtream_timetick.png
deleted file mode 100644
index f3a1217..0000000
Binary files a/examples/data/docs/design_docs/graphs/timesync_msgtream_timetick.png and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/timesync_proxy_insert_msg.png b/examples/data/docs/design_docs/graphs/timesync_proxy_insert_msg.png
deleted file mode 100644
index c7f421f..0000000
Binary files a/examples/data/docs/design_docs/graphs/timesync_proxy_insert_msg.png and /dev/null differ
diff --git a/examples/data/docs/design_docs/graphs/timesync_proxy_upload_time_tick.png b/examples/data/docs/design_docs/graphs/timesync_proxy_upload_time_tick.png
deleted file mode 100644
index 49444b3..0000000
Binary files a/examples/data/docs/design_docs/graphs/timesync_proxy_upload_time_tick.png and /dev/null differ
diff --git a/examples/data/docs/developer_guides/figs/data_coord.png b/examples/data/docs/developer_guides/figs/data_coord.png
deleted file mode 100644
index 8934136..0000000
Binary files a/examples/data/docs/developer_guides/figs/data_coord.png and /dev/null differ
diff --git a/examples/data/docs/developer_guides/figs/data_organization.png b/examples/data/docs/developer_guides/figs/data_organization.png
deleted file mode 100644
index 4999f82..0000000
Binary files a/examples/data/docs/developer_guides/figs/data_organization.png and /dev/null differ
diff --git a/examples/data/docs/developer_guides/figs/figs.graffle b/examples/data/docs/developer_guides/figs/figs.graffle
deleted file mode 100644
index b1148ab..0000000
Binary files a/examples/data/docs/developer_guides/figs/figs.graffle and /dev/null differ
diff --git a/examples/data/docs/developer_guides/figs/fork-and-pull.png b/examples/data/docs/developer_guides/figs/fork-and-pull.png
deleted file mode 100644
index 325c218..0000000
Binary files a/examples/data/docs/developer_guides/figs/fork-and-pull.png and /dev/null differ
diff --git a/examples/data/docs/developer_guides/figs/guarantee-ts-consistency-relationship.png b/examples/data/docs/developer_guides/figs/guarantee-ts-consistency-relationship.png
deleted file mode 100644
index 1625a3b..0000000
Binary files a/examples/data/docs/developer_guides/figs/guarantee-ts-consistency-relationship.png and /dev/null differ
diff --git a/examples/data/docs/developer_guides/figs/guarantee-ts-do-search-right-now.png b/examples/data/docs/developer_guides/figs/guarantee-ts-do-search-right-now.png
deleted file mode 100644
index 269d46f..0000000
Binary files a/examples/data/docs/developer_guides/figs/guarantee-ts-do-search-right-now.png and /dev/null differ
diff --git a/examples/data/docs/developer_guides/figs/guarantee-ts-ts-mask.png b/examples/data/docs/developer_guides/figs/guarantee-ts-ts-mask.png
deleted file mode 100644
index 6703d9c..0000000
Binary files a/examples/data/docs/developer_guides/figs/guarantee-ts-ts-mask.png and /dev/null differ
diff --git a/examples/data/docs/developer_guides/figs/guarantee-ts-wait-for-service-time.png b/examples/data/docs/developer_guides/figs/guarantee-ts-wait-for-service-time.png
deleted file mode 100644
index 27edffc..0000000
Binary files a/examples/data/docs/developer_guides/figs/guarantee-ts-wait-for-service-time.png and /dev/null differ
diff --git a/examples/data/docs/developer_guides/figs/hard_time_tick_barrier.png b/examples/data/docs/developer_guides/figs/hard_time_tick_barrier.png
deleted file mode 100644
index d28ef24..0000000
Binary files a/examples/data/docs/developer_guides/figs/hard_time_tick_barrier.png and /dev/null differ
diff --git a/examples/data/docs/developer_guides/figs/hlc.png b/examples/data/docs/developer_guides/figs/hlc.png
deleted file mode 100644
index df147bc..0000000
Binary files a/examples/data/docs/developer_guides/figs/hlc.png and /dev/null differ
diff --git a/examples/data/docs/developer_guides/figs/ide_with_newdef.png b/examples/data/docs/developer_guides/figs/ide_with_newdef.png
deleted file mode 100644
index 48d6321..0000000
Binary files a/examples/data/docs/developer_guides/figs/ide_with_newdef.png and /dev/null differ
diff --git a/examples/data/docs/developer_guides/figs/index_coord.png b/examples/data/docs/developer_guides/figs/index_coord.png
deleted file mode 100644
index d24df23..0000000
Binary files a/examples/data/docs/developer_guides/figs/index_coord.png and /dev/null differ
diff --git a/examples/data/docs/developer_guides/figs/local-develop-steps.png b/examples/data/docs/developer_guides/figs/local-develop-steps.png
deleted file mode 100644
index 1e504a3..0000000
Binary files a/examples/data/docs/developer_guides/figs/local-develop-steps.png and /dev/null differ
diff --git a/examples/data/docs/developer_guides/figs/local_distributed_cloud.jpeg b/examples/data/docs/developer_guides/figs/local_distributed_cloud.jpeg
deleted file mode 100644
index 476921d..0000000
Binary files a/examples/data/docs/developer_guides/figs/local_distributed_cloud.jpeg and /dev/null differ
diff --git a/examples/data/docs/developer_guides/figs/master.jpeg b/examples/data/docs/developer_guides/figs/master.jpeg
deleted file mode 100644
index 00b0adc..0000000
Binary files a/examples/data/docs/developer_guides/figs/master.jpeg and /dev/null differ
diff --git a/examples/data/docs/developer_guides/figs/msg_stream_input_output.jpeg b/examples/data/docs/developer_guides/figs/msg_stream_input_output.jpeg
deleted file mode 100644
index 9230491..0000000
Binary files a/examples/data/docs/developer_guides/figs/msg_stream_input_output.jpeg and /dev/null differ
diff --git a/examples/data/docs/developer_guides/figs/nq_topk_search_results.png b/examples/data/docs/developer_guides/figs/nq_topk_search_results.png
deleted file mode 100644
index 7f9e915..0000000
Binary files a/examples/data/docs/developer_guides/figs/nq_topk_search_results.png and /dev/null differ
diff --git a/examples/data/docs/developer_guides/figs/proxy.png b/examples/data/docs/developer_guides/figs/proxy.png
deleted file mode 100644
index 0bd7750..0000000
Binary files a/examples/data/docs/developer_guides/figs/proxy.png and /dev/null differ
diff --git a/examples/data/docs/developer_guides/figs/query_coord.png b/examples/data/docs/developer_guides/figs/query_coord.png
deleted file mode 100644
index ba409d3..0000000
Binary files a/examples/data/docs/developer_guides/figs/query_coord.png and /dev/null differ
diff --git a/examples/data/docs/developer_guides/figs/query_coordinator.png b/examples/data/docs/developer_guides/figs/query_coordinator.png
deleted file mode 100644
index 12499ad..0000000
Binary files a/examples/data/docs/developer_guides/figs/query_coordinator.png and /dev/null differ
diff --git a/examples/data/docs/developer_guides/figs/reduce_results.png b/examples/data/docs/developer_guides/figs/reduce_results.png
deleted file mode 100644
index 602c9d9..0000000
Binary files a/examples/data/docs/developer_guides/figs/reduce_results.png and /dev/null differ
diff --git a/examples/data/docs/developer_guides/figs/root_coord.png b/examples/data/docs/developer_guides/figs/root_coord.png
deleted file mode 100644
index 9c8172c..0000000
Binary files a/examples/data/docs/developer_guides/figs/root_coord.png and /dev/null differ
diff --git a/examples/data/docs/developer_guides/figs/root_coord_create_collection.png b/examples/data/docs/developer_guides/figs/root_coord_create_collection.png
deleted file mode 100644
index 2362dc1..0000000
Binary files a/examples/data/docs/developer_guides/figs/root_coord_create_collection.png and /dev/null differ
diff --git a/examples/data/docs/developer_guides/figs/root_coord_create_index.png b/examples/data/docs/developer_guides/figs/root_coord_create_index.png
deleted file mode 100644
index 19eaabb..0000000
Binary files a/examples/data/docs/developer_guides/figs/root_coord_create_index.png and /dev/null differ
diff --git a/examples/data/docs/developer_guides/figs/root_coord_create_index_automatically.png b/examples/data/docs/developer_guides/figs/root_coord_create_index_automatically.png
deleted file mode 100644
index 7092422..0000000
Binary files a/examples/data/docs/developer_guides/figs/root_coord_create_index_automatically.png and /dev/null differ
diff --git a/examples/data/docs/developer_guides/figs/root_coord_time_sync.png b/examples/data/docs/developer_guides/figs/root_coord_time_sync.png
deleted file mode 100644
index 49444b3..0000000
Binary files a/examples/data/docs/developer_guides/figs/root_coord_time_sync.png and /dev/null differ
diff --git a/examples/data/docs/developer_guides/figs/soft_time_tick_barrier.png b/examples/data/docs/developer_guides/figs/soft_time_tick_barrier.png
deleted file mode 100644
index 99f7bf4..0000000
Binary files a/examples/data/docs/developer_guides/figs/soft_time_tick_barrier.png and /dev/null differ
diff --git a/examples/data/docs/developer_guides/figs/state_sync.png b/examples/data/docs/developer_guides/figs/state_sync.png
deleted file mode 100644
index 56b7bbf..0000000
Binary files a/examples/data/docs/developer_guides/figs/state_sync.png and /dev/null differ
diff --git a/examples/data/docs/developer_guides/figs/system_framework.png b/examples/data/docs/developer_guides/figs/system_framework.png
deleted file mode 100644
index 157ac78..0000000
Binary files a/examples/data/docs/developer_guides/figs/system_framework.png and /dev/null differ
diff --git a/examples/data/docs/developer_guides/figs/time_sync_msg_producer.png b/examples/data/docs/developer_guides/figs/time_sync_msg_producer.png
deleted file mode 100644
index 4c42634..0000000
Binary files a/examples/data/docs/developer_guides/figs/time_sync_msg_producer.png and /dev/null differ
diff --git a/examples/data/docs/imgs/bar.png b/examples/data/docs/imgs/bar.png
deleted file mode 100644
index 3d2ec6b..0000000
Binary files a/examples/data/docs/imgs/bar.png and /dev/null differ
diff --git a/examples/data/docs/imgs/remote.png b/examples/data/docs/imgs/remote.png
deleted file mode 100644
index de400c2..0000000
Binary files a/examples/data/docs/imgs/remote.png and /dev/null differ
diff --git a/examples/data/docs/imgs/settings.png b/examples/data/docs/imgs/settings.png
deleted file mode 100644
index c6237a4..0000000
Binary files a/examples/data/docs/imgs/settings.png and /dev/null differ
diff --git a/examples/data/docs/imgs/terminal.png b/examples/data/docs/imgs/terminal.png
deleted file mode 100644
index cfa7753..0000000
Binary files a/examples/data/docs/imgs/terminal.png and /dev/null differ
diff --git a/examples/data/docs/imgs/vscode.png b/examples/data/docs/imgs/vscode.png
deleted file mode 100644
index c1e9907..0000000
Binary files a/examples/data/docs/imgs/vscode.png and /dev/null differ
diff --git a/examples/data/docs/jaeger_guides/figs/jaeger_detailed_trace_info.png b/examples/data/docs/jaeger_guides/figs/jaeger_detailed_trace_info.png
deleted file mode 100644
index e895a2f..0000000
Binary files a/examples/data/docs/jaeger_guides/figs/jaeger_detailed_trace_info.png and /dev/null differ
diff --git a/examples/data/docs/jaeger_guides/figs/jaeger_home_page.png b/examples/data/docs/jaeger_guides/figs/jaeger_home_page.png
deleted file mode 100644
index c04e54a..0000000
Binary files a/examples/data/docs/jaeger_guides/figs/jaeger_home_page.png and /dev/null differ
diff --git a/examples/data/docs/jaeger_guides/figs/jaeger_single_search_result.png b/examples/data/docs/jaeger_guides/figs/jaeger_single_search_result.png
deleted file mode 100644
index d9dd656..0000000
Binary files a/examples/data/docs/jaeger_guides/figs/jaeger_single_search_result.png and /dev/null differ
diff --git a/examples/data/docs/user_guides/figs/clustering_compaction.png b/examples/data/docs/user_guides/figs/clustering_compaction.png
deleted file mode 100644
index 0934201..0000000
Binary files a/examples/data/docs/user_guides/figs/clustering_compaction.png and /dev/null differ
diff --git a/examples/load_and_crawl_using_docling.py b/examples/load_and_crawl_using_docling.py
deleted file mode 100644
index f02d7ad..0000000
--- a/examples/load_and_crawl_using_docling.py
+++ /dev/null
@@ -1,74 +0,0 @@
-import logging
-import os
-from deepsearcher.offline_loading import load_from_local_files, load_from_website
-from deepsearcher.online_query import query
-from deepsearcher.configuration import Configuration, init_config
-
-# Suppress unnecessary logging from third-party libraries
-logging.getLogger("httpx").setLevel(logging.WARNING)
-
-def main():
- # Step 1: Initialize configuration
- config = Configuration()
-
- # Configure Vector Database and Docling providers
- config.set_provider_config("vector_db", "Milvus", {})
- config.set_provider_config("file_loader", "DoclingLoader", {})
- config.set_provider_config("web_crawler", "DoclingCrawler", {})
-
- # Apply the configuration
- init_config(config)
-
- # Step 2a: Load data from a local file using DoclingLoader
- local_file = "your_local_file_or_directory"
- local_collection_name = "DoclingLocalFiles"
- local_collection_description = "Milvus Documents loaded using DoclingLoader"
-
- print("\n=== Loading local files using DoclingLoader ===")
-
- try:
- load_from_local_files(
- paths_or_directory=local_file,
- collection_name=local_collection_name,
- collection_description=local_collection_description,
- force_new_collection=True
- )
- print(f"Successfully loaded: {local_file}")
- except ValueError as e:
- print(f"Validation error: {str(e)}")
- except Exception as e:
- print(f"Error: {str(e)}")
-
- print("Successfully loaded all local files")
-
- # Step 2b: Crawl URLs using DoclingCrawler
- urls = [
- # Markdown documentation files
- "https://milvus.io/docs/quickstart.md",
- "https://milvus.io/docs/overview.md",
- # PDF example - can handle various URL formats
- "https://arxiv.org/pdf/2408.09869",
- ]
- web_collection_name = "DoclingWebCrawl"
- web_collection_description = "Milvus Documentation crawled using DoclingCrawler"
-
- print("\n=== Crawling web pages using DoclingCrawler ===")
-
-
- load_from_website(
- urls=urls,
- collection_name=web_collection_name,
- collection_description=web_collection_description,
- force_new_collection=True
- )
- print("Successfully crawled all URLs")
-
-
- # Step 3: Query the loaded data
- question = "What is Milvus?"
- result = query(question)
- print(result)
-
-
-if __name__ == "__main__":
- main()
\ No newline at end of file
diff --git a/examples/load_local_file_using_unstructured.py b/examples/load_local_file_using_unstructured.py
deleted file mode 100644
index 4131cdd..0000000
--- a/examples/load_local_file_using_unstructured.py
+++ /dev/null
@@ -1,40 +0,0 @@
-import logging
-import os
-from deepsearcher.offline_loading import load_from_local_files
-from deepsearcher.online_query import query
-from deepsearcher.configuration import Configuration, init_config
-
-# Suppress unnecessary logging from third-party libraries
-logging.getLogger("httpx").setLevel(logging.WARNING)
-
-# (Optional) Set API keys (ensure these are set securely in real applications)
-os.environ['UNSTRUCTURED_API_KEY'] = '***************'
-os.environ['UNSTRUCTURED_API_URL'] = '***************'
-
-
-def main():
- # Step 1: Initialize configuration
- config = Configuration()
-
- # Configure Vector Database (Milvus) and File Loader (UnstructuredLoader)
- config.set_provider_config("vector_db", "Milvus", {})
- config.set_provider_config("file_loader", "UnstructuredLoader", {})
-
- # Apply the configuration
- init_config(config)
-
- # Step 2: Load data from a local file or directory into Milvus
- input_file = "your_local_file_or_directory" # Replace with your actual file path
- collection_name = "Unstructured"
- collection_description = "All Milvus Documents"
-
- load_from_local_files(paths_or_directory=input_file, collection_name=collection_name, collection_description=collection_description)
-
- # Step 3: Query the loaded data
- question = "What is Milvus?" # Replace with your actual question
- result = query(question)
- print(result)
-
-
-if __name__ == "__main__":
- main()
\ No newline at end of file
diff --git a/examples/load_website_using_firecrawl.py b/examples/load_website_using_firecrawl.py
deleted file mode 100644
index fbbdf94..0000000
--- a/examples/load_website_using_firecrawl.py
+++ /dev/null
@@ -1,43 +0,0 @@
-import logging
-import os
-from deepsearcher.offline_loading import load_from_website
-from deepsearcher.online_query import query
-from deepsearcher.configuration import Configuration, init_config
-
-# Suppress unnecessary logging from third-party libraries
-logging.getLogger("httpx").setLevel(logging.WARNING)
-
-# Set API keys (ensure these are set securely in real applications)
-os.environ['OPENAI_API_KEY'] = 'sk-***************'
-os.environ['FIRECRAWL_API_KEY'] = 'fc-***************'
-
-
-def main():
- # Step 1: Initialize configuration
- config = Configuration()
-
- # Set up Vector Database (Milvus) and Web Crawler (FireCrawlCrawler)
- config.set_provider_config("vector_db", "Milvus", {})
- config.set_provider_config("web_crawler", "FireCrawlCrawler", {})
-
- # Apply the configuration
- init_config(config)
-
- # Step 2: Load data from a website into Milvus
- website_url = "https://example.com" # Replace with your target website
- collection_name = "FireCrawl"
- collection_description = "All Milvus Documents"
-
- # crawl a single webpage
- load_from_website(urls=website_url, collection_name=collection_name, collection_description=collection_description)
- # only applicable if using Firecrawl: deepsearcher can crawl multiple webpages, by setting max_depth, limit, allow_backward_links
- # load_from_website(urls=website_url, max_depth=2, limit=20, allow_backward_links=True, collection_name=collection_name, collection_description=collection_description)
-
- # Step 3: Query the loaded data
- question = "What is Milvus?" # Replace with your actual question
- result = query(question)
- print(result)
-
-
-if __name__ == "__main__":
- main()
diff --git a/test.py b/test.py
index 0e78fc8..aee442d 100644
--- a/test.py
+++ b/test.py
@@ -12,7 +12,7 @@ init_config(config = config)
# Load your local data
load_from_local_files(
- paths_or_directory="examples/data",
+ paths_or_directory="docs",
collection_name="default",
collection_description="a general collection for all documents",
force_rebuild=True, batch_size=8