
Change the <chunk><reference> format so that a more accurate reference list can be generated at the end of the report

Branch: main
tanxing committed 6 days ago
commit c221188282
  1. deepsearcher/agent/deep_search.py (71)
  2. deepsearcher/config.yaml (6)
  3. deepsearcher/online_query.py (12)
  4. docs/design_docs/00000000-MEP-Template.md (0)
  5. docs/design_docs/20210521-datanode_recovery_design.md (0)
  6. docs/design_docs/20210604-datanode_flowgraph_recovery_design.md (0)
  7. docs/design_docs/20210731-index_design.md (0)
  8. docs/design_docs/20211109-milvus_flush_collections.md (0)
  9. docs/design_docs/20211115-milvus_drop_collection.md (0)
  10. docs/design_docs/20211214-milvus_hybrid_ts.md (0)
  11. docs/design_docs/20211215-milvus_timesync.md (0)
  12. docs/design_docs/20211217-milvus_create_collection.md (0)
  13. docs/design_docs/20211221-retrieve_entity.md (0)
  14. docs/design_docs/20211223-knowhere_design.md (0)
  15. docs/design_docs/20211224-drop_collection_release_resources.md (0)
  16. docs/design_docs/20211227-milvus_create_index.md (0)
  17. docs/design_docs/20220105-proxy.md (0)
  18. docs/design_docs/20220105-query_boolean_expr.md (0)
  19. docs/design_docs/20220105-root_coordinator_recovery_on_power_failure.md (0)
  20. docs/design_docs/20220725-dynamic-config.md (0)
  21. docs/design_docs/20230403-search_by_pk.md (0)
  22. docs/design_docs/20230405-default_value.md (0)
  23. docs/design_docs/20230418-querynode_v2.md (0)
  24. docs/design_docs/20230511-collection_level_autocompaction_switch.md (0)
  25. docs/design_docs/20230918-datanode_remove_datacoord_dependency.md (0)
  26. docs/design_docs/20250610-rls_designmd (0)
  27. docs/design_docs/WhatisMilvus.pdf (0)
  28. docs/design_docs/json_storage.md (0)
  29. docs/design_docs/primarykey_index.md (0)
  30. docs/design_docs/segcore/Search.md (0)
  31. docs/design_docs/segcore/basic_types.md (0)
  32. docs/design_docs/segcore/scripts_and_tools.md (0)
  33. docs/design_docs/segcore/segment_growing.md (0)
  34. docs/design_docs/segcore/segment_interface.md (0)
  35. docs/design_docs/segcore/segment_overview.md (0)
  36. docs/design_docs/segcore/segment_sealed.md (0)
  37. docs/design_docs/segcore/timetravel.md (0)
  38. docs/design_docs/segcore/visitor.md (0)
  39. docs/developer_guides/appendix_a_basic_components.md (0)
  40. docs/developer_guides/appendix_b_api_reference.md (0)
  41. docs/developer_guides/appendix_c_system_configurations.md (0)
  42. docs/developer_guides/appendix_d_error_code.md (0)
  43. docs/developer_guides/appendix_e_statistics.md (0)
  44. docs/developer_guides/chap01_system_overview.md (0)
  45. docs/developer_guides/chap02_schema.md (0)
  46. docs/developer_guides/chap03_index_service.md (0)
  47. docs/developer_guides/chap04_message_stream.md (0)
  48. docs/developer_guides/chap05_proxy.md (0)
  49. docs/developer_guides/chap06_root_coordinator.md (0)
  50. docs/developer_guides/chap07_query_coordinator.md (0)
  51. docs/developer_guides/chap08_binlog.md (0)
  52. docs/developer_guides/chap09_data_coord.md (0)
  53. docs/developer_guides/developer_guides.md (0)
  54. docs/developer_guides/how-guarantee-ts-works-cn.md (0)
  55. docs/developer_guides/how-guarantee-ts-works.md (0)
  56. docs/developer_guides/how_to_develop_with_local_milvus_proto.md (0)
  57. docs/developer_guides/proxy-reduce-cn.md (0)
  58. docs/developer_guides/proxy-reduce.md (0)
  59. docs/jaeger_guides/opentracing_user_guide.md (0)
  60. docs/user_guides/clustering_compaction.md (0)
  61. docs/user_guides/collection_ttl.md (0)
  62. docs/user_guides/tls_proxy.md (0)
  63. examples/basic_example.py (35)
  64. examples/basic_example_azuresearch.py (68)
  65. examples/basic_example_oracle.py (40)
  66. examples/basic_watsonx_example.py (126)
  67. examples/data/docs/OWNERS (7)
  68. examples/data/docs/design_docs/bucket.png (BIN)
  69. examples/data/docs/design_docs/graphs/IndexState.png (BIN)
  70. examples/data/docs/design_docs/graphs/collection_dm_channels.png (BIN)
  71. examples/data/docs/design_docs/graphs/collection_flowgraph_1_1.jpg (BIN)
  72. examples/data/docs/design_docs/graphs/collection_flowgraph_1_n.png (BIN)
  73. examples/data/docs/design_docs/graphs/collection_flowgraph_n_1.jpg (BIN)
  74. examples/data/docs/design_docs/graphs/collection_flowgraph_n_n.jpg (BIN)
  75. examples/data/docs/design_docs/graphs/collection_flowgraph_relation.png (BIN)
  76. examples/data/docs/design_docs/graphs/create_index.png (BIN)
  77. examples/data/docs/design_docs/graphs/datanode_design_01.jpg (BIN)
  78. examples/data/docs/design_docs/graphs/decouple.jpeg (BIN)
  79. examples/data/docs/design_docs/graphs/dml_create_collection.png (BIN)
  80. examples/data/docs/design_docs/graphs/dml_drop_collection.png (BIN)
  81. examples/data/docs/design_docs/graphs/dml_release_collection.png (BIN)
  82. examples/data/docs/design_docs/graphs/dml_release_flow_graph_on_data_node.png (BIN)
  83. examples/data/docs/design_docs/graphs/dynamic_config_flowchart.jpg (BIN)
  84. examples/data/docs/design_docs/graphs/flowgraph_recovery_design.png (BIN)
  85. examples/data/docs/design_docs/graphs/flush_data_coord.png (BIN)
  86. examples/data/docs/design_docs/graphs/indexcoord_design.png (BIN)
  87. examples/data/docs/design_docs/graphs/knn_query.png (BIN)
  88. examples/data/docs/design_docs/graphs/knowhere_framework.png (BIN)
  89. examples/data/docs/design_docs/graphs/milvus_create_index.png (BIN)
  90. examples/data/docs/design_docs/graphs/milvus_create_index_data_coord_flushed.png (BIN)
  91. examples/data/docs/design_docs/graphs/milvus_create_index_index_coord.png (BIN)
  92. examples/data/docs/design_docs/graphs/milvus_create_index_root_coord_check.png (BIN)
  93. examples/data/docs/design_docs/graphs/pk_oracle.png (BIN)
  94. examples/data/docs/design_docs/graphs/proxy.png (BIN)
  95. examples/data/docs/design_docs/graphs/proxy_channels.png (BIN)
  96. examples/data/docs/design_docs/graphs/segments.png (BIN)
  97. examples/data/docs/design_docs/graphs/snapshot_1.png (BIN)
  98. examples/data/docs/design_docs/graphs/snapshot_2.png (BIN)
  99. examples/data/docs/design_docs/graphs/task_scheduler_1.png (BIN)
  100. examples/data/docs/design_docs/graphs/task_scheduler_2.png (BIN)

71
deepsearcher/agent/deep_search.py

@@ -21,7 +21,7 @@ Your selected collection name list is:
 SUB_QUERY_PROMPT = """
 To answer this question more comprehensively, please break down the original question into few numbers of sub-questions
-(the less the better, but more if nesscessary to ensure answering the original question).
+(the less the better, but you can give more if nesscessary, to ensure answering the original question).
 If this is a very simple question and no decomposition is necessary, then keep the only one original question.
 Make sure each sub-question is clear, concise and atomic.
 Return as list of str in python style and json convertable.
@@ -36,6 +36,7 @@ Example input:
 Example output:
 [
 "What is deep learning?",
+"What is deep learning's purpose?",
 "What is the difference between deep learning and machine learning?",
 "What is the history of deep learning?"
 ]
@@ -52,7 +53,7 @@ For each chunk, you must return "YES" or "NO" python style list without any othe
 Query: {query}
 Retrieved Chunks:
-{retrieved_chunks}
+{chunks}
 Respond with a list of "YES" or "NO" values, one for each chunk, in the same order as the chunks are listed.
 For example, if there is a list of four chunks, the answer could be: ["YES", "NO", "YES", "YES"]
@@ -82,16 +83,33 @@ You are a AI content analysis expert.
 Please generate a long, specific and detailed answer or report based on the previous queries and the retrieved document chunks.
 If the chunks are not enough to answer the query or additional information is needed to enhance the content, you should answer with your own knowledge.
 In this case, mark the part(s) that generated by your own with <unref>your knowledge here</unref>
-(Don't place <unref></unref> part(s) individually into one paragraph, but insert it the proper place of the report)
+(unref blocks don't need to be indexed and multiple unref blocks are supposed to be split into multiple paragraphs)
 Plus, you should quote chunk references and give a list of references at the end of the report.
 Here is an example:
 <EXAMPLE>
-Quote example (an upper quote anchor, strictly apply the format below):
-XGBoost is a powerful ensemble learning method[<sup>[2]</sup>](#2)
-Reference list example (should be exact the same as the <reference><reference>):
-<div id="2"><a href="MachineLearning.pdf">[2] MachineLearning.pdf</a></div>
+Quote example (an upper quote anchor, strictly apply the html format below):
+XGBoost is a powerful ensemble learning method [<sup>[2]</sup>](#2)
+(It must be quote using "[<sup>[2]</sup>](#2)" instead of "[2]")
+Reference list example:
+(The href should be exact the same as the what is quoted inside <reference></reference>, full path is required and no simplification is allowed)
+given chunks and references:
+<chunk 1>Random Foreset uses ensembling learning model using bagging strategy</chunk 1>
+<reference 1>lab/EnsembleLearning.pdf</reference 1>
+<chunk 2>XGBoost is a powerful ensemble learning method that is widely used in machine learning</chunk 2>
+<reference 2>DB/data/MachineLearning.pdf</reference 2>
+<chunk 3>Gradient Boost requires a fake residual to optimize the parameters</chunk 3>
+<reference 3>files/chap_001_models.md</reference 3>
+the reference at the end:
+<div id="1"><a href="lab/EnsembleLearning.pdf">[1] EnsembleLearning.pdf</a></div>
+<div id="2"><a href="DB/data/MachineLearning.pdf">[2] MachineLearning.pdf</a></div>
+<div id="3"><a href="files/docs/chap_001_003_models.md">[3] chap_001_models.md</a></div>
 </EXAMPLE>
 Original Query: {original_query}
@@ -233,12 +251,7 @@ class DeepSearch(BaseAgent):
 continue
 # Format all chunks for batch processing
-formatted_chunks = ""
-for i, retrieved_result in enumerate(retrieved_results):
-formatted_chunks += f'''
-<chunk_{i + 1}>\n{retrieved_result.text}\n</chunk_{i + 1}>\n
-<reference_{i + 1}>\n{retrieved_result.reference}\n</reference_{i + 1}>
-'''
+chunks = self._format_chunks(retrieved_results)
 # Batch process all chunks with a single LLM call
 content = self.llm.chat(
@@ -247,7 +260,7 @@ class DeepSearch(BaseAgent):
 "role": "user",
 "content": RERANK_PROMPT.format(
 query=query,
-retrieved_chunks=formatted_chunks,
+chunks=chunks,
 ),
 }
 ]
@@ -297,12 +310,7 @@ class DeepSearch(BaseAgent):
 def _generate_more_sub_queries(
 self, original_query: str, all_sub_queries: list[str], all_retrieved_results: list[RetrievalResult]
 ) -> list[str]:
-chunks = []
-for i, chunk in enumerate(all_retrieved_results):
-if self.text_window_splitter and "wider_text" in chunk.metadata:
-chunks.append(chunk.metadata["wider_text"])
-else:
-chunks.append(f'''<chunk {i + 1}>{chunk.text}</chunk {i + 1}><reference {i + 1}>{chunk.reference}</reference {i + 1}>''')
+chunks = self._format_chunks(all_retrieved_results)
 reflect_prompt = REFLECT_PROMPT.format(
 original_query=original_query,
 all_sub_queries=all_sub_queries,
@@ -356,7 +364,7 @@ class DeepSearch(BaseAgent):
 deduped_len = len(all_search_results)
 if undeduped_len - deduped_len != 0:
 log.color_print(
-f"<search> Removed {undeduped_len - deduped_len} duplicates </search> "
+f"<search> Remove {undeduped_len - deduped_len} duplicates </search> "
)
# search_res_from_internet = deduplicate_results(search_res_from_internet)
# all_search_res.extend(search_res_from_vectordb + search_res_from_internet)
@@ -397,13 +405,9 @@ class DeepSearch(BaseAgent):
 """
 all_retrieved_results, all_sub_queries = self.retrieve(original_query, **kwargs)
 if not all_retrieved_results or len(all_retrieved_results) == 0:
-return f"No relevant information found for query '{original_query}'.", []
-chunks = []  # type: list[str]
-for i, chunk in enumerate(all_retrieved_results):
-if self.text_window_splitter and "wider_text" in chunk.metadata:
-chunks.append(chunk.metadata["wider_text"])
-else:
-chunks.append(f'''<chunk {i + 1}>{chunk.text}</chunk {i + 1}><reference {i + 1}>{chunk.reference}</reference {i + 1}>''')
+log.color_print(f"No relevant information found for query '{original_query}'.")
+return "", []
+chunks = self._format_chunks(all_retrieved_results)
log.color_print(
f"<think> Summarize answer from all {len(all_retrieved_results)} retrieved chunks... </think>\n"
)
@@ -416,3 +420,14 @@ class DeepSearch(BaseAgent):
 log.color_print("\n==== FINAL ANSWER====\n")
 log.color_print(self.llm.remove_think(response))
 return self.llm.remove_think(response), all_retrieved_results
+def _format_chunks(self, retrieved_results: list[RetrievalResult]):
+chunks = []  # type: list[str]
+for i, chunk in enumerate(retrieved_results):
+chunks.append(
+f'''
+<chunk {i + 1}>\n{chunk.text}\n</chunk {i + 1}>\n
+<reference {i + 1}>\n{chunk.reference}\n</reference {i + 1}>\n
+'''
+)
+return "".join(chunks)
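For reference, the effect of the new helper can be sketched standalone. `RetrievalResult` below is a hypothetical stand-in for `deepsearcher.vector_db.base.RetrievalResult`, modeling only the `.text` and `.reference` attributes the formatter reads:

```python
from dataclasses import dataclass


@dataclass
class RetrievalResult:
    # Hypothetical stand-in; only the two attributes the formatter uses.
    text: str
    reference: str


def format_chunks(retrieved_results: list[RetrievalResult]) -> str:
    # Same layout as the new _format_chunks: numbered <chunk i>/<reference i>
    # pairs, so the report prompt can map each quote back to a full path.
    parts = []
    for i, chunk in enumerate(retrieved_results):
        parts.append(
            f"\n<chunk {i + 1}>\n{chunk.text}\n</chunk {i + 1}>\n"
            f"<reference {i + 1}>\n{chunk.reference}\n</reference {i + 1}>\n"
        )
    return "".join(parts)


formatted = format_chunks([
    RetrievalResult(
        "XGBoost is a powerful ensemble learning method",
        "DB/data/MachineLearning.pdf",
    ),
])
print(formatted)
```

Because each `<reference i>` carries the full path, the model can emit the `<div id="i"><a href="…">` reference list verbatim instead of reconstructing paths, which is what makes the end-of-report references more accurate.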

6
deepsearcher/config.yaml

@@ -2,7 +2,7 @@ provide_settings:
 llm:
 provider: "OpenAILLM"
 config:
-model: "Qwen/Qwen3-235B-A22B"
+model: "Qwen/Qwen3-30B-A3B-Thinking-2507"
 api_key: "sk-fpzwvagjkhwysjsozfybvtjzongatcwqdihdxzuijnfdrjzt"
 base_url: "https://api.siliconflow.cn/v1"
@@ -83,5 +83,5 @@ query_settings:
 max_iter: 1
 load_settings:
-chunk_size: 2048
-chunk_overlap: 128
+chunk_size: 1024
+chunk_overlap: 256
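The new load settings halve the chunk size and double the overlap, so consecutive chunks share proportionally more context. A minimal sketch of how the two numbers interact in a generic sliding-window splitter (a hypothetical illustration, not the project's actual loader):

```python
def split_text(text: str, chunk_size: int = 1024, chunk_overlap: int = 256) -> list[str]:
    # Each new chunk starts chunk_size - chunk_overlap characters after the
    # previous one, so consecutive chunks share chunk_overlap characters.
    step = chunk_size - chunk_overlap
    return [text[i:i + chunk_size] for i in range(0, len(text), step)]


text = "".join(str(i % 10) for i in range(3000))
chunks = split_text(text)
# The tail of each chunk equals the head of the next (the overlap region).
```

With 1024/256 the stride is 768 characters, versus 1920 under the old 2048/128 settings, producing more, smaller, more redundant chunks per document.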

12
deepsearcher/online_query.py

@@ -3,7 +3,7 @@ from deepsearcher import configuration
 from deepsearcher.vector_db.base import RetrievalResult
-def query(original_query: str, max_iter: int = 3) -> tuple[str, list[RetrievalResult]]:
+def query(original_query: str) -> tuple[str, list[RetrievalResult]]:
 """
 Query the knowledge base with a question and get an answer.
@@ -20,12 +20,10 @@ def query(original_query: str, max_iter: int = 3) -> tuple[str, list[RetrievalRe
 - A list of retrieval results that were used to generate the answer
 """
 default_searcher = configuration.default_searcher
-return default_searcher.query(original_query, max_iter=max_iter)
+return default_searcher.query(original_query)
-def retrieve(
-original_query: str, max_iter: int = 3
-) -> tuple[list[RetrievalResult], list[str]]:
+def retrieve(original_query: str) -> tuple[list[RetrievalResult], list[str]]:
 """
 Retrieve relevant information from the knowledge base without generating an answer.
@@ -42,7 +40,5 @@ def retrieve(
 - A list of strings representing consumed tokens
 """
 default_searcher = configuration.default_searcher
-retrieved_results, metadata = default_searcher.retrieve(
-original_query, max_iter=max_iter
-)
+retrieved_results, metadata = default_searcher.retrieve(original_query)
 return retrieved_results
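After this change, callers no longer pass `max_iter`; the iteration limit comes from `query_settings.max_iter` in config.yaml via the configured searcher. A hedged sketch of the simplified call path using stand-in classes (deepsearcher itself is not imported here, and `DeepSearchStub` is hypothetical):

```python
class DeepSearchStub:
    # Stand-in for the configured DeepSearch agent: max_iter is now injected
    # from configuration at construction time, not supplied per call.
    def __init__(self, max_iter: int):
        self.max_iter = max_iter

    def query(self, original_query: str):
        return f"answer to: {original_query}", []


# Mirrors configuration.default_searcher built from query_settings.
default_searcher = DeepSearchStub(max_iter=1)


def query(original_query: str):
    # Mirrors the simplified deepsearcher/online_query.py: no max_iter pass-through.
    return default_searcher.query(original_query)


answer, results = query("What is Milvus?")
```

The trade-off is that per-call tuning of iteration depth is gone; changing the depth now means editing config.yaml, which keeps the public API minimal.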

0  examples/data/docs/design_docs/00000000-MEP-Template.md → docs/design_docs/00000000-MEP-Template.md
0  examples/data/docs/design_docs/20210521-datanode_recovery_design.md → docs/design_docs/20210521-datanode_recovery_design.md
0  examples/data/docs/design_docs/20210604-datanode_flowgraph_recovery_design.md → docs/design_docs/20210604-datanode_flowgraph_recovery_design.md
0  examples/data/docs/design_docs/20210731-index_design.md → docs/design_docs/20210731-index_design.md
0  examples/data/docs/design_docs/20211109-milvus_flush_collections.md → docs/design_docs/20211109-milvus_flush_collections.md
0  examples/data/docs/design_docs/20211115-milvus_drop_collection.md → docs/design_docs/20211115-milvus_drop_collection.md
0  examples/data/docs/design_docs/20211214-milvus_hybrid_ts.md → docs/design_docs/20211214-milvus_hybrid_ts.md
0  examples/data/docs/design_docs/20211215-milvus_timesync.md → docs/design_docs/20211215-milvus_timesync.md
0  examples/data/docs/design_docs/20211217-milvus_create_collection.md → docs/design_docs/20211217-milvus_create_collection.md
0  examples/data/docs/design_docs/20211221-retrieve_entity.md → docs/design_docs/20211221-retrieve_entity.md
0  examples/data/docs/design_docs/20211223-knowhere_design.md → docs/design_docs/20211223-knowhere_design.md
0  examples/data/docs/design_docs/20211224-drop_collection_release_resources.md → docs/design_docs/20211224-drop_collection_release_resources.md
0  examples/data/docs/design_docs/20211227-milvus_create_index.md → docs/design_docs/20211227-milvus_create_index.md
0  examples/data/docs/design_docs/20220105-proxy.md → docs/design_docs/20220105-proxy.md
0  examples/data/docs/design_docs/20220105-query_boolean_expr.md → docs/design_docs/20220105-query_boolean_expr.md
0  examples/data/docs/design_docs/20220105-root_coordinator_recovery_on_power_failure.md → docs/design_docs/20220105-root_coordinator_recovery_on_power_failure.md
0  examples/data/docs/design_docs/20220725-dynamic-config.md → docs/design_docs/20220725-dynamic-config.md
0  examples/data/docs/design_docs/20230403-search_by_pk.md → docs/design_docs/20230403-search_by_pk.md
0  examples/data/docs/design_docs/20230405-default_value.md → docs/design_docs/20230405-default_value.md
0  examples/data/docs/design_docs/20230418-querynode_v2.md → docs/design_docs/20230418-querynode_v2.md
0  examples/data/docs/design_docs/20230511-collection_level_autocompaction_switch.md → docs/design_docs/20230511-collection_level_autocompaction_switch.md
0  examples/data/docs/design_docs/20230918-datanode_remove_datacoord_dependency.md → docs/design_docs/20230918-datanode_remove_datacoord_dependency.md
0  examples/data/docs/design_docs/20250610-rls_designmd → docs/design_docs/20250610-rls_designmd
0  examples/data/WhatisMilvus.pdf → docs/design_docs/WhatisMilvus.pdf
0  examples/data/docs/design_docs/json_storage.md → docs/design_docs/json_storage.md
0  examples/data/docs/design_docs/primarykey_index.md → docs/design_docs/primarykey_index.md
0  examples/data/docs/design_docs/segcore/Search.md → docs/design_docs/segcore/Search.md
0  examples/data/docs/design_docs/segcore/basic_types.md → docs/design_docs/segcore/basic_types.md
0  examples/data/docs/design_docs/segcore/scripts_and_tools.md → docs/design_docs/segcore/scripts_and_tools.md
0  examples/data/docs/design_docs/segcore/segment_growing.md → docs/design_docs/segcore/segment_growing.md
0  examples/data/docs/design_docs/segcore/segment_interface.md → docs/design_docs/segcore/segment_interface.md
0  examples/data/docs/design_docs/segcore/segment_overview.md → docs/design_docs/segcore/segment_overview.md
0  examples/data/docs/design_docs/segcore/segment_sealed.md → docs/design_docs/segcore/segment_sealed.md
0  examples/data/docs/design_docs/segcore/timetravel.md → docs/design_docs/segcore/timetravel.md
0  examples/data/docs/design_docs/segcore/visitor.md → docs/design_docs/segcore/visitor.md
0  examples/data/docs/developer_guides/appendix_a_basic_components.md → docs/developer_guides/appendix_a_basic_components.md
0  examples/data/docs/developer_guides/appendix_b_api_reference.md → docs/developer_guides/appendix_b_api_reference.md
0  examples/data/docs/developer_guides/appendix_c_system_configurations.md → docs/developer_guides/appendix_c_system_configurations.md
0  examples/data/docs/developer_guides/appendix_d_error_code.md → docs/developer_guides/appendix_d_error_code.md
0  examples/data/docs/developer_guides/appendix_e_statistics.md → docs/developer_guides/appendix_e_statistics.md
0  examples/data/docs/developer_guides/chap01_system_overview.md → docs/developer_guides/chap01_system_overview.md
0  examples/data/docs/developer_guides/chap02_schema.md → docs/developer_guides/chap02_schema.md
0  examples/data/docs/developer_guides/chap03_index_service.md → docs/developer_guides/chap03_index_service.md
0  examples/data/docs/developer_guides/chap04_message_stream.md → docs/developer_guides/chap04_message_stream.md
0  examples/data/docs/developer_guides/chap05_proxy.md → docs/developer_guides/chap05_proxy.md
0  examples/data/docs/developer_guides/chap06_root_coordinator.md → docs/developer_guides/chap06_root_coordinator.md
0  examples/data/docs/developer_guides/chap07_query_coordinator.md → docs/developer_guides/chap07_query_coordinator.md
0  examples/data/docs/developer_guides/chap08_binlog.md → docs/developer_guides/chap08_binlog.md
0  examples/data/docs/developer_guides/chap09_data_coord.md → docs/developer_guides/chap09_data_coord.md
0  examples/data/docs/developer_guides/developer_guides.md → docs/developer_guides/developer_guides.md
0  examples/data/docs/developer_guides/how-guarantee-ts-works-cn.md → docs/developer_guides/how-guarantee-ts-works-cn.md
0  examples/data/docs/developer_guides/how-guarantee-ts-works.md → docs/developer_guides/how-guarantee-ts-works.md
0  examples/data/docs/developer_guides/how_to_develop_with_local_milvus_proto.md → docs/developer_guides/how_to_develop_with_local_milvus_proto.md
0  examples/data/docs/developer_guides/proxy-reduce-cn.md → docs/developer_guides/proxy-reduce-cn.md
0  examples/data/docs/developer_guides/proxy-reduce.md → docs/developer_guides/proxy-reduce.md
0  examples/data/docs/jaeger_guides/opentracing_user_guide.md → docs/jaeger_guides/opentracing_user_guide.md
0  examples/data/docs/user_guides/clustering_compaction.md → docs/user_guides/clustering_compaction.md
0  examples/data/docs/user_guides/collection_ttl.md → docs/user_guides/collection_ttl.md
0  examples/data/docs/user_guides/tls_proxy.md → docs/user_guides/tls_proxy.md

35
examples/basic_example.py

@@ -1,35 +0,0 @@
import logging
import os
from deepsearcher.offline_loading import load_from_local_files
from deepsearcher.online_query import query
from deepsearcher.configuration import Configuration, init_config
httpx_logger = logging.getLogger("httpx") # disable openai's logger output
httpx_logger.setLevel(logging.WARNING)
current_dir = os.path.dirname(os.path.abspath(__file__))
config = Configuration() # Customize your config here
init_config(config=config)
# You should clone the milvus docs repo to your local machine first, execute:
# git clone https://github.com/milvus-io/milvus-docs.git
# Then replace the path below with the path to the milvus-docs repo on your local machine
# import glob
# all_md_files = glob.glob('xxx/milvus-docs/site/en/**/*.md', recursive=True)
# load_from_local_files(paths_or_directory=all_md_files, collection_name="milvus_docs", collection_description="All Milvus Documents")
# Hint: You can also load a single file, please execute it in the root directory of the deep searcher project
load_from_local_files(
paths_or_directory=os.path.join(current_dir, "data/WhatisMilvus.pdf"),
collection_name="milvus_docs",
collection_description="All Milvus Documents",
# force_new_collection=True, # If you want to drop origin collection and create a new collection every time, set force_new_collection to True
)
question = "Write a report comparing Milvus with other vector databases."
_, _, consumed_token = query(question, max_iter=1)
print(f"Consumed tokens: {consumed_token}")

68
examples/basic_example_azuresearch.py

@@ -1,68 +0,0 @@
import logging
import os
import time
from deepsearcher.configuration import Configuration, init_config
from deepsearcher.online_query import query
# Configure logging
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)
logger.info("Initializing DeepSearcher configuration")
config = Configuration()
config.set_provider_config("llm", "AzureOpenAI", {
"model": "gpt-4.1",
"api_key": "<yourkey>",
"base_url": "https://<youraifoundry>.openai.azure.com/openai/",
"api_version": "2024-12-01-preview"
})
config.set_provider_config("embedding", "OpenAIEmbedding", {
"model": "text-embedding-ada-002",
"api_key": "<yourkey>",
"azure_endpoint": "https://<youraifoundry>.openai.azure.com/",
"api_version": "2023-05-15"
# Remove api_version and other Azure-specific parameters
})
config.set_provider_config("vector_db", "AzureSearch", {
"endpoint": "https://<yourazureaisearch>.search.windows.net",
"index_name": "<yourindex>",
"api_key": "<yourkey>",
"vector_field": "content_vector"
})
logger.info("Configuration initialized successfully")
try:
logger.info("Applying global configuration")
init_config(config)
logger.info("Configuration applied globally")
# Example question
question = "Create a detailed report about what Python is all about"
logger.info(f"Processing query: '{question}'")
start_time = time.time()
result = query(question)
query_time = time.time() - start_time
logger.info(f"Query processed in {query_time:.2f} seconds")
logger.info("Retrieved result successfully")
print(result[0]) # Print the first element of the tuple
# Check if there's a second element in the tuple that contains source documents
if len(result) > 1 and hasattr(result[1], "__len__"):
logger.info(f"Found {len(result[1])} source documents")
for i, doc in enumerate(result[1]):
if hasattr(doc, "metadata") and "source" in doc.metadata:
logger.info(f"Source {i+1}: {doc.metadata['source']}")
except Exception as e:
logger.error(f"Error executing query: {str(e)}")
import traceback
logger.error(traceback.format_exc())

40
examples/basic_example_oracle.py

@@ -1,40 +0,0 @@
import sys, os
from pathlib import Path
script_directory = Path(__file__).resolve().parent.parent
sys.path.append(os.path.abspath(script_directory))
import logging
httpx_logger = logging.getLogger("httpx") # disable openai's logger output
httpx_logger.setLevel(logging.WARNING)
current_dir = os.path.dirname(os.path.abspath(__file__))
# Customize your config here
from deepsearcher.configuration import Configuration, init_config
config = Configuration()
init_config(config=config)
# # Load your local data
# # Hint: You can load from a directory or a single file, please execute it in the root directory of the deep searcher project
from deepsearcher.offline_loading import load_from_local_files
load_from_local_files(
paths_or_directory=os.path.join(current_dir, "data/WhatisMilvus.pdf"),
collection_name="milvus_docs",
collection_description="All Milvus Documents",
# force_new_collection=True, # If you want to drop origin collection and create a new collection every time, set force_new_collection to True
)
# Query
from deepsearcher.online_query import query
question = 'Write a report comparing Milvus with other vector databases.'
answer, retrieved_results, consumed_token = query(question)
print(answer)
# # get consumed tokens, about: 2.5~3w tokens when using openai gpt-4o model
# print(f"Consumed tokens: {consumed_token}")

126
examples/basic_watsonx_example.py

@@ -1,126 +0,0 @@
"""
Example usage of WatsonX embedding and LLM in DeepSearcher.
This example demonstrates how to configure and use IBM WatsonX
embedding models and language models with DeepSearcher.
"""
import os
from deepsearcher.configuration import Configuration
def main():
"""Example of using WatsonX with DeepSearcher."""
# Initialize configuration
config = Configuration()
# Set up environment variables (alternatively, set these in your shell)
# os.environ["WATSONX_APIKEY"] = "your-watsonx-api-key"
# os.environ["WATSONX_URL"] = "https://your-watsonx-instance.com"
# os.environ["WATSONX_PROJECT_ID"] = "your-project-id"
# Example 1: Configure WatsonX Embedding
print("=== WatsonX Embedding Configuration ===")
# Basic configuration with default model
config.set_provider_config("embedding", "WatsonXEmbedding", {})
# Configuration with custom model
config.set_provider_config("embedding", "WatsonXEmbedding", {
"model": "ibm/slate-125m-english-rtrvr-v2"
})
# Configuration with explicit credentials
# config.set_provider_config("embedding", "WatsonXEmbedding", {
# "model": "sentence-transformers/all-minilm-l6-v2",
# "api_key": "your-api-key",
# "url": "https://your-watsonx-instance.com",
# "project_id": "your-project-id"
# })
print("WatsonX Embedding configured successfully!")
# Example 2: Configure WatsonX LLM
print("\n=== WatsonX LLM Configuration ===")
# Basic configuration with default model
config.set_provider_config("llm", "WatsonX", {})
# Configuration with custom model and parameters
config.set_provider_config("llm", "WatsonX", {
"model": "ibm/granite-3-3-8b-instruct",
"max_new_tokens": 1000,
"temperature": 0.7,
"top_p": 0.9,
"top_k": 50
})
# Configuration with IBM Granite model
config.set_provider_config("llm", "WatsonX", {
"model": "ibm/granite-3-3-8b-instruct",
"max_new_tokens": 512,
"temperature": 0.1
})
print("WatsonX LLM configured successfully!")
# Example 3: Test embedding functionality
print("\n=== Testing WatsonX Embedding ===")
try:
from deepsearcher.embedding.watsonx_embedding import WatsonXEmbedding
# Check if environment variables are set
if all(os.getenv(var) for var in ["WATSONX_APIKEY", "WATSONX_URL", "WATSONX_PROJECT_ID"]):
embedding = WatsonXEmbedding()
# Test single query embedding
query = "What is artificial intelligence?"
query_embedding = embedding.embed_query(query)
print(f"Query embedding dimension: {len(query_embedding)}")
# Test document embeddings
documents = [
"Artificial intelligence is a branch of computer science.",
"Machine learning is a subset of AI.",
"Deep learning uses neural networks."
]
doc_embeddings = embedding.embed_documents(documents)
print(f"Document embeddings: {len(doc_embeddings)} vectors of dimension {len(doc_embeddings[0])}")
else:
print("Environment variables not set. Skipping embedding test.")
except ImportError:
print("WatsonX dependencies not installed. Run: pip install ibm-watsonx-ai")
except Exception as e:
print(f"Error testing embedding: {e}")
# Example 4: Test LLM functionality
print("\n=== Testing WatsonX LLM ===")
try:
from deepsearcher.llm.watsonx import WatsonX
# Check if environment variables are set
if all(os.getenv(var) for var in ["WATSONX_APIKEY", "WATSONX_URL", "WATSONX_PROJECT_ID"]):
llm = WatsonX()
# Test chat functionality
messages = [
{"role": "system", "content": "You are a helpful AI assistant."},
{"role": "user", "content": "Explain what artificial intelligence is in one sentence."}
]
response = llm.chat(messages)
print(f"LLM Response: {response.content}")
print(f"Tokens used: {response.total_tokens}")
else:
print("Environment variables not set. Skipping LLM test.")
except ImportError:
print("WatsonX dependencies not installed. Run: pip install ibm-watsonx-ai")
except Exception as e:
print(f"Error testing LLM: {e}")
if __name__ == "__main__":
main()

7
examples/data/docs/OWNERS

@@ -1,7 +0,0 @@
reviewers:
- czs007
- xiaofan-luan
- scsven
approvers:
- maintainers

