@@ -1,35 +0,0 @@
import logging
import os

from deepsearcher.offline_loading import load_from_local_files
from deepsearcher.online_query import query
from deepsearcher.configuration import Configuration, init_config

httpx_logger = logging.getLogger("httpx")  # Silence httpx, the transport used by the OpenAI client
httpx_logger.setLevel(logging.WARNING)

current_dir = os.path.dirname(os.path.abspath(__file__))

config = Configuration()  # Customize your config here
init_config(config=config)


# Clone the Milvus docs repo to your local machine first:
#   git clone https://github.com/milvus-io/milvus-docs.git
# Then replace the path below with the path to the milvus-docs repo on your machine.
# import glob
# all_md_files = glob.glob('xxx/milvus-docs/site/en/**/*.md', recursive=True)
# load_from_local_files(paths_or_directory=all_md_files, collection_name="milvus_docs", collection_description="All Milvus Documents")

# Hint: you can also load a single file. Run this from the root directory of the DeepSearcher project.
load_from_local_files(
    paths_or_directory=os.path.join(current_dir, "data/WhatisMilvus.pdf"),
    collection_name="milvus_docs",
    collection_description="All Milvus Documents",
    # force_new_collection=True,  # Set to True to drop the existing collection and create a fresh one on every run.
)

question = "Write a report comparing Milvus with other vector databases."

_, _, consumed_token = query(question, max_iter=1)
print(f"Consumed tokens: {consumed_token}")
@@ -1,68 +0,0 @@
import logging
import os
import time

from deepsearcher.configuration import Configuration, init_config
from deepsearcher.online_query import query

# Configure logging
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S'
)
logger = logging.getLogger(__name__)


logger.info("Initializing DeepSearcher configuration")
config = Configuration()
config.set_provider_config("llm", "AzureOpenAI", {
    "model": "gpt-4.1",
    "api_key": "<yourkey>",
    "base_url": "https://<youraifoundry>.openai.azure.com/openai/",
    "api_version": "2024-12-01-preview"
})
config.set_provider_config("embedding", "OpenAIEmbedding", {
    "model": "text-embedding-ada-002",
    "api_key": "<yourkey>",
    "azure_endpoint": "https://<youraifoundry>.openai.azure.com/",
    "api_version": "2023-05-15"
})
config.set_provider_config("vector_db", "AzureSearch", {
    "endpoint": "https://<yourazureaisearch>.search.windows.net",
    "index_name": "<yourindex>",
    "api_key": "<yourkey>",
    "vector_field": "content_vector"
})

logger.info("Configuration initialized successfully")

try:
    logger.info("Applying global configuration")
    init_config(config)
    logger.info("Configuration applied globally")

    # Example question
    question = "Create a detailed report about what Python is all about"
    logger.info(f"Processing query: '{question}'")

    start_time = time.time()
    result = query(question)
    query_time = time.time() - start_time
    logger.info(f"Query processed in {query_time:.2f} seconds")

    logger.info("Retrieved result successfully")
    print(result[0])  # The first element of the tuple is the answer text.

    # The second element of the tuple, if present, holds the retrieved source documents.
    if len(result) > 1 and hasattr(result[1], "__len__"):
        logger.info(f"Found {len(result[1])} source documents")
        for i, doc in enumerate(result[1]):
            if hasattr(doc, "metadata") and "source" in doc.metadata:
                logger.info(f"Source {i + 1}: {doc.metadata['source']}")
except Exception as e:
    logger.error(f"Error executing query: {str(e)}")
    import traceback
    logger.error(traceback.format_exc())

@@ -1,40 +0,0 @@
import sys, os
from pathlib import Path

# Make the project root importable so the deepsearcher package is found.
script_directory = Path(__file__).resolve().parent.parent
sys.path.append(os.path.abspath(script_directory))

import logging

httpx_logger = logging.getLogger("httpx")  # Silence httpx, the transport used by the OpenAI client
httpx_logger.setLevel(logging.WARNING)

current_dir = os.path.dirname(os.path.abspath(__file__))

# Customize your config here
from deepsearcher.configuration import Configuration, init_config

config = Configuration()
init_config(config=config)

# Load your local data.
# Hint: you can load from a directory or a single file. Run this from the root directory of the DeepSearcher project.
from deepsearcher.offline_loading import load_from_local_files

load_from_local_files(
    paths_or_directory=os.path.join(current_dir, "data/WhatisMilvus.pdf"),
    collection_name="milvus_docs",
    collection_description="All Milvus Documents",
    # force_new_collection=True,  # Set to True to drop the existing collection and create a fresh one on every run.
)

# Query
from deepsearcher.online_query import query

question = 'Write a report comparing Milvus with other vector databases.'
answer, retrieved_results, consumed_token = query(question)
print(answer)

# Check consumed tokens: roughly 25,000-30,000 tokens when using the OpenAI gpt-4o model.
# print(f"Consumed tokens: {consumed_token}")
@@ -1,126 +0,0 @@
"""
Example usage of WatsonX embedding and LLM in DeepSearcher.

This example demonstrates how to configure and use IBM WatsonX
embedding models and language models with DeepSearcher.
"""

import os
from deepsearcher.configuration import Configuration


def main():
    """Example of using WatsonX with DeepSearcher."""

    # Initialize configuration
    config = Configuration()

    # Set up environment variables (alternatively, set these in your shell)
    # os.environ["WATSONX_APIKEY"] = "your-watsonx-api-key"
    # os.environ["WATSONX_URL"] = "https://your-watsonx-instance.com"
    # os.environ["WATSONX_PROJECT_ID"] = "your-project-id"

    # Example 1: Configure WatsonX embedding
    print("=== WatsonX Embedding Configuration ===")

    # Basic configuration with the default model
    config.set_provider_config("embedding", "WatsonXEmbedding", {})

    # Configuration with a custom model
    config.set_provider_config("embedding", "WatsonXEmbedding", {
        "model": "ibm/slate-125m-english-rtrvr-v2"
    })

    # Configuration with explicit credentials
    # config.set_provider_config("embedding", "WatsonXEmbedding", {
    #     "model": "sentence-transformers/all-minilm-l6-v2",
    #     "api_key": "your-api-key",
    #     "url": "https://your-watsonx-instance.com",
    #     "project_id": "your-project-id"
    # })

    print("WatsonX Embedding configured successfully!")

    # Example 2: Configure WatsonX LLM
    print("\n=== WatsonX LLM Configuration ===")

    # Basic configuration with the default model
    config.set_provider_config("llm", "WatsonX", {})

    # Configuration with a custom model and generation parameters
    config.set_provider_config("llm", "WatsonX", {
        "model": "ibm/granite-3-3-8b-instruct",
        "max_new_tokens": 1000,
        "temperature": 0.7,
        "top_p": 0.9,
        "top_k": 50
    })

    # Configuration with an IBM Granite model
    config.set_provider_config("llm", "WatsonX", {
        "model": "ibm/granite-3-3-8b-instruct",
        "max_new_tokens": 512,
        "temperature": 0.1
    })

    print("WatsonX LLM configured successfully!")

    # Example 3: Test embedding functionality
    print("\n=== Testing WatsonX Embedding ===")
    try:
        from deepsearcher.embedding.watsonx_embedding import WatsonXEmbedding

        # Check that the required environment variables are set
        if all(os.getenv(var) for var in ["WATSONX_APIKEY", "WATSONX_URL", "WATSONX_PROJECT_ID"]):
            embedding = WatsonXEmbedding()

            # Test a single query embedding
            query = "What is artificial intelligence?"
            query_embedding = embedding.embed_query(query)
            print(f"Query embedding dimension: {len(query_embedding)}")

            # Test document embeddings
            documents = [
                "Artificial intelligence is a branch of computer science.",
                "Machine learning is a subset of AI.",
                "Deep learning uses neural networks."
            ]
            doc_embeddings = embedding.embed_documents(documents)
            print(f"Document embeddings: {len(doc_embeddings)} vectors of dimension {len(doc_embeddings[0])}")
        else:
            print("Environment variables not set. Skipping embedding test.")
    except ImportError:
        print("WatsonX dependencies not installed. Run: pip install ibm-watsonx-ai")
    except Exception as e:
        print(f"Error testing embedding: {e}")

    # Example 4: Test LLM functionality
    print("\n=== Testing WatsonX LLM ===")
    try:
        from deepsearcher.llm.watsonx import WatsonX

        # Check that the required environment variables are set
        if all(os.getenv(var) for var in ["WATSONX_APIKEY", "WATSONX_URL", "WATSONX_PROJECT_ID"]):
            llm = WatsonX()

            # Test chat functionality
            messages = [
                {"role": "system", "content": "You are a helpful AI assistant."},
                {"role": "user", "content": "Explain what artificial intelligence is in one sentence."}
            ]

            response = llm.chat(messages)
            print(f"LLM Response: {response.content}")
            print(f"Tokens used: {response.total_tokens}")
        else:
            print("Environment variables not set. Skipping LLM test.")
    except ImportError:
        print("WatsonX dependencies not installed. Run: pip install ibm-watsonx-ai")
    except Exception as e:
        print(f"Error testing LLM: {e}")


if __name__ == "__main__":
    main()

@@ -1,7 +0,0 @@
reviewers:
- czs007
- xiaofan-luan
- scsven

approvers:
- maintainers