@ -1,35 +0,0 @@ |
|||||
import logging |
|
||||
import os |
|
||||
|
|
||||
from deepsearcher.offline_loading import load_from_local_files |
|
||||
from deepsearcher.online_query import query |
|
||||
from deepsearcher.configuration import Configuration, init_config |
|
||||
|
|
||||
# Raise the "httpx" logger to WARNING so openai's request logging stays quiet.
httpx_logger = logging.getLogger("httpx")
httpx_logger.setLevel(logging.WARNING)

# Folder that holds this script; the sample data path is built relative to it.
current_dir = os.path.dirname(os.path.abspath(__file__))

# Create the configuration (customize it here) and apply it.
config = Configuration()
init_config(config=config)

# To index the Milvus documentation instead of the sample PDF, first clone
# the docs repository to your local machine:
#     git clone https://github.com/milvus-io/milvus-docs.git
# then point the glob below at your local checkout and load the files:
#     import glob
#     all_md_files = glob.glob('xxx/milvus-docs/site/en/**/*.md', recursive=True)
#     load_from_local_files(paths_or_directory=all_md_files, collection_name="milvus_docs", collection_description="All Milvus Documents")

# Hint: a single file can be loaded as well; run this script from the root
# directory of the deep searcher project.
sample_pdf = os.path.join(current_dir, "data/WhatisMilvus.pdf")
load_from_local_files(
    paths_or_directory=sample_pdf,
    collection_name="milvus_docs",
    collection_description="All Milvus Documents",
    # force_new_collection=True,  # Set to True to drop the original collection and create a new one on every run
)

question = "Write a report comparing Milvus with other vector databases."

# query() returns three values; only the last one (token usage) is reported.
_, _, consumed_token = query(question, max_iter=1)
print(f"Consumed tokens: {consumed_token}")
|
@ -1,68 +0,0 @@ |
|||||
import logging |
|
||||
import os |
|
||||
import time |
|
||||
|
|
||||
from deepsearcher.configuration import Configuration, init_config |
|
||||
from deepsearcher.online_query import query |
|
||||
|
|
||||
# Configure root logging so every progress message carries a timestamp and level.
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    datefmt='%Y-%m-%d %H:%M:%S',
)
logger = logging.getLogger(__name__)

# Build the provider configuration. Every "<...>" value is a placeholder that
# must be replaced with your own resource name or key before running.
logger.info("Initializing DeepSearcher configuration")
config = Configuration()
config.set_provider_config("llm", "AzureOpenAI", {
    "model": "gpt-4.1",
    "api_key": "<yourkey>",
    "base_url": "https://<youraifoundry>.openai.azure.com/openai/",
    "api_version": "2024-12-01-preview",
})
config.set_provider_config("embedding", "OpenAIEmbedding", {
    "model": "text-embedding-ada-002",
    "api_key": "<yourkey>",
    "azure_endpoint": "https://<youraifoundry>.openai.azure.com/",
    "api_version": "2023-05-15",
    # NOTE(review): "azure_endpoint" and "api_version" look Azure-specific;
    # presumably they should be removed when not targeting Azure - confirm
    # against the OpenAIEmbedding provider.
})
config.set_provider_config("vector_db", "AzureSearch", {
    "endpoint": "https://<yourazureaisearch>.search.windows.net",
    "index_name": "<yourindex>",
    "api_key": "<yourkey>",
    "vector_field": "content_vector",
})

logger.info("Configuration initialized successfully")

try:
    logger.info("Applying global configuration")
    init_config(config)
    logger.info("Configuration applied globally")

    # Example question
    question = "Create a detailed report about what Python is all about"
    logger.info("Processing query: '%s'", question)

    # perf_counter() is monotonic, so the measured duration cannot be skewed
    # by wall-clock adjustments while the query runs.
    start_time = time.perf_counter()
    result = query(question)
    query_time = time.perf_counter() - start_time
    logger.info("Query processed in %.2f seconds", query_time)

    logger.info("Retrieved result successfully")
    print(result[0])  # Print the first element of the tuple

    # If the tuple has a sized second element, treat it as the source
    # documents and log the "source" metadata entry of each one that has it.
    if len(result) > 1 and hasattr(result[1], "__len__"):
        logger.info("Found %d source documents", len(result[1]))
        for position, doc in enumerate(result[1], start=1):
            if hasattr(doc, "metadata") and "source" in doc.metadata:
                logger.info("Source %d: %s", position, doc.metadata['source'])
except Exception as e:
    # Top-level boundary of the example: report the failure with its full
    # traceback (logger.exception appends it) instead of crashing.
    logger.exception("Error executing query: %s", e)
|
@ -1,40 +0,0 @@ |
|||||
import logging
import os
import sys
from pathlib import Path

# Put the directory one level above this script's folder on sys.path
# (presumably the project root) so the deepsearcher package can be imported
# without being installed.
repo_root = Path(__file__).resolve().parent.parent
sys.path.append(os.path.abspath(repo_root))

# These imports must stay below the sys.path tweak above.
from deepsearcher.configuration import Configuration, init_config  # noqa: E402
from deepsearcher.offline_loading import load_from_local_files  # noqa: E402
from deepsearcher.online_query import query  # noqa: E402

# Raise the "httpx" logger to WARNING so openai's request logging stays quiet.
httpx_logger = logging.getLogger("httpx")
httpx_logger.setLevel(logging.WARNING)

# Folder that holds this script; the sample data path is built relative to it.
current_dir = os.path.dirname(os.path.abspath(__file__))

# Customize your config here
config = Configuration()
init_config(config=config)

# Load your local data
# Hint: You can load from a directory or a single file; run this script from
# the root directory of the deep searcher project.
sample_pdf = os.path.join(current_dir, "data/WhatisMilvus.pdf")
load_from_local_files(
    paths_or_directory=sample_pdf,
    collection_name="milvus_docs",
    collection_description="All Milvus Documents",
    # force_new_collection=True,  # Set to True to drop the original collection and create a new one on every run
)

# Query
question = 'Write a report comparing Milvus with other vector databases.'
answer, retrieved_results, consumed_token = query(question)
print(answer)

# Get consumed tokens, about 2.5~3w tokens when using the openai gpt-4o model:
# print(f"Consumed tokens: {consumed_token}")
|
||||
|
|
@ -1,126 +0,0 @@ |
|||||
""" |
|
||||
Example usage of WatsonX embedding and LLM in DeepSearcher. |
|
||||
|
|
||||
This example demonstrates how to configure and use IBM WatsonX |
|
||||
embedding models and language models with DeepSearcher. |
|
||||
""" |
|
||||
|
|
||||
import os |
|
||||
from deepsearcher.configuration import Configuration |
|
||||
|
|
||||
# Environment variables that are checked before running the live tests.
_WATSONX_ENV_VARS = ("WATSONX_APIKEY", "WATSONX_URL", "WATSONX_PROJECT_ID")


def _watsonx_env_is_set():
    """Return True when every WatsonX environment variable has a non-empty value."""
    return all(os.getenv(name) for name in _WATSONX_ENV_VARS)


def _configure_embedding(config):
    """Show the WatsonX embedding provider configurations on *config*."""
    print("=== WatsonX Embedding Configuration ===")

    # Basic configuration with default model
    config.set_provider_config("embedding", "WatsonXEmbedding", {})

    # Configuration with custom model
    custom_model = {"model": "ibm/slate-125m-english-rtrvr-v2"}
    config.set_provider_config("embedding", "WatsonXEmbedding", custom_model)

    # Configuration with explicit credentials
    # config.set_provider_config("embedding", "WatsonXEmbedding", {
    #     "model": "sentence-transformers/all-minilm-l6-v2",
    #     "api_key": "your-api-key",
    #     "url": "https://your-watsonx-instance.com",
    #     "project_id": "your-project-id"
    # })

    print("WatsonX Embedding configured successfully!")


def _configure_llm(config):
    """Show the WatsonX LLM provider configurations on *config*."""
    print("\n=== WatsonX LLM Configuration ===")

    # Basic configuration with default model
    config.set_provider_config("llm", "WatsonX", {})

    # Configuration with custom model and parameters
    tuned_settings = {
        "model": "ibm/granite-3-3-8b-instruct",
        "max_new_tokens": 1000,
        "temperature": 0.7,
        "top_p": 0.9,
        "top_k": 50,
    }
    config.set_provider_config("llm", "WatsonX", tuned_settings)

    # Configuration with IBM Granite model
    granite_settings = {
        "model": "ibm/granite-3-3-8b-instruct",
        "max_new_tokens": 512,
        "temperature": 0.1,
    }
    config.set_provider_config("llm", "WatsonX", granite_settings)

    print("WatsonX LLM configured successfully!")


def _test_embedding():
    """Embed one query and three documents, printing the resulting sizes."""
    print("\n=== Testing WatsonX Embedding ===")
    try:
        from deepsearcher.embedding.watsonx_embedding import WatsonXEmbedding

        # Skip the live call when the environment variables are missing.
        if not _watsonx_env_is_set():
            print("Environment variables not set. Skipping embedding test.")
            return

        embedding = WatsonXEmbedding()

        # Test single query embedding
        sample_query = "What is artificial intelligence?"
        query_embedding = embedding.embed_query(sample_query)
        print(f"Query embedding dimension: {len(query_embedding)}")

        # Test document embeddings
        documents = [
            "Artificial intelligence is a branch of computer science.",
            "Machine learning is a subset of AI.",
            "Deep learning uses neural networks.",
        ]
        doc_embeddings = embedding.embed_documents(documents)
        print(f"Document embeddings: {len(doc_embeddings)} vectors of dimension {len(doc_embeddings[0])}")
    except ImportError:
        print("WatsonX dependencies not installed. Run: pip install ibm-watsonx-ai")
    except Exception as e:
        print(f"Error testing embedding: {e}")


def _test_llm():
    """Send a two-message chat to the LLM and print the reply and token count."""
    print("\n=== Testing WatsonX LLM ===")
    try:
        from deepsearcher.llm.watsonx import WatsonX

        # Skip the live call when the environment variables are missing.
        if not _watsonx_env_is_set():
            print("Environment variables not set. Skipping LLM test.")
            return

        llm = WatsonX()

        # Test chat functionality
        messages = [
            {"role": "system", "content": "You are a helpful AI assistant."},
            {"role": "user", "content": "Explain what artificial intelligence is in one sentence."},
        ]
        response = llm.chat(messages)
        print(f"LLM Response: {response.content}")
        print(f"Tokens used: {response.total_tokens}")
    except ImportError:
        print("WatsonX dependencies not installed. Run: pip install ibm-watsonx-ai")
    except Exception as e:
        print(f"Error testing LLM: {e}")


def main():
    """Example of using WatsonX with DeepSearcher.

    Runs four steps in order: configure the embedding provider, configure the
    LLM provider, then try a live embedding call and a live chat call. Each
    live call is skipped with a message when the WatsonX environment
    variables are not set.
    """
    # Initialize configuration
    config = Configuration()

    # Set up environment variables (alternatively, set these in your shell)
    # os.environ["WATSONX_APIKEY"] = "your-watsonx-api-key"
    # os.environ["WATSONX_URL"] = "https://your-watsonx-instance.com"
    # os.environ["WATSONX_PROJECT_ID"] = "your-project-id"

    _configure_embedding(config)  # Example 1
    _configure_llm(config)        # Example 2
    _test_embedding()             # Example 3
    _test_llm()                   # Example 4


if __name__ == "__main__":
    main()
|
@ -1,7 +0,0 @@ |
|||||
reviewers: |
|
||||
- czs007 |
|
||||
- xiaofan-luan |
|
||||
- scsven |
|
||||
|
|
||||
approvers: |
|
||||
- maintainers |
|
Before Width: | Height: | Size: 913 KiB |
Before Width: | Height: | Size: 72 KiB |
Before Width: | Height: | Size: 19 KiB |
Before Width: | Height: | Size: 70 KiB |
Before Width: | Height: | Size: 82 KiB |
Before Width: | Height: | Size: 110 KiB |
Before Width: | Height: | Size: 105 KiB |
Before Width: | Height: | Size: 28 KiB |
Before Width: | Height: | Size: 226 KiB |
Before Width: | Height: | Size: 25 KiB |
Before Width: | Height: | Size: 96 KiB |
Before Width: | Height: | Size: 228 KiB |
Before Width: | Height: | Size: 296 KiB |
Before Width: | Height: | Size: 261 KiB |
Before Width: | Height: | Size: 46 KiB |
Before Width: | Height: | Size: 41 KiB |
Before Width: | Height: | Size: 36 KiB |
Before Width: | Height: | Size: 231 KiB |
Before Width: | Height: | Size: 45 KiB |
Before Width: | Height: | Size: 242 KiB |
Before Width: | Height: | Size: 103 KiB |
Before Width: | Height: | Size: 232 KiB |
Before Width: | Height: | Size: 96 KiB |
Before Width: | Height: | Size: 162 KiB |
Before Width: | Height: | Size: 99 KiB |
Before Width: | Height: | Size: 499 KiB |
Before Width: | Height: | Size: 192 KiB |
Before Width: | Height: | Size: 181 KiB |
Before Width: | Height: | Size: 52 KiB |
Before Width: | Height: | Size: 21 KiB |
Before Width: | Height: | Size: 42 KiB |
Before Width: | Height: | Size: 368 KiB |
Before Width: | Height: | Size: 558 KiB |