"""Oracle database vector store implementation for DeepSearcher."""
import array
import json
from typing import List, Optional, Union
import numpy as np
from deepsearcher.loader.splitter import Chunk
from deepsearcher.utils import log
from deepsearcher.vector_db.base import BaseVectorDB, CollectionInfo, RetrievalResult
class OracleDB(BaseVectorDB):
    """Vector store backed by an Oracle database with native VECTOR support.

    Collection metadata is kept in the DEEPSEARCHER_COLLECTION_INFO table and
    chunk rows (text, reference, metadata JSON, embedding) in
    DEEPSEARCHER_COLLECTION_ITEM.  Rows are soft-deleted via a ``status``
    column (1 = active, 0 = dropped) rather than physically removed.
    """

    # Connection pool handle (oracledb pool); assigned in __init__.
    client = None

    def __init__(
        self,
        user: str,
        password: str,
        dsn: str,
        config_dir: str,
        wallet_location: str,
        wallet_password: str,
        min: int = 1,
        max: int = 10,
        increment: int = 1,
        default_collection: str = "deepsearcher",
    ):
        """
        Initialize the Oracle database connection pool.

        Args:
            user (str): Oracle database username.
            password (str): Oracle database password.
            dsn (str): Oracle database connection string.
            config_dir (str): Directory containing Oracle configuration files.
            wallet_location (str): Location of the Oracle wallet.
            wallet_password (str): Password for the Oracle wallet.
            min (int, optional): Minimum number of connections in the pool. Defaults to 1.
            max (int, optional): Maximum number of connections in the pool. Defaults to 10.
            increment (int, optional): Increment for adding new connections. Defaults to 1.
            default_collection (str, optional): Default collection name. Defaults to "deepsearcher".

        Raises:
            Exception: If the pool cannot be created or the required tables
                cannot be verified/created.
        """
        super().__init__(default_collection)
        self.default_collection = default_collection

        # Imported lazily so this module can be imported without the
        # optional `oracledb` driver installed.
        import oracledb

        # Fetch CLOB/BLOB columns as plain Python values, not LOB handles.
        oracledb.defaults.fetch_lobs = False
        self.DB_TYPE_VECTOR = oracledb.DB_TYPE_VECTOR
        try:
            self.client = oracledb.create_pool(
                user=user,
                password=password,
                dsn=dsn,
                config_dir=config_dir,
                wallet_location=wallet_location,
                wallet_password=wallet_password,
                min=min,
                max=max,
                increment=increment,
            )
            log.color_print(f"Connected to Oracle database at {dsn}")
            self.check_table()
        except Exception as e:
            log.critical(f"Failed to connect to Oracle database at {dsn}")
            log.critical(f"Oracle database error in init: {e}")
            raise

    def numpy_converter_in(self, value):
        """Convert a numpy array to an array.array for DB binding.

        float64 -> 'd', float32 -> 'f'; any other dtype is packed as
        signed bytes ('b'), i.e. assumed int8-compatible.
        """
        if value.dtype == np.float64:
            dtype = "d"
        elif value.dtype == np.float32:
            dtype = "f"
        else:
            dtype = "b"
        return array.array(dtype, value)

    def input_type_handler(self, cursor, value, arraysize):
        """Bind numpy arrays as Oracle VECTOR values on input."""
        if isinstance(value, np.ndarray):
            return cursor.var(
                self.DB_TYPE_VECTOR,
                arraysize=arraysize,
                inconverter=self.numpy_converter_in,
            )

    def numpy_converter_out(self, value):
        """Convert an array.array fetched from the DB back to a numpy array."""
        if value.typecode == "b":
            dtype = np.int8
        elif value.typecode == "f":
            dtype = np.float32
        else:
            dtype = np.float64
        return np.array(value, copy=False, dtype=dtype)

    def output_type_handler(self, cursor, metadata):
        """Fetch Oracle VECTOR columns as numpy arrays on output."""
        if metadata.type_code is self.DB_TYPE_VECTOR:
            return cursor.var(
                metadata.type_code,
                arraysize=cursor.arraysize,
                outconverter=self.numpy_converter_out,
            )

    def query(self, sql: str, params: dict = None) -> list:
        """
        Execute a SQL query and return the results.

        Args:
            sql (str): SQL query to execute.
            params (dict, optional): Bind parameters for the query. Defaults to None.

        Returns:
            list: One dict per row, keyed by lower-cased column name;
                an empty list if the query matched nothing.
                (The previous ``Union[dict, None]`` annotation was wrong:
                this method always returns a list.)

        Raises:
            Exception: If there's an error executing the query.
        """
        with self.client.acquire() as connection:
            connection.inputtypehandler = self.input_type_handler
            connection.outputtypehandler = self.output_type_handler
            with connection.cursor() as cursor:
                try:
                    if log.dev_mode:
                        print("sql:\n", sql)
                    cursor.execute(sql, params)
                except Exception as e:
                    log.critical(f"Oracle database error in query: {e}")
                    raise
                columns = [column[0].lower() for column in cursor.description]
                rows = cursor.fetchall()
                data = [dict(zip(columns, row)) for row in rows] if rows else []
                if log.dev_mode:
                    print("data:\n", data)
                return data

    def execute(self, sql: str, data: Union[list, dict] = None):
        """
        Execute a SQL statement without returning results, committing on success.

        Args:
            sql (str): SQL statement to execute.
            data (Union[list, dict], optional): Bind data for the statement. Defaults to None.

        Raises:
            Exception: If there's an error executing the statement.
        """
        try:
            with self.client.acquire() as connection:
                connection.inputtypehandler = self.input_type_handler
                connection.outputtypehandler = self.output_type_handler
                with connection.cursor() as cursor:
                    if data is None:
                        cursor.execute(sql)
                    else:
                        cursor.execute(sql, data)
                connection.commit()
        except Exception as e:
            log.critical(f"Oracle database error in execute: {e}")
            log.error("ERROR sql:\n" + sql)
            # BUGFIX: `data` may be a dict/list; the old str concatenation
            # raised TypeError here and masked the original DB error.
            log.error(f"ERROR data:\n{data}")
            raise

    def has_collection(self, collection: str = "deepsearcher"):
        """
        Check if a collection exists (status = 1) in the database.

        Args:
            collection (str, optional): Collection name to check. Defaults to "deepsearcher".

        Returns:
            bool: True if the collection exists, False otherwise.
        """
        SQL = SQL_TEMPLATES["has_collection"]
        params = {"collection": collection}
        res = self.query(SQL, params)
        return bool(res) and res[0]["rowcnt"] > 0

    def check_table(self):
        """
        Check that both required tables exist, creating any that are missing.

        Raises:
            Exception: If there's an error checking or creating tables.
        """
        SQL = SQL_TEMPLATES["has_table"]
        try:
            res = self.query(SQL)
            if len(res) < 2:
                # Create only the tables absent from the schema.
                missing_tables = TABLES.keys() - {i["table_name"] for i in res}
                for table in missing_tables:
                    self.create_tables(table)
        except Exception as e:
            log.critical(f"Failed to check table in Oracle database, error info: {e}")
            raise

    def create_tables(self, table_name):
        """
        Create a table in the database from its DDL in TABLES.

        Args:
            table_name: Name of the table to create (a key of TABLES).

        Raises:
            Exception: If there's an error creating the table.
        """
        SQL = TABLES[table_name]
        try:
            self.execute(SQL)
            log.color_print(f"Created table {table_name} in Oracle database")
        except Exception as e:
            log.critical(f"Failed to create table {table_name} in Oracle database, error info: {e}")
            raise

    def drop_collection(self, collection: str = "deepsearcher"):
        """
        Soft-drop a collection: set status=0 on its info row and item rows.

        Args:
            collection (str, optional): Collection name to drop. Defaults to "deepsearcher".

        Raises:
            Exception: If there's an error dropping the collection.
        """
        try:
            params = {"collection": collection}
            SQL = SQL_TEMPLATES["drop_collection"]
            self.execute(SQL, params)
            SQL = SQL_TEMPLATES["drop_collection_item"]
            self.execute(SQL, params)
            log.color_print(f"Collection {collection} dropped")
        except Exception as e:
            log.critical(f"fail to drop collection, error info: {e}")
            raise

    def insertone(self, data):
        """
        Insert a single chunk row into DEEPSEARCHER_COLLECTION_ITEM.

        Args:
            data: Bind dict with collection, embedding, text, reference, metadata.
        """
        SQL = SQL_TEMPLATES["insert"]
        self.execute(SQL, data)
        log.debug("insert done!")

    def searchone(
        self,
        collection: Optional[str],
        vector: Union[np.array, List[float]],
        top_k: int = 5,
        max_distance: float = 0.8,
    ):
        """
        Search a collection for vectors similar to `vector` (cosine distance).

        Args:
            collection (Optional[str]): Collection name to search in.
            vector (Union[np.array, List[float]]): Query vector for similarity search.
            top_k (int, optional): Number of results to return. Defaults to 5.
            max_distance (float, optional): Exclude rows whose cosine distance
                is >= this threshold. Defaults to 0.8 (the previously
                hard-coded value).

        Returns:
            list: List of result rows (dicts), possibly empty.

        Raises:
            Exception: If there's an error during search.
        """
        log.debug("def searchone:" + collection)
        try:
            if isinstance(vector, list):
                vector = np.array(vector)
            # Oracle's VECTOR() constructor takes a textual vector literal;
            # dimension/dtype must be spliced into the SQL (cannot be bound).
            embedding_string = "[" + ", ".join(map(str, vector.tolist())) + "]"
            dimension = vector.shape[0]
            dtype = str(vector.dtype).upper()
            SQL = SQL_TEMPLATES["search"].format(dimension=dimension, dtype=dtype)
            params = {
                "collection": collection,
                "embedding_string": embedding_string,
                "top_k": top_k,
                "max_distance": max_distance,
            }
            res = self.query(SQL, params)
            return res if res else []
        except Exception as e:
            log.critical(f"fail to search data, error info: {e}")
            raise

    def init_collection(
        self,
        dim: int,
        collection: Optional[str] = "deepsearcher",
        description: Optional[str] = "",
        force_new_collection: bool = False,
        text_max_length: int = 65_535,
        reference_max_length: int = 2048,
        metric_type: str = "L2",
        *args,
        **kwargs,
    ):
        """
        Initialize (register) a collection in the database.

        Args:
            dim (int): Dimension of the vector embeddings (unused here; the
                VECTOR column is dimension-flexible).
            collection (Optional[str], optional): Collection name. Defaults to "deepsearcher".
            description (Optional[str], optional): Collection description. Defaults to "".
            force_new_collection (bool, optional): Whether to force create a new collection if it already exists. Defaults to False.
            text_max_length (int, optional): Maximum length for text field. Defaults to 65_535.
            reference_max_length (int, optional): Maximum length for reference field. Defaults to 2048.
            metric_type (str, optional): Metric type for vector similarity search. Defaults to "L2".
            *args: Variable length argument list.
            **kwargs: Arbitrary keyword arguments.

        Raises:
            Exception: If there's an error initializing the collection.
        """
        if not collection:
            collection = self.default_collection
        if description is None:
            description = ""
        try:
            has_collection = self.has_collection(collection)
            if force_new_collection and has_collection:
                self.drop_collection(collection)
            elif has_collection:
                return
            # Register the collection in the info table.
            SQL = SQL_TEMPLATES["insert_collection"]
            params = {"collection": collection, "description": description}
            self.execute(SQL, params)
        except Exception as e:
            log.critical(f"fail to init_collection for oracle, error info: {e}")
            # BUGFIX: previously swallowed silently; re-raise for consistency
            # with every other method so callers see the failure.
            raise

    def insert_data(
        self,
        collection: Optional[str],
        chunks: List[Chunk],
        batch_size: int = 256,
        *args,
        **kwargs,
    ):
        """
        Insert chunk data into a collection.

        Args:
            collection (Optional[str]): Collection name. If None, uses default_collection.
            chunks (List[Chunk]): List of Chunk objects to insert.
            batch_size (int, optional): Retained for interface compatibility;
                rows are currently inserted one at a time. Defaults to 256.
            *args: Variable length argument list.
            **kwargs: Arbitrary keyword arguments.

        Raises:
            Exception: If there's an error inserting data.
        """
        if not collection:
            collection = self.default_collection
        datas = [
            {
                "embedding": self.numpy_converter_in(np.array(chunk.embedding)),
                "text": chunk.text,
                "reference": chunk.reference,
                "metadata": json.dumps(chunk.metadata),
                "collection": collection,
            }
            for chunk in chunks
        ]
        try:
            # The old code batched the list and then looped items anyway;
            # a flat loop over all rows is behaviorally identical.
            for _data in datas:
                self.insertone(data=_data)
            log.color_print(f"Successfully insert {len(datas)} data")
        except Exception as e:
            log.critical(f"fail to insert data, error info: {e}")
            raise

    def search_data(
        self,
        collection: Optional[str],
        vector: Union[np.array, List[float]],
        top_k: int = 5,
        *args,
        **kwargs,
    ) -> List[RetrievalResult]:
        """
        Search for similar vectors in a collection.

        Args:
            collection (Optional[str]): Collection name. If None, uses default_collection.
            vector (Union[np.array, List[float]]): Query vector for similarity search.
            top_k (int, optional): Number of results to return. Defaults to 5.
            *args: Variable length argument list.
            **kwargs: Arbitrary keyword arguments.

        Returns:
            List[RetrievalResult]: List of retrieval results containing similar vectors.

        Raises:
            Exception: If there's an error during search.
        """
        if not collection:
            collection = self.default_collection
        try:
            search_results = self.searchone(collection=collection, vector=vector, top_k=top_k)
            return [
                RetrievalResult(
                    embedding=row["embedding"],
                    text=row["text"],
                    reference=row["reference"],
                    score=row["distance"],
                    metadata=json.loads(row["metadata"]),
                )
                for row in search_results
            ]
        except Exception as e:
            log.critical(f"fail to search data, error info: {e}")
            raise

    def list_collections(self, *args, **kwargs) -> List[CollectionInfo]:
        """
        List all active collections in the database.

        Args:
            *args: Variable length argument list.
            **kwargs: Arbitrary keyword arguments.

        Returns:
            List[CollectionInfo]: List of collection information objects.
        """
        collection_infos = []
        try:
            SQL = SQL_TEMPLATES["list_collections"]
            log.debug("def list_collections:" + SQL)
            collections = self.query(SQL)
            if collections:
                for collection in collections:
                    collection_infos.append(
                        CollectionInfo(
                            collection_name=collection["collection"],
                            description=collection["description"],
                        )
                    )
            return collection_infos
        except Exception as e:
            log.critical(f"fail to list collections, error info: {e}")
            raise

    def clear_db(self, collection: str = "deepsearcher", *args, **kwargs):
        """
        Clear (soft-drop) a collection from the database.

        Args:
            collection (str, optional): Collection name to drop. Defaults to "deepsearcher".
            *args: Variable length argument list.
            **kwargs: Arbitrary keyword arguments.
        """
        if not collection:
            collection = self.default_collection
        try:
            # BUGFIX: previously called self.client.drop_collection(...), but
            # self.client is an oracledb connection pool with no such method;
            # use this class's own soft-drop instead.
            self.drop_collection(collection)
        except Exception as e:
            log.warning(f"fail to clear db, error info: {e}")
            raise
TABLES = {
"DEEPSEARCHER_COLLECTION_INFO": """CREATE TABLE DEEPSEARCHER_COLLECTION_INFO (
id INT generated by default as identity primary key,
collection varchar(256),
description CLOB,
status NUMBER DEFAULT 1,
createtime TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updatetime TIMESTAMP DEFAULT NULL)""",
"DEEPSEARCHER_COLLECTION_ITEM": """CREATE TABLE DEEPSEARCHER_COLLECTION_ITEM (
id INT generated by default as identity primary key,
collection varchar(256),
embedding VECTOR,
text CLOB,
reference varchar(4000),
metadata CLOB,
status NUMBER DEFAULT 1,
createtime TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updatetime TIMESTAMP DEFAULT NULL)""",
}
SQL_TEMPLATES = {
"has_table": f"""SELECT table_name FROM all_tables
WHERE table_name in ({",".join([f"'{k}'" for k in TABLES.keys()])})""",
"has_collection": "select count(*) as rowcnt from DEEPSEARCHER_COLLECTION_INFO where collection=:collection and status=1",
"list_collections": "select collection,description from DEEPSEARCHER_COLLECTION_INFO where status=1",
"drop_collection": "update DEEPSEARCHER_COLLECTION_INFO set status=0 where collection=:collection and status=1",
"drop_collection_item": "update DEEPSEARCHER_COLLECTION_ITEM set status=0 where collection=:collection and status=1",
"insert_collection": """INSERT INTO DEEPSEARCHER_COLLECTION_INFO (collection,description)
values (:collection,:description)""",
"insert": """INSERT INTO DEEPSEARCHER_COLLECTION_ITEM (collection,embedding,text,reference,metadata)
values (:collection,:embedding,:text,:reference,:metadata)""",
"search": """SELECT * FROM
(SELECT t.*,
VECTOR_DISTANCE(t.embedding,vector(:embedding_string,{dimension},{dtype}),COSINE) as distance
FROM DEEPSEARCHER_COLLECTION_ITEM t
JOIN DEEPSEARCHER_COLLECTION_INFO c ON t.collection=c.collection
WHERE t.collection=:collection AND t.status=1 AND c.status=1)
WHERE distance<:max_distance ORDER BY distance ASC FETCH FIRST :top_k ROWS ONLY""",
}