import array
import json
from typing import List, Optional, Union

import numpy as np

from deepsearcher.loader.splitter import Chunk
from deepsearcher.utils import log
from deepsearcher.vector_db.base import BaseVectorDB, CollectionInfo, RetrievalResult


class OracleDB(BaseVectorDB):
    """OracleDB class is a subclass of DB class."""

    client = None

    def __init__(
        self,
        user: str,
        password: str,
        dsn: str,
        config_dir: str,
        wallet_location: str,
        wallet_password: str,
        min: int = 1,
        max: int = 10,
        increment: int = 1,
        default_collection: str = "deepsearcher",
    ):
        """
        Open an Oracle connection pool and make sure the backing tables exist.

        Args:
            user (str): Oracle database username.
            password (str): Oracle database password.
            dsn (str): Oracle database connection string.
            config_dir (str): Directory containing Oracle configuration files.
            wallet_location (str): Location of the Oracle wallet.
            wallet_password (str): Password for the Oracle wallet.
            min (int, optional): Minimum number of connections in the pool. Defaults to 1.
            max (int, optional): Maximum number of connections in the pool. Defaults to 10.
            increment (int, optional): Increment for adding new connections. Defaults to 1.
            default_collection (str, optional): Default collection name. Defaults to "deepsearcher".

        Raises:
            Exception: Re-raised after logging when the pool cannot be created
                or the table check fails.
        """
        super().__init__(default_collection)
        self.default_collection = default_collection

        # Imported here rather than at module level, so oracledb is only
        # required once an OracleDB is actually instantiated.
        import oracledb

        # Driver-wide default; presumably makes LOB columns come back as plain
        # values instead of LOB locators — confirm against python-oracledb docs.
        oracledb.defaults.fetch_lobs = False
        # Kept on the instance so the type handlers need no oracledb import.
        self.DB_TYPE_VECTOR = oracledb.DB_TYPE_VECTOR

        pool_settings = {
            "user": user,
            "password": password,
            "dsn": dsn,
            "config_dir": config_dir,
            "wallet_location": wallet_location,
            "wallet_password": wallet_password,
            "min": min,
            "max": max,
            "increment": increment,
        }
        try:
            self.client = oracledb.create_pool(**pool_settings)
            log.color_print(f"Connected to Oracle database at {dsn}")
            self.check_table()
        except Exception as exc:
            log.critical(f"Failed to connect to Oracle database at {dsn}")
            log.critical(f"Oracle database error in init: {exc}")
            raise

    def numpy_converter_in(self, value):
        """Convert numpy array to array.array"""
        if value.dtype == np.float64:
            dtype = "d"
        elif value.dtype == np.float32:
            dtype = "f"
        else:
            dtype = "b"
        return array.array(dtype, value)

    def input_type_handler(self, cursor, value, arraysize):
        """Set the type handler for the input data"""
        if isinstance(value, np.ndarray):
            return cursor.var(
                self.DB_TYPE_VECTOR,
                arraysize=arraysize,
                inconverter=self.numpy_converter_in,
            )

    def numpy_converter_out(self, value):
        """Convert array.array to numpy array"""
        if value.typecode == "b":
            dtype = np.int8
        elif value.typecode == "f":
            dtype = np.float32
        else:
            dtype = np.float64
        return np.array(value, copy=False, dtype=dtype)

    def output_type_handler(self, cursor, metadata):
        """Set the type handler for the output data"""
        if metadata.type_code is self.DB_TYPE_VECTOR:
            return cursor.var(
                metadata.type_code,
                arraysize=cursor.arraysize,
                outconverter=self.numpy_converter_out,
            )

    def query(self, sql: str, params: dict = None) -> List[dict]:
        """
        Execute a SQL query and return the fetched rows.

        Args:
            sql (str): SQL query to execute.
            params (dict, optional): Bind parameters for the query. Defaults to None.

        Returns:
            List[dict]: One dict per row, keyed by lower-cased column name.
            Empty when the query matches nothing or the statement produces no
            result set.

        Raises:
            Exception: If there's an error executing the query (logged, then re-raised).
        """
        with self.client.acquire() as connection:
            # Handlers translate numpy arrays <-> VECTOR values on this connection.
            connection.inputtypehandler = self.input_type_handler
            connection.outputtypehandler = self.output_type_handler
            with connection.cursor() as cursor:
                try:
                    if log.dev_mode:
                        print("sql:\n", sql)
                    cursor.execute(sql, params)
                except Exception as e:
                    log.critical(f"Oracle database error in query: {e}")
                    raise
                # description is None when the statement returned no result set;
                # iterating it would raise TypeError.
                if cursor.description is None:
                    return []
                columns = [column[0].lower() for column in cursor.description]
                data = [dict(zip(columns, row)) for row in cursor.fetchall()]
                if log.dev_mode:
                    print("data:\n", data)
                return data

    def execute(self, sql: str, data: Union[list, dict] = None):
        """
        Execute a SQL statement without returning results.

        Args:
            sql (str): SQL statement to execute.
            data (Union[list, dict], optional): Data for the SQL statement. Defaults to None.

        Raises:
            Exception: If there's an error executing the statement.
        """
        try:
            with self.client.acquire() as connection:
                connection.inputtypehandler = self.input_type_handler
                connection.outputtypehandler = self.output_type_handler
                with connection.cursor() as cursor:
                    # print("sql:\n",sql)
                    # print("data:\n",data)
                    if data is None:
                        cursor.execute(sql)
                    else:
                        cursor.execute(sql, data)
                    connection.commit()
        except Exception as e:
            log.critical(f"Oracle database error in execute: {e}")
            log.error("ERROR sql:\n" + sql)
            log.error("ERROR data:\n" + data)
            raise

    def has_collection(self, collection: str = "deepsearcher"):
        """
        Report whether a collection is registered in the database.

        Args:
            collection (str, optional): Collection name to check. Defaults to "deepsearcher".

        Returns:
            bool: True if the count query reports at least one matching row,
            False otherwise (including when the query returns no rows).
        """
        rows = self.query(SQL_TEMPLATES["has_collection"], {"collection": collection})
        return bool(rows and rows[0]["rowcnt"] > 0)

    def check_table(self):
        """
        Check that every table declared in TABLES exists, creating any that are missing.

        Raises:
            Exception: If there's an error checking or creating tables (logged, then re-raised).
        """
        try:
            rows = self.query(SQL_TEMPLATES["has_table"])
            existing = {row["table_name"] for row in rows}
            # Compare against TABLES itself rather than a hard-coded row count:
            # duplicate rows for one table could otherwise hide a missing one.
            # Iterating TABLES keeps creation order deterministic.
            for table in TABLES:
                if table not in existing:
                    self.create_tables(table)
        except Exception as e:
            log.critical(f"Failed to check table in Oracle database, error info: {e}")
            raise

    def create_tables(self, table_name):
        """
        Create one of the tables declared in TABLES.

        Args:
            table_name: Key into TABLES naming the table to create.

        Raises:
            KeyError: If ``table_name`` is not a key of TABLES.
            Exception: If executing the DDL fails (logged, then re-raised).
        """
        # Looked up before the try block, so an unknown name is not logged as
        # a database failure.
        ddl = TABLES[table_name]
        try:
            self.execute(ddl)
            log.color_print(f"Created table {table_name} in Oracle database")
        except Exception as exc:
            log.critical(
                f"Failed to create table {table_name} in Oracle database, error info: {exc}"
            )
            raise

    def drop_collection(self, collection: str = "deepsearcher"):
        """
        Drop a collection by running the two drop statements in turn.

        The collection-info statement runs first, then the collection-item
        statement; each goes through ``execute`` separately.

        Args:
            collection (str, optional): Collection name to drop. Defaults to "deepsearcher".

        Raises:
            Exception: If there's an error dropping the collection (logged, then re-raised).
        """
        try:
            bind_values = {"collection": collection}
            for template_name in ("drop_collection", "drop_collection_item"):
                self.execute(SQL_TEMPLATES[template_name], bind_values)
            log.color_print(f"Collection {collection} dropped")
        except Exception as exc:
            log.critical(f"fail to drop collection, error info: {exc}")
            raise

    def insertone(self, data):
        """
        Insert one record using the "insert" SQL template.

        Args:
            data: Bind values for the insert statement.
        """
        self.execute(SQL_TEMPLATES["insert"], data)
        log.debug("insert done!")

    def searchone(
        self,
        collection: Optional[str],
        vector: Union[np.array, List[float]],
        top_k: int = 5,
        max_distance: float = 0.8,
    ):
        """
        Search for similar vectors in a collection.

        Args:
            collection (Optional[str]): Collection name to search in. If empty or
                None, ``default_collection`` is used.
            vector (Union[np.array, List[float]]): Query vector; any sequence
                accepted by ``np.asarray`` works.
            top_k (int, optional): Number of results to return. Defaults to 5.
            max_distance (float, optional): Only rows whose distance is strictly
                below this value are returned. Defaults to 0.8.

        Returns:
            list: List of row dicts (empty when nothing matches).

        Raises:
            Exception: If there's an error during search (logged, then re-raised).
        """
        if not collection:
            collection = self.default_collection
        # f-string rather than "+": tolerates a non-str collection value.
        log.debug(f"def searchone:{collection}")
        try:
            vector = np.asarray(vector)
            # The dtype name is interpolated into the SQL as the vector format.
            # Anything other than these three (e.g. int64 from a list of ints)
            # is cast to float32 so the name stays one of the three the original
            # float/int8 paths could produce.
            if vector.dtype.name not in ("float32", "float64", "int8"):
                vector = vector.astype(np.float32)
            embedding_string = "[" + ", ".join(map(str, vector.tolist())) + "]"
            dimension = vector.shape[0]
            # .name is byte-order independent, unlike str(dtype) (">f8").
            dtype = vector.dtype.name.upper()

            SQL = SQL_TEMPLATES["search"].format(dimension=dimension, dtype=dtype)
            params = {
                "collection": collection,
                "embedding_string": embedding_string,
                "top_k": top_k,
                "max_distance": max_distance,
            }
            return self.query(SQL, params) or []
        except Exception as e:
            log.critical(f"fail to search data, error info: {e}")
            raise

    def init_collection(
        self,
        dim: int,
        collection: Optional[str] = "deepsearcher",
        description: Optional[str] = "",
        force_new_collection: bool = False,
        text_max_length: int = 65_535,
        reference_max_length: int = 2048,
        metric_type: str = "L2",
        *args,
        **kwargs,
    ):
        """
        Register a collection in the database if it is not already present.

        An existing collection is left untouched unless ``force_new_collection``
        is set, in which case it is dropped first and then registered again.

        Args:
            dim (int): Dimension of the vector embeddings. Not used by this implementation.
            collection (Optional[str], optional): Collection name. Defaults to "deepsearcher".
                If empty or None, ``default_collection`` is used.
            description (Optional[str], optional): Collection description. Defaults to "".
            force_new_collection (bool, optional): Whether to force create a new collection if it already exists. Defaults to False.
            text_max_length (int, optional): Maximum length for text field. Not used by this implementation.
            reference_max_length (int, optional): Maximum length for reference field. Not used by this implementation.
            metric_type (str, optional): Metric type for vector similarity search. Not used by this implementation.
            *args: Variable length argument list.
            **kwargs: Arbitrary keyword arguments.

        Raises:
            Exception: If there's an error initializing the collection (logged, then re-raised).
        """
        if not collection:
            collection = self.default_collection
        if description is None:
            description = ""
        try:
            has_collection = self.has_collection(collection)
            if force_new_collection and has_collection:
                self.drop_collection(collection)
            elif has_collection:
                return
            # insert collection info
            SQL = SQL_TEMPLATES["insert_collection"]
            params = {"collection": collection, "description": description}
            self.execute(SQL, params)
        except Exception as e:
            log.critical(f"fail to init_collection for oracle, error info: {e}")
            # Re-raise, as the other methods here do: swallowing the error let
            # callers continue as if the collection had been registered.
            raise

    def insert_data(
        self,
        collection: Optional[str],
        chunks: List[Chunk],
        batch_size: int = 256,
        *args,
        **kwargs,
    ):
        """
        Insert chunks into a collection, one row per chunk.

        Rows are grouped into slices of ``batch_size``, but each row is still
        written individually through ``insertone``.

        Args:
            collection (Optional[str]): Collection name. If None, uses default_collection.
            chunks (List[Chunk]): List of Chunk objects to insert.
            batch_size (int, optional): Number of chunks in each slice. Defaults to 256.
            *args: Variable length argument list.
            **kwargs: Arbitrary keyword arguments.

        Raises:
            Exception: If there's an error inserting data (logged, then re-raised).
        """
        target = collection or self.default_collection

        rows = [
            {
                "embedding": self.numpy_converter_in(np.array(chunk.embedding)),
                "text": chunk.text,
                "reference": chunk.reference,
                "metadata": json.dumps(chunk.metadata),
                "collection": target,
            }
            for chunk in chunks
        ]

        starts = range(0, len(rows), batch_size)
        batches = [rows[start : start + batch_size] for start in starts]
        try:
            for row in (item for batch in batches for item in batch):
                self.insertone(data=row)
            log.color_print(f"Successfully insert {len(rows)} data")
        except Exception as exc:
            log.critical(f"fail to insert data, error info: {exc}")
            raise

    def search_data(
        self,
        collection: Optional[str],
        vector: Union[np.array, List[float]],
        top_k: int = 5,
        *args,
        **kwargs,
    ) -> List[RetrievalResult]:
        """
        Run a similarity search and wrap each hit in a RetrievalResult.

        Args:
            collection (Optional[str]): Collection name. If None, uses default_collection.
            vector (Union[np.array, List[float]]): Query vector for similarity search.
            top_k (int, optional): Number of results to return. Defaults to 5.
            *args: Variable length argument list.
            **kwargs: Arbitrary keyword arguments.

        Returns:
            List[RetrievalResult]: One result per row returned by ``searchone``;
            the row's "distance" becomes the score and its "metadata" is
            decoded from JSON.

        Raises:
            Exception: If there's an error during search (logged, then re-raised).
        """
        target = collection if collection else self.default_collection
        try:
            hits = self.searchone(collection=target, vector=vector, top_k=top_k)
            results = []
            for hit in hits:
                results.append(
                    RetrievalResult(
                        embedding=hit["embedding"],
                        text=hit["text"],
                        reference=hit["reference"],
                        score=hit["distance"],
                        metadata=json.loads(hit["metadata"]),
                    )
                )
            return results
        except Exception as exc:
            log.critical(f"fail to search data, error info: {exc}")
            raise

    def list_collections(self, *args, **kwargs) -> List[CollectionInfo]:
        """
        Return name and description for every collection the listing query yields.

        Args:
            *args: Variable length argument list.
            **kwargs: Arbitrary keyword arguments.

        Returns:
            List[CollectionInfo]: List of collection information objects
            (empty when the query returns no rows).

        Raises:
            Exception: If the listing query fails (logged, then re-raised).
        """
        collection_infos = []
        try:
            statement = SQL_TEMPLATES["list_collections"]
            log.debug("def list_collections:" + statement)
            rows = self.query(statement)
            collection_infos.extend(
                CollectionInfo(
                    collection_name=row["collection"],
                    description=row["description"],
                )
                for row in rows or []
            )
            return collection_infos
        except Exception as exc:
            log.critical(f"fail to list collections, error info: {exc}")
            raise

    def clear_db(self, collection: str = "deepsearcher", *args, **kwargs):
        """
        Clear (drop) a collection from the database.

        Args:
            collection (str, optional): Collection name to drop. Defaults to "deepsearcher".
            *args: Variable length argument list.
            **kwargs: Arbitrary keyword arguments.
        """
        if not collection:
            collection = self.default_collection
        try:
            self.client.drop_collection(collection)
        except Exception as e:
            log.warning(f"fail to clear db, error info: {e}")
            raise


# DDL for the tables this backend relies on, keyed by table name.
# OracleDB.check_table compares these keys with the names returned by the
# "has_table" query and passes missing ones to OracleDB.create_tables, which
# executes the corresponding statement.
# In both tables `status` defaults to 1; the statements in SQL_TEMPLATES filter
# on status=1 and set status=0 when a collection is dropped.
TABLES = {
    # One row per registered collection (name + description).
    "DEEPSEARCHER_COLLECTION_INFO": """CREATE TABLE DEEPSEARCHER_COLLECTION_INFO (    
        id INT generated by default as identity primary key,
        collection varchar(256),
        description CLOB,
        status NUMBER DEFAULT 1,
        createtime TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        updatetime TIMESTAMP DEFAULT NULL)""",
    # One row per stored chunk: embedding, text, reference and JSON metadata,
    # tagged with the name of the collection it belongs to.
    "DEEPSEARCHER_COLLECTION_ITEM": """CREATE TABLE DEEPSEARCHER_COLLECTION_ITEM (    
        id INT generated by default as identity primary key,
        collection varchar(256),
        embedding VECTOR,
        text CLOB,
        reference varchar(4000),
        metadata CLOB,
        status NUMBER DEFAULT 1,
        createtime TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
        updatetime TIMESTAMP DEFAULT NULL)""",
}

# Named SQL statements used by OracleDB. ":name" placeholders are bind
# variables supplied at execution time. The "{dimension}" and "{dtype}" fields
# in "search" are filled in with str.format() by OracleDB.searchone.
SQL_TEMPLATES = {
    # Built once at import time: the IN list is the quoted keys of TABLES.
    # NOTE(review): all_tables presumably lists every table visible to the
    # user, not only those in the current schema — confirm whether user_tables
    # (or an owner filter) is intended.
    "has_table": f"""SELECT table_name FROM all_tables 
        WHERE table_name in ({",".join([f"'{k}'" for k in TABLES.keys()])})""",
    # Counts active (status=1) registrations of a collection.
    "has_collection": "select count(*) as rowcnt from DEEPSEARCHER_COLLECTION_INFO where collection=:collection and status=1",
    # Name and description of every active collection.
    "list_collections": "select collection,description from DEEPSEARCHER_COLLECTION_INFO where status=1",
    # Dropping is a soft delete: rows are kept and flagged status=0.
    "drop_collection": "update DEEPSEARCHER_COLLECTION_INFO set status=0 where collection=:collection and status=1",
    "drop_collection_item": "update DEEPSEARCHER_COLLECTION_ITEM set status=0 where collection=:collection and status=1",
    # Registers a collection; status and createtime come from column defaults.
    "insert_collection": """INSERT INTO DEEPSEARCHER_COLLECTION_INFO (collection,description) 
        values (:collection,:description)""",
    # Stores one chunk row.
    "insert": """INSERT INTO DEEPSEARCHER_COLLECTION_ITEM (collection,embedding,text,reference,metadata) 
        values (:collection,:embedding,:text,:reference,:metadata)""",
    # Cosine-distance search over active items of an active collection, keeping
    # rows with distance < :max_distance, nearest first, limited to :top_k.
    # NOTE(review): the join would return an item once per matching active INFO
    # row — verify that a collection name cannot be registered twice.
    "search": """SELECT * FROM 
        (SELECT t.*,
            VECTOR_DISTANCE(t.embedding,vector(:embedding_string,{dimension},{dtype}),COSINE) as distance
        FROM DEEPSEARCHER_COLLECTION_ITEM t 
        JOIN DEEPSEARCHER_COLLECTION_INFO c ON t.collection=c.collection 
        WHERE t.collection=:collection AND t.status=1 AND c.status=1)
        WHERE distance<:max_distance ORDER BY distance ASC FETCH FIRST :top_k ROWS ONLY""",
}