"""Database Backend Abstraction Layer — SQLite/PostgreSQL."""

from __future__ import annotations

import os
import re
import sqlite3
import logging
from enum import Enum
from typing import Any, List, Optional, Tuple
from contextlib import contextmanager
from urllib.parse import urlparse

logger = logging.getLogger(__name__)

# Try to import psycopg2, but don't fail if not installed
try:
    import psycopg2
    import psycopg2.extras
    import psycopg2.pool

    PSYCOPG2_AVAILABLE = True
except ImportError:
    PSYCOPG2_AVAILABLE = False
    psycopg2 = None


class BackendType(Enum):
    """Database backend types."""

    SQLITE = "sqlite"
    POSTGRESQL = "postgresql"


def detect_backend() -> BackendType:
    """Detect database backend from environment variables."""
    database_url = os.getenv("DATABASE_URL", "") or os.getenv("CRYSTALLISE_DATABASE_URL", "")
    if database_url.startswith(("postgres://", "postgresql://")):
        if not PSYCOPG2_AVAILABLE:
            raise RuntimeError(
                "DATABASE_URL points to PostgreSQL but psycopg2 is not installed. "
                "Install with: pip install psycopg2-binary"
            )
        return BackendType.POSTGRESQL
    return BackendType.SQLITE


def convert_query_placeholders(query: str, target_backend: BackendType) -> str:
    """
    Convert query placeholders and SQLite-specific functions to target backend format.

    Handles:
    - ? → %s placeholder conversion
    - SQLite datetime() function → PostgreSQL equivalents
    - INSERT OR REPLACE → PostgreSQL upsert syntax

    Args:
        query: SQL query string with ? placeholders
        target_backend: Target backend type

    Returns:
        Converted query string
    """
    if target_backend == BackendType.SQLITE:
        return query

    if target_backend == BackendType.POSTGRESQL:
        # Convert SQLite-specific INSERT OR REPLACE → PostgreSQL upsert syntax.
        # Handle the settings table specifically with its known conflict columns.
        settings_replaced, n = re.subn(
            r"\bINSERT\s+OR\s+REPLACE\s+INTO\s+settings\b", "INSERT INTO settings", query, flags=re.IGNORECASE
        )
        if n > 0:
            # Was an INSERT OR REPLACE INTO settings — add upsert clause
            query = (
                settings_replaced.rstrip().rstrip(";")
                + " ON CONFLICT (project_id, setting_key) DO UPDATE SET setting_value = EXCLUDED.setting_value"
            )
        else:
            query = settings_replaced

        # Handle screening_jobs upsert (conflict on primary key 'id')
        screening_replaced, n = re.subn(
            r"\bINSERT\s+OR\s+REPLACE\s+INTO\s+screening_jobs\b",
            "INSERT INTO screening_jobs",
            query,
            flags=re.IGNORECASE,
        )
        if n > 0:
            query = (
                screening_replaced.rstrip().rstrip(";")
                + " ON CONFLICT (id) DO UPDATE SET "
                + "project_id = EXCLUDED.project_id, "
                + "status = EXCLUDED.status, progress = EXCLUDED.progress, "
                + "stage = EXCLUDED.stage, config = EXCLUDED.config, "
                + "results = EXCLUDED.results, clusters = EXCLUDED.clusters, "
                + "stage_timings = EXCLUDED.stage_timings, duration_ms = EXCLUDED.duration_ms, "
                + "estimated_cost_usd = EXCLUDED.estimated_cost_usd, error = EXCLUDED.error, "
                + "error_category = EXCLUDED.error_category, error_retryable = EXCLUDED.error_retryable, "
                + "model_version = EXCLUDED.model_version, "
                + "completed_at = EXCLUDED.completed_at"
            )
        else:
            query = screening_replaced

        # Handle indexer_jobs upsert (conflict on primary key 'id')
        indexer_replaced, n = re.subn(
            r"\bINSERT\s+OR\s+REPLACE\s+INTO\s+indexer_jobs\b",
            "INSERT INTO indexer_jobs",
            query,
            flags=re.IGNORECASE,
        )
        if n > 0:
            query = (
                indexer_replaced.rstrip().rstrip(";")
                + " ON CONFLICT (id) DO UPDATE SET "
                + "status = EXCLUDED.status, progress = EXCLUDED.progress, "
                + "config = EXCLUDED.config, results = EXCLUDED.results, "
                + "errors = EXCLUDED.errors, usage = EXCLUDED.usage, "
                + "error = EXCLUDED.error, error_category = EXCLUDED.error_category, "
                + "error_retryable = EXCLUDED.error_retryable, "
                + "model_version = EXCLUDED.model_version, "
                + "completed_at = EXCLUDED.completed_at, duration_ms = EXCLUDED.duration_ms, "
                + "estimated_cost_usd = EXCLUDED.estimated_cost_usd"
            )
        else:
            query = indexer_replaced

        # Generic fallback for any other INSERT OR REPLACE (strips to plain INSERT)
        query = re.sub(r"\bINSERT\s+OR\s+REPLACE\s+INTO\b", "INSERT INTO", query, flags=re.IGNORECASE)

        # Convert SQLite datetime() function calls before placeholder substitution.
        # Order matters: most-specific patterns first.

        # datetime('now', '-N unit') → NOW() - INTERVAL 'N unit'
        query = re.sub(
            r"datetime\(\s*'now'\s*,\s*'-(\d+)\s+(\w+)'\s*\)", r"NOW() - INTERVAL '\1 \2'", query, flags=re.IGNORECASE
        )
        # datetime('now') → CURRENT_TIMESTAMP
        query = re.sub(r"datetime\(\s*'now'\s*\)", "CURRENT_TIMESTAMP", query, flags=re.IGNORECASE)
        # datetime(column) → column::timestamp  (text column storing ISO datetime strings)
        query = re.sub(r"datetime\((\w+)\)", r"\1::timestamp", query, flags=re.IGNORECASE)

        # SQLite ? → PostgreSQL %s
        # Replace ? not inside string literals
        result = []
        in_string = False
        string_char = None
        i = 0
        while i < len(query):
            char = query[i]
            if char in ("'", '"') and (i == 0 or query[i - 1] != "\\"):
                if not in_string:
                    in_string = True
                    string_char = char
                elif char == string_char:
                    in_string = False
            elif char == "?" and not in_string:
                result.append("%s")
                i += 1
                continue
            result.append(char)
            i += 1
        return "".join(result)

    return query


def convert_schema_syntax(schema: str, to_backend: BackendType) -> str:
    """
    Convert SQLite schema syntax to PostgreSQL (or vice versa).

    Args:
        schema: SQL schema string (CREATE TABLE statements)
        to_backend: Target backend type

    Returns:
        Converted schema string
    """
    if to_backend == BackendType.POSTGRESQL:
        # SQLite → PostgreSQL conversions
        result = schema

        # INTEGER PRIMARY KEY AUTOINCREMENT → SERIAL PRIMARY KEY
        result = re.sub(r"INTEGER\s+PRIMARY\s+KEY\s+AUTOINCREMENT", "SERIAL PRIMARY KEY", result, flags=re.IGNORECASE)

        # INTEGER PRIMARY KEY (without AUTOINCREMENT) → SERIAL PRIMARY KEY
        # Only for id columns, be careful not to affect foreign keys
        result = re.sub(
            r"(\bid\b\s+)INTEGER\s+PRIMARY\s+KEY(?!\s+AUTOINCREMENT)",
            r"\1SERIAL PRIMARY KEY",
            result,
            flags=re.IGNORECASE,
        )

        # DATETIME → TIMESTAMP
        result = re.sub(r"\bDATETIME\b", "TIMESTAMP", result, flags=re.IGNORECASE)

        # datetime('now') → CURRENT_TIMESTAMP (SQLite-specific syntax)
        result = re.sub(r"\(datetime\('now'\)\)", "CURRENT_TIMESTAMP", result, flags=re.IGNORECASE)
        result = re.sub(r"datetime\('now'\)", "CURRENT_TIMESTAMP", result, flags=re.IGNORECASE)

        # INSERT OR IGNORE → INSERT ... ON CONFLICT DO NOTHING
        result = re.sub(r"INSERT\s+OR\s+IGNORE\s+INTO", "INSERT INTO", result, flags=re.IGNORECASE)

        return result

    elif to_backend == BackendType.SQLITE:
        # PostgreSQL → SQLite conversions
        result = schema

        # SERIAL PRIMARY KEY → INTEGER PRIMARY KEY AUTOINCREMENT
        result = re.sub(r"SERIAL\s+PRIMARY\s+KEY", "INTEGER PRIMARY KEY AUTOINCREMENT", result, flags=re.IGNORECASE)

        return result

    return schema


class PostgresConnectionWrapper:
    """
    Wrapper for psycopg2 connection to provide SQLite-like execute() API.

    This allows code written for SQLite (conn.execute(...)) to work with PostgreSQL.
    """

    def __init__(self, conn):
        self._conn = conn
        self._conn.cursor_factory = psycopg2.extras.DictCursor

    def execute(self, query: str, params: tuple = None):
        """Execute a query, creating a cursor automatically like SQLite."""
        # Skip PRAGMA statements (SQLite-specific)
        if query.strip().upper().startswith("PRAGMA"):
            return self._conn.cursor()

        # Convert ? placeholders to %s for PostgreSQL
        converted_query = convert_query_placeholders(query, BackendType.POSTGRESQL)

        cursor = self._conn.cursor()
        if params:
            cursor.execute(converted_query, params)
        else:
            cursor.execute(converted_query)
        return cursor

    def executemany(self, query: str, params_list):
        """Execute a query multiple times with different parameters."""
        converted_query = convert_query_placeholders(query, BackendType.POSTGRESQL)
        cursor = self._conn.cursor()
        cursor.executemany(converted_query, params_list)
        return cursor

    def cursor(self):
        """Get a cursor."""
        return self._conn.cursor()

    def commit(self):
        """Commit the transaction."""
        self._conn.commit()

    def rollback(self):
        """Rollback the transaction."""
        self._conn.rollback()

    def close(self):
        """Note: For pooled connections, don't actually close."""
        pass

    @property
    def raw_connection(self):
        """Access the underlying psycopg2 connection."""
        return self._conn


class DatabaseBackend:
    """
    Abstract database backend providing unified interface for SQLite and PostgreSQL.
    """

    def __init__(
        self,
        backend_type: Optional[BackendType] = None,
        connection_string: Optional[str] = None,
        db_path: Optional[str] = None,
        database_url: Optional[str] = None,
        pool_size: int = 5,
    ):
        """
        Initialize database backend.

        Args:
            backend_type: Backend type (auto-detected if not provided)
            connection_string: Database connection string (legacy, use db_path or database_url)
            db_path: SQLite database path (e.g., "data/screener.db" or ":memory:")
            database_url: PostgreSQL connection URL (postgres://...)
            pool_size: Connection pool size (PostgreSQL only)
        """
        # Resolve database_url from parameter or environment
        resolved_database_url = (
            database_url or os.getenv("DATABASE_URL", "") or os.getenv("CRYSTALLISE_DATABASE_URL", "")
        )

        # Determine backend type
        if backend_type:
            self.backend_type = backend_type
        elif resolved_database_url.startswith(("postgres://", "postgresql://")):
            if not PSYCOPG2_AVAILABLE:
                raise RuntimeError(
                    "DATABASE_URL points to PostgreSQL but psycopg2 is not installed. "
                    "Install with: pip install psycopg2-binary"
                )
            self.backend_type = BackendType.POSTGRESQL
        else:
            self.backend_type = BackendType.SQLITE

        self.pool_size = pool_size
        self._pool = None
        self._keeper_conn = None  # For in-memory SQLite
        self._memory_db_id = None

        # Resolve connection string
        if self.backend_type == BackendType.POSTGRESQL:
            self.connection_string = connection_string or resolved_database_url
            if not self.connection_string:
                raise ValueError("DATABASE_URL environment variable not set")
        else:
            # SQLite: use db_path, connection_string, or default
            self.connection_string = db_path or connection_string or "data/screener.db"

        # Initialize connection pool for PostgreSQL
        if self.backend_type == BackendType.POSTGRESQL:
            self._init_pool()

    def _init_pool(self):
        """Initialize PostgreSQL connection pool."""
        if not PSYCOPG2_AVAILABLE:
            raise RuntimeError("psycopg2 is not installed")

        # Parse DATABASE_URL
        url = urlparse(self.connection_string)

        self._pool = psycopg2.pool.ThreadedConnectionPool(
            minconn=1,
            maxconn=self.pool_size,
            host=url.hostname,
            port=url.port or 5432,
            database=url.path.lstrip("/"),
            user=url.username,
            password=url.password,
        )
        logger.info(f"PostgreSQL connection pool initialized (size={self.pool_size})")

    def is_sqlite(self) -> bool:
        """Check if using SQLite backend."""
        return self.backend_type == BackendType.SQLITE

    def is_postgresql(self) -> bool:
        """Check if using PostgreSQL backend."""
        return self.backend_type == BackendType.POSTGRESQL

    @contextmanager
    def get_connection(self):
        """
        Get a database connection (context manager).

        For SQLite: Creates a new connection each time
        For PostgreSQL: Gets connection from pool, wrapped for SQLite-like API

        Yields:
            Database connection (PostgresConnectionWrapper for PostgreSQL)
        """
        if self.backend_type == BackendType.SQLITE:
            # Handle in-memory databases
            if self.connection_string == ":memory:" or self.connection_string.startswith("file:"):
                if self._memory_db_id is None:
                    import uuid

                    self._memory_db_id = uuid.uuid4().hex
                    conn_str = f"file:{self._memory_db_id}?mode=memory&cache=shared"
                    self._keeper_conn = sqlite3.connect(conn_str, uri=True)
                conn = sqlite3.connect(f"file:{self._memory_db_id}?mode=memory&cache=shared", uri=True)
            else:
                conn = sqlite3.connect(self.connection_string)

            conn.row_factory = sqlite3.Row
            conn.execute("PRAGMA foreign_keys = ON")

            try:
                yield conn
                conn.commit()
            except Exception:
                conn.rollback()
                raise
            finally:
                conn.close()

        else:  # PostgreSQL
            raw_conn = self._pool.getconn()
            conn = PostgresConnectionWrapper(raw_conn)
            try:
                yield conn
                raw_conn.commit()
            except Exception:
                raw_conn.rollback()
                raise
            finally:
                self._pool.putconn(raw_conn)

    def execute(self, conn, query: str, params: Optional[Tuple] = None, returning: bool = False) -> Any:
        """
        Execute a query with automatic placeholder conversion.

        Args:
            conn: Database connection
            query: SQL query (can use either ? or %s placeholders)
            params: Query parameters
            returning: If True and INSERT, add RETURNING id (PostgreSQL)

        Returns:
            Cursor object
        """
        # Convert placeholders if needed
        converted_query = convert_query_placeholders(query, self.backend_type)

        # Add RETURNING clause for PostgreSQL INSERTs
        if returning and self.backend_type == BackendType.POSTGRESQL:
            if "INSERT INTO" in converted_query.upper() and "RETURNING" not in converted_query.upper():
                converted_query = converted_query.rstrip(";") + " RETURNING id"

        if self.backend_type == BackendType.SQLITE:
            if params:
                return conn.execute(converted_query, params)
            return conn.execute(converted_query)
        else:
            cursor = conn.cursor()
            if params:
                cursor.execute(converted_query, params)
            else:
                cursor.execute(converted_query)
            return cursor

    def executemany(self, conn, query: str, params_list: List[Tuple]) -> Any:
        """Execute a query multiple times with different parameters."""
        converted_query = convert_query_placeholders(query, self.backend_type)

        if self.backend_type == BackendType.SQLITE:
            return conn.executemany(converted_query, params_list)
        else:
            cursor = conn.cursor()
            cursor.executemany(converted_query, params_list)
            return cursor

    def executescript(self, conn, script: str):
        """
        Execute multiple SQL statements.

        For SQLite: Uses executescript()
        For PostgreSQL: Splits and executes individually
        """
        if self.backend_type == BackendType.SQLITE:
            conn.executescript(script)
        else:
            # PostgreSQL: Split statements and execute
            # Convert schema syntax first
            converted_script = convert_schema_syntax(script, BackendType.POSTGRESQL)

            # Split by semicolons (simple approach - may need refinement for complex cases)
            statements = [s.strip() for s in converted_script.split(";") if s.strip()]

            cursor = conn.cursor()
            for statement in statements:
                if statement:
                    try:
                        # Handle INSERT ... that was originally INSERT OR IGNORE
                        # Add ON CONFLICT DO NOTHING for settings table (has UNIQUE constraint)
                        if re.match(r"INSERT\s+INTO\s+settings\s*\(", statement, re.IGNORECASE):
                            if "ON CONFLICT" not in statement.upper():
                                statement = statement.rstrip() + " ON CONFLICT (project_id, setting_key) DO NOTHING"

                        cursor.execute(statement)
                    except Exception as e:
                        logger.warning(f"Statement failed: {statement[:100]}... Error: {e}")
                        raise

    def get_lastrowid(self, cursor) -> Optional[int]:
        """
        Get the last inserted row ID.

        For SQLite: Returns cursor.lastrowid
        For PostgreSQL: Fetches from RETURNING clause (query must include RETURNING)

        Args:
            cursor: Database cursor after INSERT with RETURNING (PostgreSQL)

        Returns:
            Last inserted row ID
        """
        if self.backend_type == BackendType.SQLITE:
            return cursor.lastrowid
        else:
            row = cursor.fetchone()
            if row:
                return row["id"] if isinstance(row, dict) else row[0]
            return None

    def table_exists(self, conn, table_name: str) -> bool:
        """Check if a table exists."""
        if self.backend_type == BackendType.SQLITE:
            cursor = conn.execute("SELECT name FROM sqlite_master WHERE type='table' AND name=?", (table_name,))
        else:
            cursor = conn.cursor()
            cursor.execute(
                "SELECT table_name FROM information_schema.tables WHERE table_schema = 'public' AND table_name = %s",
                (table_name,),
            )
        return cursor.fetchone() is not None

    def get_table_columns(self, conn, table_name: str) -> List[str]:
        """Get column names for a table."""
        if self.backend_type == BackendType.SQLITE:
            cursor = conn.execute(f"PRAGMA table_info({table_name})")
            return [row[1] for row in cursor.fetchall()]
        else:
            cursor = conn.cursor()
            cursor.execute(
                "SELECT column_name FROM information_schema.columns "
                "WHERE table_schema = 'public' AND table_name = %s "
                "ORDER BY ordinal_position",
                (table_name,),
            )
            return [row["column_name"] for row in cursor.fetchall()]

    def close(self):
        """Close connections and cleanup."""
        if self._keeper_conn:
            self._keeper_conn.close()
            self._keeper_conn = None

        if self._pool:
            self._pool.closeall()
            self._pool = None


# Global backend instance (lazy initialization)
_backend: Optional[DatabaseBackend] = None


def get_backend(connection_string: Optional[str] = None, force_type: Optional[BackendType] = None) -> DatabaseBackend:
    """
    Get or create the global database backend instance.

    Args:
        connection_string: Optional connection string override
        force_type: Force a specific backend type

    Returns:
        DatabaseBackend instance
    """
    global _backend

    if _backend is None or connection_string is not None:
        backend_type = force_type or detect_backend()
        # Pool size from settings (env: CRYSTALLISE_DB_POOL_SIZE). Imported
        # lazily to avoid a circular import — settings.py is loaded by many
        # modules that themselves import from this module's `BackendType`.
        try:
            from crystallise.config.settings import get_settings
            pool_size = get_settings().db_pool_size
        except Exception:
            pool_size = 20
        _backend = DatabaseBackend(
            backend_type=backend_type,
            connection_string=connection_string,
            pool_size=pool_size,
        )

    return _backend


def reset_backend():
    """Reset the global backend (for testing)."""
    global _backend
    if _backend:
        _backend.close()
    _backend = None
