"""SQL CRUD operations for projects, criteria, papers, and indexing fields."""

from __future__ import annotations

import json
import logging
from typing import Any, Optional

from .backend import DatabaseBackend, get_backend

logger = logging.getLogger(__name__)

# ---------------------------------------------------------------------------
# Schema initialisation
# ---------------------------------------------------------------------------

_SCHEMA_SQL = """
CREATE TABLE IF NOT EXISTS projects (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    name TEXT NOT NULL,
    description TEXT NOT NULL DEFAULT '',
    research_questions TEXT NOT NULL DEFAULT '[]',
    project_type TEXT NOT NULL DEFAULT 'screening',
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    updated_at DATETIME DEFAULT CURRENT_TIMESTAMP
);

CREATE TABLE IF NOT EXISTS project_criteria (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
    category TEXT NOT NULL DEFAULT 'Other',
    text TEXT NOT NULL,
    description TEXT NOT NULL DEFAULT '',
    criterion_type TEXT NOT NULL DEFAULT 'exclude',
    is_active INTEGER NOT NULL DEFAULT 1,
    source TEXT NOT NULL DEFAULT 'human',
    ai_confidence REAL,
    ai_rationale TEXT,
    title_abstract_assessable INTEGER NOT NULL DEFAULT 1,
    notes TEXT,
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    modified_at DATETIME DEFAULT CURRENT_TIMESTAMP
);

CREATE TABLE IF NOT EXISTS project_papers (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
    external_id TEXT NOT NULL DEFAULT '',
    title TEXT NOT NULL DEFAULT '',
    abstract TEXT NOT NULL DEFAULT '',
    citation TEXT NOT NULL DEFAULT '',
    pmid TEXT NOT NULL DEFAULT '',
    doi TEXT NOT NULL DEFAULT '',
    authors TEXT NOT NULL DEFAULT '',
    url TEXT NOT NULL DEFAULT '',
    author_location TEXT NOT NULL DEFAULT '',
    year TEXT NOT NULL DEFAULT ''
);

CREATE TABLE IF NOT EXISTS ai_activity_log (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    project_id INTEGER,
    action_type TEXT NOT NULL,
    model TEXT,
    duration_ms INTEGER,
    estimated_cost_usd REAL,
    status TEXT NOT NULL DEFAULT 'completed',
    metadata TEXT DEFAULT '{}',
    user_email TEXT,
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
    FOREIGN KEY (project_id) REFERENCES projects(id) ON DELETE CASCADE
);

CREATE TABLE IF NOT EXISTS project_fields (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
    name TEXT NOT NULL,
    description TEXT NOT NULL DEFAULT '',
    data_type_primary TEXT NOT NULL DEFAULT 'string',
    data_type_secondary TEXT NOT NULL DEFAULT 'NA',
    examples TEXT NOT NULL DEFAULT '[]',
    examples_mode TEXT NOT NULL DEFAULT 'guide',
    depth TEXT NOT NULL DEFAULT 'minimal'
);

CREATE TABLE IF NOT EXISTS project_indexer_results (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
    results TEXT NOT NULL DEFAULT '[]',
    errors TEXT NOT NULL DEFAULT '[]',
    usage TEXT NOT NULL DEFAULT '{}',
    model TEXT NOT NULL DEFAULT '',
    record_count INTEGER NOT NULL DEFAULT 0,
    tag_groups TEXT NOT NULL DEFAULT '{}',
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP
);

CREATE TABLE IF NOT EXISTS project_fulltext_results (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    project_id INTEGER NOT NULL REFERENCES projects(id) ON DELETE CASCADE,
    results TEXT NOT NULL DEFAULT '[]',
    errors TEXT NOT NULL DEFAULT '[]',
    record_count INTEGER NOT NULL DEFAULT 0,
    created_at DATETIME DEFAULT CURRENT_TIMESTAMP
);

CREATE TABLE IF NOT EXISTS project_fulltext_config (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    project_id INTEGER NOT NULL UNIQUE REFERENCES projects(id) ON DELETE CASCADE,
    s2_api_key TEXT NOT NULL DEFAULT '',
    unpaywall_email TEXT NOT NULL DEFAULT ''
);
"""

_schema_initialised = False


def _ensure_schema(db: DatabaseBackend) -> None:
    global _schema_initialised
    if _schema_initialised:
        return
    with db.get_connection() as conn:
        db.executescript(conn, _SCHEMA_SQL)
        # Run migrations with savepoints so PostgreSQL doesn't abort the transaction
        _migrations = (
            [
                "ALTER TABLE projects ADD COLUMN project_type TEXT NOT NULL DEFAULT 'screening'",
                "ALTER TABLE project_indexer_results ADD COLUMN tag_groups TEXT NOT NULL DEFAULT '{}'",
                "ALTER TABLE project_fields ADD COLUMN depth TEXT NOT NULL DEFAULT 'minimal'",
            ]
            + [
                f"ALTER TABLE project_papers ADD COLUMN {col} TEXT NOT NULL DEFAULT ''"
                for col in ("pmid", "doi", "authors", "url", "author_location", "year")
            ]
            + [
                "ALTER TABLE project_fulltext_config ADD COLUMN openalex_api_key TEXT NOT NULL DEFAULT ''",
            ]
        )
        for sql in _migrations:
            try:
                db.execute(conn, "SAVEPOINT _migrate")
                db.execute(conn, sql)
                db.execute(conn, "RELEASE SAVEPOINT _migrate")
            except Exception:
                try:
                    db.execute(conn, "ROLLBACK TO SAVEPOINT _migrate")
                except Exception:
                    pass  # SQLite doesn't support savepoints the same way
    _schema_initialised = True


def _row_to_dict(row) -> dict:
    """Convert a DB row to dict (works for sqlite3.Row and psycopg2 DictRow)."""
    if hasattr(row, "keys"):
        return dict(row)
    return dict(row)


# ---------------------------------------------------------------------------
# Projects CRUD
# ---------------------------------------------------------------------------


def list_projects(db: Optional[DatabaseBackend] = None) -> list[dict]:
    db = db or get_backend()
    _ensure_schema(db)
    with db.get_connection() as conn:
        cursor = db.execute(
            conn,
            """
            SELECT p.*,
                   (SELECT COUNT(*) FROM project_criteria WHERE project_id = p.id) AS criteria_count,
                   (SELECT COUNT(*) FROM project_papers WHERE project_id = p.id) AS paper_count
            FROM projects p ORDER BY p.updated_at DESC
        """,
        )
        rows = cursor.fetchall()
    result = []
    for row in rows:
        d = _row_to_dict(row)
        d["research_questions"] = json.loads(d.get("research_questions", "[]"))
        result.append(d)
    return result


def get_project(project_id: int, db: Optional[DatabaseBackend] = None) -> Optional[dict]:
    db = db or get_backend()
    _ensure_schema(db)
    with db.get_connection() as conn:
        cursor = db.execute(
            conn,
            """
            SELECT p.*,
                   (SELECT COUNT(*) FROM project_criteria WHERE project_id = p.id) AS criteria_count,
                   (SELECT COUNT(*) FROM project_papers WHERE project_id = p.id) AS paper_count
            FROM projects p WHERE p.id = ?
        """,
            (project_id,),
        )
        row = cursor.fetchone()
    if not row:
        return None
    d = _row_to_dict(row)
    d["research_questions"] = json.loads(d.get("research_questions", "[]"))
    return d


def create_project(
    name: str,
    description: str = "",
    research_questions: Optional[list[str]] = None,
    project_type: str = "screening",
    db: Optional[DatabaseBackend] = None,
) -> dict:
    db = db or get_backend()
    _ensure_schema(db)
    rq_json = json.dumps(research_questions or [])
    with db.get_connection() as conn:
        cursor = db.execute(
            conn,
            "INSERT INTO projects (name, description, research_questions, project_type) VALUES (?, ?, ?, ?)",
            (name, description, rq_json, project_type),
            returning=True,
        )
        pid = db.get_lastrowid(cursor)
    return get_project(pid, db)  # type: ignore


def update_project(
    project_id: int,
    name: Optional[str] = None,
    description: Optional[str] = None,
    research_questions: Optional[list[str]] = None,
    project_type: Optional[str] = None,
    db: Optional[DatabaseBackend] = None,
) -> Optional[dict]:
    db = db or get_backend()
    _ensure_schema(db)
    sets: list[str] = []
    params: list[Any] = []
    if name is not None:
        sets.append("name = ?")
        params.append(name)
    if description is not None:
        sets.append("description = ?")
        params.append(description)
    if research_questions is not None:
        sets.append("research_questions = ?")
        params.append(json.dumps(research_questions))
    if project_type is not None:
        sets.append("project_type = ?")
        params.append(project_type)
    if not sets:
        return get_project(project_id, db)
    sets.append("updated_at = CURRENT_TIMESTAMP")
    params.append(project_id)
    with db.get_connection() as conn:
        db.execute(conn, f"UPDATE projects SET {', '.join(sets)} WHERE id = ?", tuple(params))
    return get_project(project_id, db)


def delete_project(project_id: int, db: Optional[DatabaseBackend] = None) -> bool:
    db = db or get_backend()
    _ensure_schema(db)
    with db.get_connection() as conn:
        # Delete cascade children first for SQLite compat
        db.execute(conn, "DELETE FROM project_fields WHERE project_id = ?", (project_id,))
        db.execute(conn, "DELETE FROM project_papers WHERE project_id = ?", (project_id,))
        db.execute(conn, "DELETE FROM project_criteria WHERE project_id = ?", (project_id,))
        cursor = db.execute(conn, "DELETE FROM projects WHERE id = ?", (project_id,))
        return cursor.rowcount > 0


# ---------------------------------------------------------------------------
# Criteria CRUD
# ---------------------------------------------------------------------------


def list_criteria(project_id: int, db: Optional[DatabaseBackend] = None) -> list[dict]:
    db = db or get_backend()
    _ensure_schema(db)
    with db.get_connection() as conn:
        cursor = db.execute(
            conn,
            "SELECT * FROM project_criteria WHERE project_id = ? ORDER BY id",
            (project_id,),
        )
        return [_row_to_dict(r) for r in cursor.fetchall()]


def add_criterion(project_id: int, data: dict, db: Optional[DatabaseBackend] = None) -> dict:
    db = db or get_backend()
    _ensure_schema(db)
    with db.get_connection() as conn:
        cursor = db.execute(
            conn,
            """INSERT INTO project_criteria
               (project_id, category, text, description, criterion_type, is_active,
                source, ai_confidence, ai_rationale, title_abstract_assessable, notes)
               VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
            (
                project_id,
                data.get("category", "Other"),
                data["text"],
                data.get("description", ""),
                data.get("criterion_type", "exclude"),
                1 if data.get("is_active", True) else 0,
                data.get("source", "human"),
                data.get("ai_confidence"),
                data.get("ai_rationale"),
                1 if data.get("title_abstract_assessable", True) else 0,
                data.get("notes"),
            ),
            returning=True,
        )
        cid = db.get_lastrowid(cursor)
    with db.get_connection() as conn:
        cursor = db.execute(conn, "SELECT * FROM project_criteria WHERE id = ?", (cid,))
        return _row_to_dict(cursor.fetchone())


def update_criterion(
    project_id: int, criterion_id: int, data: dict, db: Optional[DatabaseBackend] = None
) -> Optional[dict]:
    db = db or get_backend()
    _ensure_schema(db)
    sets: list[str] = []
    params: list[Any] = []
    for field in (
        "category",
        "text",
        "description",
        "criterion_type",
        "source",
        "ai_confidence",
        "ai_rationale",
        "notes",
    ):
        if field in data:
            sets.append(f"{field} = ?")
            params.append(data[field])
    if "is_active" in data:
        sets.append("is_active = ?")
        params.append(1 if data["is_active"] else 0)
    if "title_abstract_assessable" in data:
        sets.append("title_abstract_assessable = ?")
        params.append(1 if data["title_abstract_assessable"] else 0)
    if not sets:
        return None
    sets.append("modified_at = CURRENT_TIMESTAMP")
    params.extend([criterion_id, project_id])
    with db.get_connection() as conn:
        db.execute(
            conn, f"UPDATE project_criteria SET {', '.join(sets)} WHERE id = ? AND project_id = ?", tuple(params)
        )
        cursor = db.execute(
            conn, "SELECT * FROM project_criteria WHERE id = ? AND project_id = ?", (criterion_id, project_id)
        )
        row = cursor.fetchone()
    return _row_to_dict(row) if row else None


def delete_criterion(project_id: int, criterion_id: int, db: Optional[DatabaseBackend] = None) -> bool:
    db = db or get_backend()
    _ensure_schema(db)
    with db.get_connection() as conn:
        cursor = db.execute(
            conn,
            "DELETE FROM project_criteria WHERE id = ? AND project_id = ?",
            (criterion_id, project_id),
        )
        return cursor.rowcount > 0


# ---------------------------------------------------------------------------
# Papers CRUD
# ---------------------------------------------------------------------------


def list_papers(
    project_id: int,
    limit: int = 100,
    offset: int = 0,
    db: Optional[DatabaseBackend] = None,
) -> dict:
    """Return paginated papers with total count."""
    db = db or get_backend()
    _ensure_schema(db)
    with db.get_connection() as conn:
        count_cursor = db.execute(
            conn, "SELECT COUNT(*) as cnt FROM project_papers WHERE project_id = ?", (project_id,)
        )
        total = _row_to_dict(count_cursor.fetchone())["cnt"]
        cursor = db.execute(
            conn,
            "SELECT * FROM project_papers WHERE project_id = ? ORDER BY id LIMIT ? OFFSET ?",
            (project_id, limit, offset),
        )
        papers = [_row_to_dict(r) for r in cursor.fetchall()]
    return {"total": total, "papers": papers}


def _get_paper_field(p: dict, *keys: str, default: str = "") -> str:
    """Get a paper field by trying multiple key aliases (case-insensitive)."""
    for k in keys:
        if k in p and p[k]:
            return str(p[k])
    # Fallback: case-insensitive scan
    lower_map = {kk.lower(): kk for kk in p}
    for k in keys:
        real = lower_map.get(k.lower())
        if real and p[real]:
            return str(p[real])
    return default


def _clean_doi(raw: str) -> str:
    """Extract a clean DOI from a raw string.

    Handles: trailing periods/semicolons, 'doi:' prefix, full URLs,
    extra whitespace, and embedded text like 'doi: 10.xxx/yyy.'
    """
    import re

    s = raw.strip()
    if not s:
        return ""
    # Extract DOI pattern (10.XXXX/...) from anywhere in the string
    m = re.search(r"(10\.\d{4,9}/[^\s;,]+)", s)
    if not m:
        return s  # return as-is if no DOI pattern found
    doi = m.group(1).rstrip(".")
    return doi


def bulk_add_papers(project_id: int, papers: list[dict], db: Optional[DatabaseBackend] = None) -> int:
    """Insert papers in bulk. Returns count added."""
    db = db or get_backend()
    _ensure_schema(db)
    if not papers:
        return 0
    rows = [
        (
            project_id,
            _get_paper_field(p, "external_id", "id", "ID"),
            _get_paper_field(p, "title", "Title"),
            _get_paper_field(p, "abstract", "Abstract"),
            _get_paper_field(p, "citation", "Citation", "Full citation", "reference"),
            _get_paper_field(p, "pmid", "PMID"),
            _clean_doi(_get_paper_field(p, "doi", "DOI", "Doi")),
            _get_paper_field(p, "authors", "Authors"),
            _get_paper_field(p, "url", "URL", "Full publication URL"),
            _get_paper_field(p, "author_location", "Author location", "AuthorLocation", "AuthorAffiliation"),
            _get_paper_field(p, "year", "Year"),
        )
        for p in papers
    ]
    with db.get_connection() as conn:
        db.executemany(
            conn,
            """INSERT INTO project_papers
               (project_id, external_id, title, abstract, citation, pmid, doi, authors, url, author_location, year)
               VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""",
            rows,
        )
    return len(rows)


def clear_papers(project_id: int, db: Optional[DatabaseBackend] = None) -> int:
    """Delete all papers and associated results for a project. Returns paper count deleted."""
    db = db or get_backend()
    _ensure_schema(db)
    with db.get_connection() as conn:
        # Clear indexer results for this project
        db.execute(conn, "DELETE FROM project_indexer_results WHERE project_id = ?", (project_id,))
        # Clear screening jobs and decisions (tables may not exist yet — use savepoints for PostgreSQL)
        for table in ("screening_jobs", "screening_decisions"):
            try:
                db.execute(conn, "SAVEPOINT _clear")
                db.execute(conn, f"DELETE FROM {table} WHERE project_id = ?", (project_id,))
                db.execute(conn, "RELEASE SAVEPOINT _clear")
            except Exception:
                try:
                    db.execute(conn, "ROLLBACK TO SAVEPOINT _clear")
                except Exception:
                    pass
        # Clear papers
        cursor = db.execute(conn, "DELETE FROM project_papers WHERE project_id = ?", (project_id,))
        return cursor.rowcount


# ---------------------------------------------------------------------------
# Indexing fields CRUD
# ---------------------------------------------------------------------------


def list_fields(project_id: int, db: Optional[DatabaseBackend] = None) -> list[dict]:
    db = db or get_backend()
    _ensure_schema(db)
    with db.get_connection() as conn:
        cursor = db.execute(conn, "SELECT * FROM project_fields WHERE project_id = ? ORDER BY id", (project_id,))
        rows = [_row_to_dict(r) for r in cursor.fetchall()]
    for r in rows:
        r["examples"] = json.loads(r.get("examples", "[]"))
    return rows


def replace_fields(project_id: int, fields: list[dict], db: Optional[DatabaseBackend] = None) -> list[dict]:
    """Replace all indexing fields for a project."""
    db = db or get_backend()
    _ensure_schema(db)
    with db.get_connection() as conn:
        db.execute(conn, "DELETE FROM project_fields WHERE project_id = ?", (project_id,))
        for f in fields:
            db.execute(
                conn,
                """INSERT INTO project_fields
                   (project_id, name, description, data_type_primary, data_type_secondary, examples, examples_mode, depth)
                   VALUES (?, ?, ?, ?, ?, ?, ?, ?)""",
                (
                    project_id,
                    f["name"],
                    f.get("description", ""),
                    f.get("data_type_primary", "string"),
                    f.get("data_type_secondary", "NA"),
                    json.dumps(f.get("examples", [])),
                    f.get("examples_mode", "guide"),
                    f.get("depth", "minimal"),
                ),
            )
    return list_fields(project_id, db)


# ---------------------------------------------------------------------------
# Indexer Results CRUD
# ---------------------------------------------------------------------------


def save_indexer_results(
    project_id: int,
    results: list[dict],
    errors: list[str],
    usage: dict | None = None,
    model: str = "",
    db: Optional[DatabaseBackend] = None,
) -> None:
    """Save indexer results for a project, merging with any existing results by paper ID."""
    db = db or get_backend()
    _ensure_schema(db)
    with db.get_connection() as conn:
        # Merge with existing results: new results overwrite by ID, others preserved
        existing = get_latest_indexer_results(project_id, db)
        if existing and existing.get("results"):
            existing_by_id: dict[str, dict] = {}
            for r in existing["results"]:
                rid = str(r.get("ID") or r.get("id") or "").lower()
                if rid:
                    existing_by_id[rid] = r
            # Overwrite with new results
            for r in results:
                rid = str(r.get("ID") or r.get("id") or "").lower()
                if rid:
                    existing_by_id[rid] = r
            results = list(existing_by_id.values())
            # Only keep errors for papers still in error state after merge
            has_errors = any(r.get("indexing_status") == "error" for r in results)
            if not has_errors:
                errors = []
        db.execute(conn, "DELETE FROM project_indexer_results WHERE project_id = ?", (project_id,))
        db.execute(
            conn,
            """INSERT INTO project_indexer_results
               (project_id, results, errors, usage, model, record_count)
               VALUES (?, ?, ?, ?, ?, ?)""",
            (
                project_id,
                json.dumps(results),
                json.dumps(errors),
                json.dumps(usage or {}),
                model,
                len(results),
            ),
        )


def get_latest_indexer_results(
    project_id: int,
    db: Optional[DatabaseBackend] = None,
) -> dict | None:
    """Get the latest indexer results for a project."""
    db = db or get_backend()
    _ensure_schema(db)
    with db.get_connection() as conn:
        cursor = db.execute(
            conn,
            "SELECT * FROM project_indexer_results WHERE project_id = ? ORDER BY created_at DESC LIMIT 1",
            (project_id,),
        )
        row = cursor.fetchone()
        if row is None:
            return None
        d = _row_to_dict(row)
        d["results"] = json.loads(d.get("results", "[]"))
        d["errors"] = json.loads(d.get("errors", "[]"))
        d["usage"] = json.loads(d.get("usage", "{}"))
        d["tag_groups"] = json.loads(d.get("tag_groups", "{}"))
        return d


def save_tag_groups(
    project_id: int,
    tag_groups: dict,
    db: Optional[DatabaseBackend] = None,
) -> bool:
    """Save tag group mappings for the latest indexer results of a project.

    Returns True if a row was updated, False if no indexer results exist yet.
    """
    db = db or get_backend()
    _ensure_schema(db)
    with db.get_connection() as conn:
        # Find the latest result row for this project
        cursor = db.execute(
            conn,
            "SELECT id FROM project_indexer_results WHERE project_id = ? ORDER BY created_at DESC LIMIT 1",
            (project_id,),
        )
        row = cursor.fetchone()
        if row is None:
            return False
        row_id = row["id"] if hasattr(row, "keys") else row[0]
        db.execute(
            conn,
            "UPDATE project_indexer_results SET tag_groups = ? WHERE id = ?",
            (json.dumps(tag_groups), row_id),
        )
        return True


# ---------------------------------------------------------------------------
# AI Activity Log CRUD
# ---------------------------------------------------------------------------


def create_ai_log_entry(
    project_id: int,
    action_type: str,
    model: Optional[str] = None,
    duration_ms: Optional[int] = None,
    estimated_cost_usd: Optional[float] = None,
    status: str = "completed",
    metadata: Optional[dict] = None,
    user_email: Optional[str] = None,
    db: Optional[DatabaseBackend] = None,
) -> dict:
    """Create an AI activity log entry."""
    db = db or get_backend()
    _ensure_schema(db)
    meta_json = json.dumps(metadata or {})
    with db.get_connection() as conn:
        cursor = db.execute(
            conn,
            """INSERT INTO ai_activity_log
               (project_id, action_type, model, duration_ms, estimated_cost_usd, status, metadata, user_email)
               VALUES (?, ?, ?, ?, ?, ?, ?, ?)""",
            (project_id, action_type, model, duration_ms, estimated_cost_usd, status, meta_json, user_email),
            returning=True,
        )
        log_id = db.get_lastrowid(cursor)
        cursor = db.execute(conn, "SELECT * FROM ai_activity_log WHERE id = ?", (log_id,))
        row = cursor.fetchone()
    d = _row_to_dict(row)
    d["metadata"] = json.loads(d.get("metadata", "{}"))
    return d


def list_ai_log_entries(
    project_id: int,
    limit: int = 100,
    db: Optional[DatabaseBackend] = None,
) -> list[dict]:
    """List AI activity log entries for a project."""
    db = db or get_backend()
    _ensure_schema(db)
    with db.get_connection() as conn:
        cursor = db.execute(
            conn,
            "SELECT * FROM ai_activity_log WHERE project_id = ? ORDER BY id DESC LIMIT ?",
            (project_id, limit),
        )
        rows = [_row_to_dict(r) for r in cursor.fetchall()]
    for r in rows:
        r["metadata"] = json.loads(r.get("metadata", "{}"))
    return rows


# ---------------------------------------------------------------------------
# Full-text results CRUD
# ---------------------------------------------------------------------------


def save_fulltext_results(
    project_id: int,
    results: list[dict],
    errors: list[str],
    db: Optional[DatabaseBackend] = None,
) -> None:
    """Save full-text check results for a project (replace previous)."""
    db = db or get_backend()
    _ensure_schema(db)
    with db.get_connection() as conn:
        db.execute(conn, "DELETE FROM project_fulltext_results WHERE project_id = ?", (project_id,))
        db.execute(
            conn,
            """INSERT INTO project_fulltext_results
               (project_id, results, errors, record_count)
               VALUES (?, ?, ?, ?)""",
            (project_id, json.dumps(results), json.dumps(errors), len(results)),
        )


def get_latest_fulltext_results(
    project_id: int,
    db: Optional[DatabaseBackend] = None,
) -> dict | None:
    """Get the latest full-text check results for a project."""
    db = db or get_backend()
    _ensure_schema(db)
    with db.get_connection() as conn:
        cursor = db.execute(
            conn,
            "SELECT * FROM project_fulltext_results WHERE project_id = ? ORDER BY created_at DESC LIMIT 1",
            (project_id,),
        )
        row = cursor.fetchone()
        if row is None:
            return None
        d = _row_to_dict(row)
        d["results"] = json.loads(d.get("results", "[]"))
        d["errors"] = json.loads(d.get("errors", "[]"))
        return d


# ---------------------------------------------------------------------------
# Full-text config CRUD
# ---------------------------------------------------------------------------


def get_fulltext_config(
    project_id: int,
    db: Optional[DatabaseBackend] = None,
) -> dict:
    """Get full-text configuration for a project, falling back to env vars."""
    db = db or get_backend()
    _ensure_schema(db)
    with db.get_connection() as conn:
        cursor = db.execute(
            conn,
            "SELECT * FROM project_fulltext_config WHERE project_id = ?",
            (project_id,),
        )
        row = cursor.fetchone()

    from crystallise.config.settings import get_settings

    settings = get_settings()

    if row is not None:
        d = _row_to_dict(row)
        s2_key = d.get("s2_api_key", "") or settings.s2_api_key
        email = d.get("unpaywall_email", "") or settings.unpaywall_email
        oa_key = d.get("openalex_api_key", "") or settings.openalex_api_key
    else:
        s2_key = settings.s2_api_key
        email = settings.unpaywall_email
        oa_key = settings.openalex_api_key

    return {
        "s2_api_key_set": bool(s2_key),
        "s2_api_key_hint": f"****{s2_key[-4:]}" if len(s2_key) >= 4 else ("****" if s2_key else ""),
        "unpaywall_email": email,
        "openalex_api_key_set": bool(oa_key),
        "openalex_api_key_hint": f"****{oa_key[-4:]}" if len(oa_key) >= 4 else ("****" if oa_key else ""),
    }


def save_fulltext_config(
    project_id: int,
    s2_api_key: str | None = None,
    unpaywall_email: str | None = None,
    openalex_api_key: str | None = None,
    db: Optional[DatabaseBackend] = None,
) -> dict:
    """Upsert full-text configuration for a project. Returns updated config."""
    db = db or get_backend()
    _ensure_schema(db)
    with db.get_connection() as conn:
        cursor = db.execute(
            conn,
            "SELECT * FROM project_fulltext_config WHERE project_id = ?",
            (project_id,),
        )
        row = cursor.fetchone()
        if row is not None:
            existing = _row_to_dict(row)
            new_key = s2_api_key if s2_api_key is not None else existing.get("s2_api_key", "")
            new_email = unpaywall_email if unpaywall_email is not None else existing.get("unpaywall_email", "")
            new_oa_key = openalex_api_key if openalex_api_key is not None else existing.get("openalex_api_key", "")
            db.execute(
                conn,
                "UPDATE project_fulltext_config SET s2_api_key = ?, unpaywall_email = ?, openalex_api_key = ? WHERE project_id = ?",
                (new_key, new_email, new_oa_key, project_id),
            )
        else:
            db.execute(
                conn,
                "INSERT INTO project_fulltext_config (project_id, s2_api_key, unpaywall_email, openalex_api_key) VALUES (?, ?, ?, ?)",
                (project_id, s2_api_key or "", unpaywall_email or "", openalex_api_key or ""),
            )
    return get_fulltext_config(project_id, db)
