"""
Google Sheets service for KPI metrics (monthly sheets: January, February, ...).

Reads from a separate KPI spreadsheet. Share it with the same service account
used for the GP violations sheet: kpistats@...iam.gserviceaccount.com (Viewer).
Uses GOOGLE_KPI_SHEETS_ID; same credentials as sheets_service (GOOGLE_CREDENTIALS_PATH).
"""
import calendar
import logging
import re
import time
from datetime import datetime, timedelta
from typing import Dict, List, Optional, Any, Tuple
from pathlib import Path
import os

logger = logging.getLogger(__name__)

WEEKLY_WORKLOAD_FORM_TAB = "Weekly Workload Form"

MONTH_SHEET_NAMES = [
    "January", "February", "March", "April", "May", "June",
    "July", "August", "September", "October", "November", "December",
]

# Metric label (column A in sectioned layout) -> our metric key (case-insensitive match)
METRIC_LABEL_TO_KEY = {
    "quote conversion": "quoteConversion",
    "client reviews": "clientReviews",
    "positive client reviews": "clientReviews",
    "avg. time to first attendance": "firstAttendance",
    "avg time to first attendance": "firstAttendance",
    "quote creation to acceptance time": "quoteAcceptance",
    "quote creation to acceptance": "quoteAcceptance",
    "quote creation to acceptance time (days)": "quoteAcceptance",
    "jobs completed": "jobsCompleted",
    "total pending jobs": "pendingJobs",
    "weekly workload form completion": "workloadForm",
    "pending jobs with due dates": "dueDates",
}

BRANCH_NAMES = ("bunbury", "busselton", "mandurah")

# Branch -> list of department names that have the clientReviews (Client Reviews) KPI
_BRANCH_DEPTS_CLIENT_REVIEWS = {
    "Busselton": ["Electrical", "Air"],
    "Bunbury": ["Residential", "Air", "Solar"],
    "Mandurah": ["Electrical", "Air"],
}

_kpi_cache: Dict[str, tuple] = {}
KPI_CACHE_TTL_SECONDS = 120  # 2 minutes


def _norm(s: str) -> str:
    return (s or "").strip().lower()


class KpiSheetsService:
    """Read KPI actuals from a Google Sheet with one tab per month."""

    def __init__(self):
        self.spreadsheet_id = os.getenv("GOOGLE_KPI_SHEETS_ID")
        backend_dir = Path(__file__).resolve().parent.parent.parent
        default_creds = backend_dir / "data" / "google_credentials.json"
        self.credentials_path = os.getenv("GOOGLE_CREDENTIALS_PATH", str(default_creds))
        self._service = None
        self._initialized = False

    def _initialize(self) -> None:
        if self._initialized:
            return
        try:
            if not self.spreadsheet_id:
                logger.warning("GOOGLE_KPI_SHEETS_ID not set - KPI from Sheets disabled")
                return
            from google.oauth2 import service_account
            from googleapiclient.discovery import build
            creds_path = Path(self.credentials_path)
            if not creds_path.exists():
                logger.warning("Google credentials not found at %s", creds_path)
                return
            credentials = service_account.Credentials.from_service_account_file(
                str(creds_path.absolute()),
                scopes=["https://www.googleapis.com/auth/spreadsheets.readonly"],
            )
            self._service = build("sheets", "v4", credentials=credentials)
            self._initialized = True
            logger.info("KPI Sheets service initialized (spreadsheet %s)", self.spreadsheet_id[:20] + "...")
        except Exception as e:
            logger.exception("Failed to initialize KPI Sheets: %s", e)
            self._service = None

    def get_kpi_for_month(self, year: int, month: int) -> Dict[str, Dict[str, Dict[str, float]]]:
        """
        Read one month's sheet and return actuals: { branch: { department: { metricKey: value } } }.
        month is 1-12 (calendar month).
        """
        if not self._initialized:
            self._initialize()
        if not self._service:
            return {}

        sheet_name = MONTH_SHEET_NAMES[month - 1] if 1 <= month <= 12 else None
        if not sheet_name:
            return {}

        cache_key = f"{sheet_name}_{year}"
        now = time.time()
        if cache_key in _kpi_cache:
            cached_at, data = _kpi_cache[cache_key]
            if now - cached_at < KPI_CACHE_TTL_SECONDS:
                return data
        try:
            range_name = f"{sheet_name}!A:Z"
            result = self._service.spreadsheets().values().get(
                spreadsheetId=self.spreadsheet_id,
                range=range_name,
                valueRenderOption="FORMATTED_VALUE",
            ).execute()
            values = result.get("values", [])
        except Exception as e:
            logger.warning("Could not read KPI sheet %s: %s", sheet_name, e)
            return {}

        out = self._parse_sheet_values(values)
        # Bunbury Commercial: Weekly Workload Form completion from same sheet, "Weekly Workload Form" tab
        pct, count = self.get_workload_form_completion(year, month)
        if pct is not None:
            if "Bunbury" not in out:
                out["Bunbury"] = {}
            if "Commercial" not in out["Bunbury"]:
                out["Bunbury"]["Commercial"] = {}
            out["Bunbury"]["Commercial"]["workloadForm"] = pct
            out["Bunbury"]["Commercial"]["workloadFormCount"] = count
        # Google Place review counts: use reviews_this_month for KPI actual, total for card display
        try:
            from app.services.google_reviews_service import get_google_review_counts_for_month
            review_data = get_google_review_counts_for_month(year, month)
            for branch, depts in _BRANCH_DEPTS_CLIENT_REVIEWS.items():
                if branch not in review_data:
                    continue
                data = review_data[branch]
                reviews_this_month = data.get("reviews_this_month", 0)
                total = data.get("total", 0)
                if branch not in out:
                    out[branch] = {}
                for dept in depts:
                    if dept not in out[branch]:
                        out[branch][dept] = {}
                    out[branch][dept]["clientReviews"] = reviews_this_month
                    out[branch][dept]["clientReviewsTotal"] = total
        except Exception as e:
            logger.debug("Google review counts not applied: %s", e)
        _kpi_cache[cache_key] = (now, out)
        return out

    def _parse_sheet_values(self, values: List[List[Any]]) -> Dict[str, Dict[str, Dict[str, float]]]:
        """
        Parse sheet with sectioned layout.
        Layout: section headers like "Bunbury Residental" on their own row in column A.
        Then alternating rows: metric name in col A (row N), value in col A (row N+1).
        Also supports name in A + value in B on the same row.
        """
        out: Dict[str, Dict[str, Dict[str, float]]] = {}
        if not values:
            return out

        current_branch: Optional[str] = None
        current_dept: Optional[str] = None
        pending_metric_key: Optional[str] = None  # waiting for next row's value

        for row in values:
            if not row:
                pending_metric_key = None
                continue
            cell_a = str(row[0]).strip() if len(row) > 0 else ""
            cell_b = str(row[1]).strip() if len(row) > 1 else ""

            # If we're expecting a value for the previous metric name
            if pending_metric_key is not None and current_branch and current_dept:
                # Try col A as the value (the common layout: value alone on next row)
                val_str = cell_a
                try:
                    num = float(val_str.replace(",", "").strip())
                    out[current_branch][current_dept][pending_metric_key] = num
                    pending_metric_key = None
                    continue  # consumed this row as a value
                except (ValueError, TypeError):
                    pass
                pending_metric_key = None  # not a number, fall through to normal parsing

            # Section header: "Bunbury Residental", "Busselton Air", etc.
            if cell_a:
                parts = cell_a.split()
                if parts and _norm(parts[0]) in BRANCH_NAMES:
                    # Make sure this isn't a known metric label
                    if not self._is_metric_label(cell_a):
                        branch = parts[0].strip()
                        dept = " ".join(parts[1:]).strip()
                        if dept.lower() == "residental":
                            dept = "Residential"
                        elif dept and dept[0].islower():
                            dept = dept.capitalize()
                        current_branch = branch
                        current_dept = dept
                        if current_branch not in out:
                            out[current_branch] = {}
                        if current_dept not in out[current_branch]:
                            out[current_branch][current_dept] = {}
                        pending_metric_key = None
                        continue

            # Metric row
            if current_branch is None or current_dept is None:
                continue
            if not cell_a:
                continue

            matched_key = self._match_metric(cell_a)
            if matched_key is None:
                continue

            # Value on same row in column B?
            if cell_b:
                try:
                    num = float(cell_b.replace(",", "").strip())
                    out[current_branch][current_dept][matched_key] = num
                    continue
                except (ValueError, TypeError):
                    pass

            # Otherwise value is on the next row — store pending
            pending_metric_key = matched_key

        return out

    @staticmethod
    def _is_metric_label(text: str) -> bool:
        norm = _norm(text)
        for label in METRIC_LABEL_TO_KEY:
            if norm == label or label in norm:
                return True
        return False

    @staticmethod
    def _match_metric(text: str) -> Optional[str]:
        norm = _norm(text)
        for label, key in METRIC_LABEL_TO_KEY.items():
            if norm == label or label in norm:
                return key
        return None

    def get_kpi_quarter(
        self, year: int, month: int
    ) -> Dict[str, Dict[str, Dict[str, List[Optional[float]]]]]:
        """
        Get KPI actuals for the quarter that contains the given month.
        month is 1-12. Returns { branch: { department: { metricKey: [m1, m2, m3] } } }
        where m1,m2,m3 are the three months in the quarter (e.g. Jan, Feb, Mar for Q1).
        """
        if not self._initialized:
            self._initialize()
        if not self._service:
            return {}

        # Quarter: Q1=1,2,3 Q2=4,5,6 Q3=7,8,9 Q4=10,11,12
        quarter_start = ((month - 1) // 3) * 3 + 1
        months_in_quarter = [quarter_start, quarter_start + 1, quarter_start + 2]

        all_branches: set = set()
        by_month: List[Dict[str, Dict[str, Dict[str, float]]]] = []
        for m in months_in_quarter:
            data = self.get_kpi_for_month(year, m)
            by_month.append(data)
            for b in data:
                all_branches.add(b)

        # Build branch -> dept -> metric -> [v1, v2, v3]
        result: Dict[str, Dict[str, Dict[str, List[Optional[float]]]]] = {}
        metric_keys: set = set()
        for data in by_month:
            for b, depts in data.items():
                for d, metrics in depts.items():
                    metric_keys.update(metrics.keys())

        for branch in all_branches:
            result[branch] = {}
            for data in by_month:
                for dept in data.get(branch, {}).keys():
                    if dept not in result[branch]:
                        result[branch][dept] = {k: [None, None, None] for k in metric_keys}
            for dept in list(result[branch].keys()):
                for mi, data in enumerate(by_month):
                    dept_data = data.get(branch, {}).get(dept, {})
                    for key in result[branch][dept]:
                        val = dept_data.get(key)
                        if val is not None:
                            result[branch][dept][key][mi] = val

        return result

    def get_workload_form_completion(self, year: int, month: int) -> Tuple[Optional[float], int]:
        """
        Read "Weekly Workload Form" tab (Submission Id, Created At), filter by Created At
        in the given month. Returns (completion_pct, submission_count).
        completion_pct = min(100, (count / weeks_in_month) * 100). Returns (None, 0) if tab missing or parse error.
        """
        if not self._initialized:
            self._initialize()
        if not self._service:
            return (None, 0)
        cache_key = f"{WEEKLY_WORKLOAD_FORM_TAB}_{year}_{month}"
        now = time.time()
        if cache_key in _kpi_cache:
            cached_at, val = _kpi_cache[cache_key]
            if now - cached_at < KPI_CACHE_TTL_SECONDS:
                return val
        try:
            result = self._service.spreadsheets().values().get(
                spreadsheetId=self.spreadsheet_id,
                range=f"{WEEKLY_WORKLOAD_FORM_TAB}!A:Z",
                valueRenderOption="FORMATTED_VALUE",
            ).execute()
            values = result.get("values", [])
            if not values:
                _kpi_cache[cache_key] = (now, (None, 0))
                return (None, 0)
            headers = [str(c).strip() for c in values[0]]
            created_at_col = None
            for i, h in enumerate(headers):
                if _norm(h) in ("created at", "created_at", "createdat"):
                    created_at_col = i
                    break
            if created_at_col is None:
                logger.warning("Weekly Workload Form: no 'Created At' column in %s", headers[:10])
                _kpi_cache[cache_key] = (now, (None, 0))
                return (None, 0)
            count = 0
            for row in values[1:]:
                if created_at_col >= len(row):
                    continue
                raw = str(row[created_at_col]).strip()
                if not raw:
                    continue
                dt = self._parse_created_at(raw)
                if dt and dt.year == year and dt.month == month:
                    count += 1
            n_days = calendar.monthrange(year, month)[1]
            n_weeks = max(1, (n_days + 6) // 7)
            pct = min(100.0, round((count / n_weeks) * 100, 1))
            out = (pct, count)
            _kpi_cache[cache_key] = (now, out)
            return out
        except Exception as e:
            logger.warning("Could not read Weekly Workload Form for %s-%s: %s", year, month, e)
            return (None, 0)

    @staticmethod
    def _parse_created_at(raw: str) -> Optional[datetime]:
        """Parse Created At string (ISO, date only, or US/UK style) to datetime; return None if invalid."""
        raw = (raw or "").strip()
        if not raw:
            return None
        # ISO with T and optional Z
        m = re.match(r"(\d{4})-(\d{2})-(\d{2})", raw)
        if m:
            try:
                return datetime(int(m.group(1)), int(m.group(2)), int(m.group(3)))
            except ValueError:
                pass
        # dd/mm/yyyy or d/m/yyyy
        m = re.match(r"(\d{1,2})[/\-](\d{1,2})[/\-](\d{4})", raw)
        if m:
            try:
                d, mon, y = int(m.group(1)), int(m.group(2)), int(m.group(3))
                return datetime(y, mon, d)
            except ValueError:
                pass
        # mm/dd/yyyy (US)
        m = re.match(r"(\d{1,2})[/\-](\d{1,2})[/\-](\d{4})", raw)
        if m:
            try:
                mon, d, y = int(m.group(1)), int(m.group(2)), int(m.group(3))
                if 1 <= mon <= 12 and 1 <= d <= 31:
                    return datetime(y, mon, d)
            except ValueError:
                pass
        try:
            return datetime.fromisoformat(raw.replace("Z", "+00:00").replace("+00:00", ""))
        except (ValueError, TypeError):
            pass
        return None

    def get_subscriber_totals(self) -> Dict[str, int]:
        """
        Get annual subscription counts from "Subscriber Totals" tab.
        A2=Busselton, A4=Bunbury, A6=Mandurah.
        Returns { "Busselton": int, "Bunbury": int, "Mandurah": int }.
        """
        if not self._initialized:
            self._initialize()
        if not self._service:
            return {"Busselton": 0, "Bunbury": 0, "Mandurah": 0}
        try:
            now = time.time()
            cache_key = "Subscriber Totals"
            values = None
            if cache_key in _kpi_cache:
                cached_at, cached_data = _kpi_cache[cache_key]
                if now - cached_at < KPI_CACHE_TTL_SECONDS:
                    values = cached_data
            if values is None:
                result = self._service.spreadsheets().values().get(
                    spreadsheetId=self.spreadsheet_id,
                    range="Subscriber Totals!A2:A6",
                    valueRenderOption="FORMATTED_VALUE",
                ).execute()
                values = result.get("values", [])
                _kpi_cache[cache_key] = (now, values)

            def _safe_int(row, default=0):
                if not row or not row[0]:
                    return default
                try:
                    return int(float(str(row[0]).replace(",", "").strip()))
                except (ValueError, TypeError):
                    return default

            out = {"Busselton": 0, "Bunbury": 0, "Mandurah": 0}
            if len(values) >= 1:
                out["Busselton"] = _safe_int(values[0])
            if len(values) >= 3:
                out["Bunbury"] = _safe_int(values[2])
            if len(values) >= 5:
                out["Mandurah"] = _safe_int(values[4])
            return out
        except Exception as e:
            logger.warning("Could not fetch subscriber totals: %s", e)
            return {"Busselton": 0, "Bunbury": 0, "Mandurah": 0}

    MARKETING_NUMBERS_TAB = "Marketing Numbers"
    _marketing_cache: Dict[str, tuple] = {}

    def get_marketing_numbers(self, year: int, month: int) -> Dict[str, Dict[str, float]]:
        """
        Read "Marketing Numbers" tab from KPI Seeder sheet.
        Date column format: "2026-03-17" (YYYY-MM-DD). No historical rows; you pull full
        monthly data each day, so we use the latest row per location in the selected month.
        Columns: Date, Client, Location, Google Cost, Google Clicks, Google Leads,
        Google Cost Per Lead, Meta Cost, Meta Clicks, Meta Leads, Meta Cost Per Lead,
        Total Quote Value, Total Sales Value.
        Returns { "Bunbury": { google_cost, ... }, "Busselton": {...}, "Mandurah": {...} }.
        """
        if not self._initialized:
            self._initialize()
        if not self._service:
            return {}

        cache_key = f"marketing_{year}_{month}"
        now = time.time()
        if cache_key in self._marketing_cache:
            cached_at, data = self._marketing_cache[cache_key]
            if now - cached_at < KPI_CACHE_TTL_SECONDS:
                return data

        try:
            result = self._service.spreadsheets().values().get(
                spreadsheetId=self.spreadsheet_id,
                range=f"{self.MARKETING_NUMBERS_TAB}!A:Z",
                valueRenderOption="FORMATTED_VALUE",
            ).execute()
            values = result.get("values", [])
        except Exception as e:
            logger.warning("Could not read Marketing Numbers sheet: %s", e)
            return {}

        def _parse_currency(s: Any) -> float:
            if s is None or s == "":
                return 0.0
            raw = str(s).strip().replace("$", "").replace(",", "").strip()
            try:
                return float(raw)
            except (ValueError, TypeError):
                return 0.0

        def _parse_int(s: Any) -> int:
            if s is None or s == "":
                return 0
            raw = str(s).strip().replace(",", "").strip()
            try:
                return int(float(raw))
            except (ValueError, TypeError):
                return 0

        if not values:
            return {}

        header = [str(c).strip().lower() for c in values[0]]
        col = {h: i for i, h in enumerate(header)}
        date_col = col.get("date", 0)
        location_col = col.get("location", 2)
        # Single-row columns; we take the latest row per location (no summing)
        row_cols = [
            ("google cost", "google_cost", _parse_currency),
            ("google clicks", "google_clicks", _parse_int),
            ("google leads", "google_leads", _parse_int),
            ("google cost per lead", "google_cost_per_lead", _parse_currency),
            ("meta cost", "meta_cost", _parse_currency),
            ("meta clicks", "meta_clicks", _parse_int),
            ("meta leads", "meta_leads", _parse_int),
            ("meta cost per lead", "meta_cost_per_lead", _parse_currency),
            ("total quote value", "total_quote_value", _parse_currency),
            ("total sales value", "total_sales_value", _parse_currency),
        ]

        # Collect (date, location, row) for rows in the selected month; date format "2026-03-17"
        candidates: List[Tuple[datetime, str, List[Any]]] = []
        for row in values[1:]:
            if len(row) <= max(date_col, location_col):
                continue
            raw_date = row[date_col] if date_col < len(row) else ""
            location = str(row[location_col]).strip() if location_col < len(row) else ""
            if not location or location.lower() not in BRANCH_NAMES:
                continue
            location = location.capitalize() if location.lower() == "bunbury" else location
            if location.lower() == "busselton":
                location = "Busselton"
            elif location.lower() == "mandurah":
                location = "Mandurah"
            elif location.lower() == "bunbury":
                location = "Bunbury"

            if isinstance(raw_date, (int, float)):
                try:
                    dt = datetime(1899, 12, 30) + timedelta(days=int(raw_date))
                except (ValueError, OverflowError):
                    dt = None
            else:
                dt = self._parse_created_at(str(raw_date or ""))
            if dt is None or dt.year != year or dt.month != month:
                continue
            candidates.append((dt, location, row))

        # Per location, keep only the row with the latest date (full monthly data as of that day)
        by_location: Dict[str, Tuple[datetime, List[Any]]] = {}
        for dt, location, row in candidates:
            if location not in by_location or dt > by_location[location][0]:
                by_location[location] = (dt, row)

        out: Dict[str, Dict[str, float]] = {}
        for location, (_dt, row) in by_location.items():
            out[location] = {
                "google_cost": 0.0, "google_clicks": 0, "google_leads": 0,
                "google_cost_per_lead": 0.0, "meta_cost": 0.0, "meta_clicks": 0,
                "meta_leads": 0, "meta_cost_per_lead": 0.0,
                "total_quote_value": 0.0, "total_sales_value": 0.0,
            }
            for header_name, key, parser in row_cols:
                idx = col.get(header_name)
                if idx is None or idx >= len(row):
                    continue
                val = parser(row[idx])
                if isinstance(val, float):
                    out[location][key] = val
                else:
                    out[location][key] = float(int(val))
        # Ensure cost per lead used from sheet (or recompute if missing)
        for loc, m in out.items():
            if m["google_leads"] and m["google_cost"] and not m["google_cost_per_lead"]:
                m["google_cost_per_lead"] = m["google_cost"] / m["google_leads"]
            if m["meta_leads"] and m["meta_cost"] and not m["meta_cost_per_lead"]:
                m["meta_cost_per_lead"] = m["meta_cost"] / m["meta_leads"]

        self._marketing_cache[cache_key] = (now, out)
        return out


kpi_sheets_service = KpiSheetsService()