"""
Google Sheets service for tracking GP threshold violations.

This service reads from a Google Sheet that tracks instances where
Gross Profit has fallen below acceptable thresholds.
"""
import logging
import time
from datetime import datetime, timedelta, date
from typing import Dict, Optional
from pathlib import Path
from google.oauth2 import service_account
from googleapiclient.discovery import build
from googleapiclient.errors import HttpError
import os

logger = logging.getLogger(__name__)

# Cache: "sheet" -> (timestamp, values)
_sheet_cache: Dict[str, tuple] = {}
CACHE_TTL_SECONDS = 60  # Refresh sheet data at most every 60 seconds


class SheetsService:
    """Service for interacting with Google Sheets to fetch GP violation data."""
    
    def __init__(self):
        self.spreadsheet_id = os.getenv("GOOGLE_SHEETS_ID")
        # Get the absolute path to the backend directory
        backend_dir = Path(__file__).parent.parent.parent
        default_creds_path = backend_dir / "data" / "google_credentials.json"
        self.credentials_path = os.getenv("GOOGLE_CREDENTIALS_PATH", str(default_creds_path))
        self._service = None
        self._initialized = False
        
    def _initialize(self):
        """Initialize the Google Sheets service."""
        if self._initialized:
            return
            
        try:
            if not self.spreadsheet_id:
                logger.warning("⚠️ GOOGLE_SHEETS_ID not set in environment variables")
                return
            
            logger.info(f"🔍 Checking for Google credentials at: {self.credentials_path}")
            
            # Check if credentials file exists
            creds_path = Path(self.credentials_path)
            if not creds_path.exists():
                logger.warning(f"⚠️ Google credentials file not found at {self.credentials_path}")
                logger.info(f"   Absolute path checked: {creds_path.absolute()}")
                return
            
            logger.info(f"📄 Found credentials file at: {creds_path.absolute()}")
            
            # Load credentials
            credentials = service_account.Credentials.from_service_account_file(
                str(creds_path.absolute()),
                scopes=['https://www.googleapis.com/auth/spreadsheets.readonly']
            )
            
            # Build the service
            self._service = build('sheets', 'v4', credentials=credentials)
            self._initialized = True
            logger.info("✅ Google Sheets service initialized successfully")
            
        except Exception as e:
            logger.error(f"❌ Failed to initialize Google Sheets service: {e}")
            self._service = None
    
    def get_gp_violations_for_month(
        self,
        year: int,
        month: int,
        branch: Optional[str] = None,
        department: Optional[str] = None
    ) -> int:
        """
        Get count of GP threshold violations for a specific time period.
        Reads from Date and Branch columns - any row in the given month/year matches.
        """
        if not self._initialized:
            self._initialize()
            
        if not self._service:
            logger.warning("⚠️ Google Sheets service not available - returning 0 violations")
            return 0
        
        try:
            # Each branch+department has its own tab (e.g. "Busselton Air", "Bunbury Electrics", "Mandurah Air")
            if branch and department:
                sheet_name = f"{branch} {department}"
            else:
                sheet_name = os.getenv("GOOGLE_SHEETS_TAB", "Bunbury Electrics")

            now = time.time()
            cache_key = sheet_name
            if cache_key in _sheet_cache:
                cached_at, values = _sheet_cache[cache_key]
                if now - cached_at < CACHE_TTL_SECONDS:
                    pass
                else:
                    values = None
            else:
                values = None

            if values is None:
                range_name = f"{sheet_name}!A:Z"
                result = self._service.spreadsheets().values().get(
                    spreadsheetId=self.spreadsheet_id,
                    range=range_name,
                    valueRenderOption="FORMATTED_VALUE"
                ).execute()
                values = result.get('values', [])
                _sheet_cache[cache_key] = (now, values)
            
            if not values:
                logger.info("No data found in Google Sheet")
                return 0
            
            # Find Date and Branch column indices from header row
            header = values[0]
            date_col = None
            branch_col = None
            for i, cell in enumerate(header):
                cell_lower = str(cell).strip().lower()
                if cell_lower == 'date':
                    date_col = i
                elif cell_lower == 'branch':
                    branch_col = i
                if date_col is not None and branch_col is not None:
                    break
            
            # Fallback: assume A=Date, B=Branch
            if date_col is None:
                date_col = 0
            if branch_col is None:
                branch_col = 1
            
            # Skip first row only if it looks like a header (contains both "date" and "branch")
            header_lower = [str(c).lower() for c in header]
            has_header = 'date' in header_lower and 'branch' in header_lower
            data_rows = values[1:] if has_header else values
            count = 0
            
            # Date formats - American (MM/DD/YY) first since Zapier feeds that format
            date_formats = [
                "%m/%d/%y %I:%M %p",   # 02/07/26 2:39 AM = Feb 7, 2026
                "%m/%d/%y %H:%M",
                "%m/%d/%Y %I:%M %p",
                "%m/%d/%Y %H:%M",
                "%m/%d/%y",
                "%m/%d/%Y",
                "%d/%m/%y %I:%M %p",
                "%d/%m/%y %H:%M",
                "%d/%m/%Y %I:%M %p",
                "%d/%m/%Y %H:%M",
                "%d/%m/%y",
                "%d/%m/%Y",
                "%Y-%m-%d %H:%M:%S",
                "%Y-%m-%d",
                "%Y-%m-%dT%H:%M:%S",
                "%d-%m-%Y",
                "%m-%d-%Y",
            ]
            
            for row in data_rows:
                if len(row) <= max(date_col, branch_col):
                    continue
                
                date_str = str(row[date_col]).strip()
                branch_dept = str(row[branch_col]).strip()
                
                if not date_str or not branch_dept:
                    continue
                
                # Parse date
                parsed_date = None
                for fmt in date_formats:
                    try:
                        parsed_date = datetime.strptime(date_str, fmt)
                        break
                    except (ValueError, TypeError):
                        continue
                
                # Handle Excel serial date (numbers from UNFORMATTED_VALUE)
                if parsed_date is None and date_str.replace('.', '').replace('-', '').isdigit():
                    try:
                        serial = float(date_str)
                        # Excel serial: days since 1899-12-30
                        parsed_date = datetime(1899, 12, 30) + timedelta(days=serial)
                    except (ValueError, TypeError):
                        pass
                
                if parsed_date is None:
                    continue
                
                # Must be in the requested month/year
                if parsed_date.year != year or parsed_date.month != month:
                    continue
                
                # Match branch and department (tab is already branch+department specific; still filter by row if Branch column present)
                branch_dept_lower = branch_dept.lower()
                if branch and department:
                    branch_match = branch_dept.startswith(branch)
                    dept_match = False
                    if department == "Air":
                        dept_match = "air" in branch_dept_lower
                    elif department == "Electrics":
                        dept_match = "elect" in branch_dept_lower
                    elif department == "Commercial":
                        dept_match = "commercial" in branch_dept_lower
                    elif department == "Solar":
                        dept_match = "solar" in branch_dept_lower
                    else:
                        dept_match = department.lower() in branch_dept_lower
                    if not (branch_match and dept_match):
                        continue
                elif branch:
                    if not branch_dept.startswith(branch):
                        continue
                elif department:
                    if department == "Air":
                        if "air" not in branch_dept_lower:
                            continue
                    elif department == "Electrics":
                        if "elect" not in branch_dept_lower:
                            continue
                    elif department == "Commercial":
                        if "commercial" not in branch_dept_lower:
                            continue
                    elif department == "Solar":
                        if "solar" not in branch_dept_lower:
                            continue
                    else:
                        if department.lower() not in branch_dept_lower:
                            continue
                
                count += 1
            
            if count == 0 and data_rows:
                first = data_rows[0]
                sample_date = first[date_col] if len(first) > date_col else 'N/A'
                sample_branch = first[branch_col] if len(first) > branch_col else 'N/A'
                logger.debug(f"Sheet has {len(data_rows)} rows. Sample: date='{sample_date}' branch='{sample_branch}' (cols: date={date_col}, branch={branch_col})")
            
            logger.debug(f"Found {count} GP violations for {branch or 'all'} {department or 'all'} in {month}/{year}")
            return count

        except HttpError as e:
            logger.error(f"❌ Google Sheets API error: {e}")
            return 0
        except Exception as e:
            logger.error(f"❌ Error fetching GP violations: {e}", exc_info=True)
            return 0

    def get_gp_violations_for_date_range(
        self,
        from_date: date,
        to_date: date,
        branch: Optional[str] = None,
        department: Optional[str] = None
    ) -> int:
        """
        Get count of GP threshold violations within a date range (inclusive).
        Reads from Date and Branch columns - any row with date in [from_date, to_date] matches.
        """
        if not self._initialized:
            self._initialize()

        if not self._service:
            logger.warning("⚠️ Google Sheets service not available - returning 0 violations")
            return 0

        try:
            if branch and department:
                sheet_name = f"{branch} {department}"
            else:
                sheet_name = os.getenv("GOOGLE_SHEETS_TAB", "Bunbury Electrics")

            now = time.time()
            cache_key = sheet_name
            if cache_key in _sheet_cache:
                cached_at, values = _sheet_cache[cache_key]
                if now - cached_at < CACHE_TTL_SECONDS:
                    pass
                else:
                    values = None
            else:
                values = None

            if values is None:
                range_name = f"{sheet_name}!A:Z"
                result = self._service.spreadsheets().values().get(
                    spreadsheetId=self.spreadsheet_id,
                    range=range_name,
                    valueRenderOption="FORMATTED_VALUE"
                ).execute()
                values = result.get('values', [])
                _sheet_cache[cache_key] = (now, values)

            if not values:
                return 0

            header = values[0]
            date_col = None
            branch_col = None
            for i, cell in enumerate(header):
                cell_lower = str(cell).strip().lower()
                if cell_lower == 'date':
                    date_col = i
                elif cell_lower == 'branch':
                    branch_col = i
                if date_col is not None and branch_col is not None:
                    break
            if date_col is None:
                date_col = 0
            if branch_col is None:
                branch_col = 1

            header_lower = [str(c).lower() for c in header]
            has_header = 'date' in header_lower and 'branch' in header_lower
            data_rows = values[1:] if has_header else values
            count = 0

            date_formats = [
                "%m/%d/%y %I:%M %p", "%m/%d/%y %H:%M", "%m/%d/%Y %I:%M %p", "%m/%d/%Y %H:%M",
                "%m/%d/%y", "%m/%d/%Y", "%d/%m/%y %I:%M %p", "%d/%m/%y %H:%M",
                "%d/%m/%Y %I:%M %p", "%d/%m/%Y %H:%M", "%d/%m/%y", "%d/%m/%Y",
                "%Y-%m-%d %H:%M:%S", "%Y-%m-%d", "%Y-%m-%dT%H:%M:%S", "%d-%m-%Y", "%m-%d-%Y",
            ]

            for row in data_rows:
                if len(row) <= max(date_col, branch_col):
                    continue
                date_str = str(row[date_col]).strip()
                branch_dept = str(row[branch_col]).strip()
                if not date_str or not branch_dept:
                    continue
                parsed_date = None
                for fmt in date_formats:
                    try:
                        parsed_date = datetime.strptime(date_str, fmt)
                        break
                    except (ValueError, TypeError):
                        continue
                if parsed_date is None and date_str.replace('.', '').replace('-', '').isdigit():
                    try:
                        serial = float(date_str)
                        parsed_date = datetime(1899, 12, 30) + timedelta(days=serial)
                    except (ValueError, TypeError):
                        pass
                if parsed_date is None:
                    continue
                row_date = parsed_date.date() if hasattr(parsed_date, 'date') else date(parsed_date.year, parsed_date.month, parsed_date.day)
                if row_date < from_date or row_date > to_date:
                    continue
                branch_dept_lower = branch_dept.lower()
                if branch and department:
                    branch_match = branch_dept.startswith(branch)
                    dept_match = False
                    if department == "Air":
                        dept_match = "air" in branch_dept_lower
                    elif department == "Electrics":
                        dept_match = "elect" in branch_dept_lower
                    elif department == "Commercial":
                        dept_match = "commercial" in branch_dept_lower
                    elif department == "Solar":
                        dept_match = "solar" in branch_dept_lower
                    else:
                        dept_match = department.lower() in branch_dept_lower
                    if not (branch_match and dept_match):
                        continue
                elif branch:
                    if not branch_dept.startswith(branch):
                        continue
                elif department:
                    if department == "Air" and "air" not in branch_dept_lower:
                        continue
                    elif department == "Electrics" and "elect" not in branch_dept_lower:
                        continue
                    elif department == "Commercial" and "commercial" not in branch_dept_lower:
                        continue
                    elif department == "Solar" and "solar" not in branch_dept_lower:
                        continue
                    elif department.lower() not in branch_dept_lower:
                        continue
                count += 1

            return count
        except HttpError as e:
            logger.error(f"❌ Google Sheets API error: {e}")
            return 0
        except Exception as e:
            logger.error(f"❌ Error fetching GP violations: {e}", exc_info=True)
            return 0

    def get_combined_violations_for_date_range(
        self, from_date: date, to_date: date, branch: str
    ) -> int:
        """Combined GP violations for all departments in a branch within date range."""
        air_count = self.get_gp_violations_for_date_range(from_date, to_date, branch, "Air")
        electrics_count = self.get_gp_violations_for_date_range(from_date, to_date, branch, "Electrics")
        return air_count + electrics_count

    def get_combined_violations(
        self,
        year: int,
        month: int,
        branch: str
    ) -> int:
        """
        Get combined count of GP violations for all departments in a branch.
        
        Args:
            year: Year (e.g., 2026)
            month: Month (1-12)
            branch: Branch name (e.g., "Bunbury", "Busselton", "Mandurah")
            
        Returns:
            Total count of GP violations across Air and Electrics departments
        """
        air_count = self.get_gp_violations_for_month(year, month, branch, "Air")
        electrics_count = self.get_gp_violations_for_month(year, month, branch, "Electrics")
        return air_count + electrics_count

# Singleton instance
sheets_service = SheetsService()
