"""
Audio analysis service for processing audio files and extracting feedback metrics.
"""

import asyncio
from typing import Any

from app.core.logging import get_logger
from app.services.analysis_utils import AnalysisUtils
from app.utils.content_analysis import analyze_phrases, analyze_word_category
from app.utils.legacy_formatter import format_audio_legacy_output

logger = get_logger("audio_analysis")


class AudioAnalysisService:
    """Service for comprehensive audio analysis matching legacy output format."""

    def __init__(self) -> None:
        """Create the service with its own ``AnalysisUtils`` helper instance."""
        self.analysis_utils = AnalysisUtils()

    async def analyze_audio(
        self,
        audio_path: str,
        user_id: int | None = None,
        interview_id: int | None = None,
        question_id: int | None = None,
        interview_type: str | None = None,
    ) -> dict[str, Any]:
        """
        Perform comprehensive audio analysis matching legacy output format.

        The audio is extracted and transcribed once; the three sub-analyses
        (characteristics, transcription metrics, content) are then awaited
        together through ``asyncio.gather``. A sub-analysis that fails is
        logged and contributes an empty dict, so the remaining results are
        still passed to the legacy formatter.

        Args:
            audio_path: Path to the audio file
            user_id: Optional user ID (for logging/traceability)
            interview_id: Optional interview ID (for logging/traceability)
            question_id: Optional question ID (for logging/traceability)
            interview_type: Optional interview type ("1" for general, "2" for custom)

        Returns:
            Dict containing analysis results in legacy format

        Raises:
            Exception: Any error raised by audio extraction, transcription or
                output formatting is logged and re-raised unchanged.
        """
        try:
            logger.info("Starting comprehensive audio analysis for: %s", audio_path)
            metadata = (user_id, interview_id, question_id, interview_type)
            # Only emit the metadata line when the caller supplied something.
            if any(value is not None for value in metadata):
                logger.info(
                    "Analysis metadata: user_id=%s, interview_id=%s, question_id=%s, interview_type=%s",
                    *metadata,
                )

            # Step 1/3: Extract audio and compute transcription
            logger.info("Audio analysis: extracting audio and transcribing")
            audio = await self.analysis_utils.extract_audio_from_file(audio_path)
            transcription = await self.analysis_utils.transcribe_audio_with_timestamps(
                audio
            )

            # Step 2/3: Run audio sub-analyses concurrently
            logger.info(
                "Audio analysis: running characteristics, transcription metrics, content analysis"
            )
            outcomes = await asyncio.gather(
                self.analyze_audio_characteristics(audio),
                self.analyze_transcription_with_transcript(audio, transcription),
                self.analyze_content_with_transcript(audio, transcription),
                return_exceptions=True,
            )

            # With return_exceptions=True a failure comes back as a value in
            # the result list. Check BaseException (not just Exception) so a
            # cancelled child (asyncio.CancelledError) is not mistaken for a
            # result dict, and log every failure instead of dropping it.
            labels = ("Audio characteristics", "Transcription", "Content")
            sections: list[dict[str, Any]] = []
            for label, outcome in zip(labels, outcomes):
                if isinstance(outcome, BaseException):
                    logger.error("%s analysis failed: %s", label, outcome)
                    outcome = {}
                sections.append(outcome)
            audio_result, transcription_result, content_result = sections

            # Step 3/3: Combine all results in legacy format
            logger.info("Audio analysis: formatting legacy output")
            legacy_output = format_audio_legacy_output(
                audio_result, transcription_result, content_result
            )

            logger.info("Audio analysis completed for: %s", audio_path)
            return legacy_output

        except Exception as e:
            logger.error("Failed to analyze audio %s: %s", audio_path, e)
            raise

    async def analyze_audio_characteristics(self, audio) -> dict[str, Any]:
        """Analyze audio characteristics given an AudioSegment."""
        try:
            # Get audio length
            audio_length_seconds = len(audio) / 1000.0  # seconds
            audio_length_formatted = self.analysis_utils.seconds_to_mmss(
                audio_length_seconds
            )

            # Volume classification via shared helper
            classification = self.analysis_utils.classify_volume(audio.dBFS)
            audio_volume_dbfs = classification["normalized_dbfs"]
            audio_volume = classification["category"]

            # Detect silence with legacy parameters
            silence_ranges = await self.analysis_utils.detect_silence_legacy(audio)
            pause_count = len(silence_ranges)
            # Legacy pause timestamps are space-prefixed MM:SS (e.g., " 0:24")
            pause_timestamps = [
                self.analysis_utils.seconds_to_mmss(start / 1000.0)
                for start, _ in silence_ranges
            ]

            return {
                "audio_length": audio_length_formatted,
                "audio_length_seconds": audio_length_seconds,
                "audio_volume_dBFS": audio_volume_dbfs,
                "audio_volume": audio_volume,
                "pause": pause_count,
                "pause_timestamp": pause_timestamps,
            }

        except Exception as e:
            logger.error(f"Audio analysis failed: {e}")
            return {}

    async def analyze_transcription_with_transcript(
        self, audio, transcription: dict[str, Any]
    ) -> dict[str, Any]:
        """Analyze transcription and speech characteristics from provided transcript."""
        try:
            # Calculate speech metrics using legacy approach (count words from transcript by simple split)
            text_for_count = transcription.get("text") or ""
            total_words = len([w for w in text_for_count.split() if w])
            audio_length = len(audio) / 1000.0
            # Pace of speech as float (legacy expects float, not string)
            if audio_length > 0:
                pace_of_speech = round((total_words / (audio_length / 60.0)), 2)
            else:
                pace_of_speech = 0.0

            return {
                "total_words_spoken": total_words,
                "pace_of_speech": pace_of_speech,
                "transcription": transcription,
            }

        except Exception as e:
            logger.error(f"Transcription analysis failed: {e}")
            return {}

    async def analyze_content_with_transcript(
        self, _audio, transcription: dict[str, Any]
    ) -> dict[str, Any]:
        """
        Analyze content for filler words, power words, etc., from provided transcript.

        Args:
            _audio: Unused; kept so the signature parallels the other
                sub-analysis methods.
            transcription: Transcript dict. It is passed as-is to the word and
                phrase analysers; its ``"text"`` entry is used for the total
                word count.

        Returns:
            Dict of filler/power/negative/"um" metrics in the legacy key
            layout. Note that ``filler_word_count`` is a percentage of total
            words (rounded to one decimal), while the other ``*_word_count``
            entries are raw counts. An empty dict is returned (and the error
            logged) if any step raises.
        """
        try:
            # Analyze filler and power words (timestamps)
            filler_result = analyze_word_category(transcription, "filler.txt")
            power_result = analyze_word_category(transcription, "power.txt")
            negative_result = analyze_word_category(transcription, "negative.txt")
            um_result = analyze_word_category(transcription, "um_words.txt")

            # Analyze phrases
            filler_phrase_result = analyze_phrases(transcription, "filler_phrase.txt")
            power_phrase_result = analyze_phrases(transcription, "power_phrase.txt")

            # Total word count by whitespace split. ``or ""`` also covers a
            # "text" key that is present but None, matching
            # analyze_transcription_with_transcript; previously that case
            # raised on ``.split()`` and discarded the entire content result.
            full_text = transcription.get("text") or ""
            total_words = len(full_text.split())

            # Totals combine single-word hits with phrase hits.
            total_filler_count = filler_result.get(
                "count", 0
            ) + filler_phrase_result.get("total_count", 0)
            total_power_count = power_result.get("count", 0) + power_phrase_result.get(
                "total_count", 0
            )

            # Filler usage is reported as a percentage of all words; the
            # zero-word fallback stays the integer 0 to keep output unchanged.
            if total_words > 0:
                filler_percentage = round((total_filler_count / total_words * 100), 1)
            else:
                filler_percentage = 0

            # Build result using word category analysis
            result = {
                "filler_word_count": filler_percentage,
                "filler_word_timestamp": filler_result.get("timestamps", []),
                "filler_count": filler_result.get("word_counts", {}),
                "filler_phrases": filler_phrase_result.get("phrases", []),
                "filler_phrase_counter": filler_phrase_result.get(
                    "phrase_counter", "No Filler Phrases"
                ),
                "power_word_count": total_power_count,
                "power_word_timestamp": power_result.get("timestamps", []),
                "power_count": power_result.get("word_counts", {}),
                "power_phrases": power_phrase_result.get("phrases", []),
                "power_phrase_counter": power_phrase_result.get(
                    "phrase_counter", "No Power Phrases"
                ),
                "negative_word_count": negative_result.get("count", 0),
                "negative_word_timestamp": negative_result.get("timestamps", []),
                "negative_count": negative_result.get("word_counts", {}),
                "um_word_count": um_result.get("count", 0),
                "um_word_timestamp": um_result.get("timestamps", []),
                "um_count": um_result.get("word_counts", {}),
            }

            return result

        except Exception as e:
            logger.error(f"Content analysis failed: {e}")
            return {}

    async def analyze_audio_from_video(self, audio) -> dict[str, Any]:
        """
        Analyze audio extracted from video file.
        This method is specifically for use by VideoAnalysisService.
        """
        try:
            # Get transcription
            transcription = await self.analysis_utils.transcribe_audio_with_timestamps(
                audio
            )

            # Run audio analysis tasks concurrently
            tasks = [
                self.analyze_audio_characteristics(audio),
                self.analyze_transcription_with_transcript(audio, transcription),
                self.analyze_content_with_transcript(audio, transcription),
            ]

            results = await asyncio.gather(*tasks, return_exceptions=True)

            # Check for exceptions and log them
            audio_result = results[0]
            transcription_result = results[1]
            content_result = results[2]

            if isinstance(audio_result, Exception):
                logger.error(f"Audio characteristics analysis failed: {audio_result}")
                audio_result = {}
            if isinstance(transcription_result, Exception):
                logger.error(f"Transcription analysis failed: {transcription_result}")
                transcription_result = {}
            if isinstance(content_result, Exception):
                logger.error(f"Content analysis failed: {content_result}")
                content_result = {}

            return {
                "audio_result": audio_result,
                "transcription_result": transcription_result,
                "content_result": content_result,
            }

        except Exception as e:
            logger.error(f"Failed to analyze audio from video: {e}")
            raise