<?php

namespace App\Services;

use Symfony\Component\DomCrawler\Crawler;

/**
 * HTML Parsing Helper for web scraping.
 *
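 * Provides static utility methods for extracting common elements (text,
 * attributes, links, JSON-LD) from scraped pages, plus helpers for
 * normalizing the extracted URLs, text, and numbers. Operates on Symfony
 * DomCrawler instances, such as those returned by Goutte.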
 *
 * @author APK Center Dev Team
 * @version 1.0
 */
class HTMLParseHelper
{
    /**
     * Built-in magnitude suffixes understood by extractNumberWithMultiplier().
     */
    private const DEFAULT_MULTIPLIERS = [
        'K' => 1000,
        'M' => 1000000,
        'B' => 1000000000,
        'T' => 1000000000000,
    ];

    /**
     * Regex fragment matching one unsigned number: either comma-grouped
     * thousands ("1,234,567.8") or plain digits with an optional decimal part.
     * The grouped form is tried first; the (?!\d) guard stops "1,2345" from
     * being misread as 1,234.
     */
    private const NUMBER_PATTERN = '\d{1,3}(?:,\d{3})+(?!\d)(?:\.\d+)?|\d+(?:\.\d+)?';

    /**
     * Return the trimmed text of the first node matching a selector.
     *
     * Exceptions raised while filtering or reading the node are logged at
     * debug level and the default is returned instead.
     *
     * @param Crawler $crawler
     * @param string $selector CSS selector
     * @param mixed $default Value returned when nothing matches or extraction fails
     * @return mixed Trimmed text, or $default
     */
    public static function text(Crawler $crawler, string $selector, $default = null)
    {
        try {
            $element = $crawler->filter($selector);
            if ($element->count() > 0) {
                return trim($element->text());
            }
        } catch (\Exception $e) {
            \Log::debug("Error extracting text from selector '$selector': " . $e->getMessage());
        }

        return $default;
    }

    /**
     * Return an attribute of the first node matching a selector.
     *
     * Exceptions raised while filtering or reading the node are logged at
     * debug level and the default is returned instead.
     *
     * @param Crawler $crawler
     * @param string $selector CSS selector
     * @param string $attribute Attribute name
     * @param mixed $default Value returned when the node or attribute is missing
     * @return mixed Attribute value, or $default
     */
    public static function attr(Crawler $crawler, string $selector, string $attribute, $default = null)
    {
        try {
            $element = $crawler->filter($selector);
            if ($element->count() > 0) {
                return $element->attr($attribute) ?? $default;
            }
        } catch (\Exception $e) {
            \Log::debug("Error extracting attribute '$attribute' from '$selector': " . $e->getMessage());
        }

        return $default;
    }

    /**
     * Collect the trimmed, non-empty text of every node matching a selector.
     *
     * @param Crawler $crawler
     * @param string $selector CSS selector
     * @return array List of strings in document order; empty on failure
     */
    public static function allText(Crawler $crawler, string $selector): array
    {
        $results = [];

        try {
            $crawler->filter($selector)->each(function (Crawler $node) use (&$results) {
                $text = trim($node->text());
                // Compare against '' explicitly: a truthiness test would also
                // discard the legitimate text "0".
                if ($text !== '') {
                    $results[] = $text;
                }
            });
        } catch (\Exception $e) {
            \Log::debug("Error extracting multiple texts from selector '$selector': " . $e->getMessage());
        }

        return $results;
    }

    /**
     * Collect the href attribute of every node matching a selector.
     *
     * Nodes without an href, or with an empty one, are skipped. Values are
     * returned as found in the markup (not resolved against a base URL).
     *
     * @param Crawler $crawler
     * @param string $selector CSS selector
     * @return array List of href strings in document order; empty on failure
     */
    public static function allLinks(Crawler $crawler, string $selector): array
    {
        $results = [];

        try {
            $crawler->filter($selector)->each(function (Crawler $node) use (&$results) {
                $href = $node->attr('href');
                // Explicit checks so that the relative link "0" is kept.
                if ($href !== null && $href !== '') {
                    $results[] = $href;
                }
            });
        } catch (\Exception $e) {
            \Log::debug("Error extracting links from selector '$selector': " . $e->getMessage());
        }

        return $results;
    }

    /**
     * Extract structured data (JSON-LD) from the page.
     *
     * Scans every <script type="application/ld+json"> element in document
     * order. With an empty $type the first script that decodes to a non-empty
     * array is returned as-is. Otherwise the first node whose "@type" equals
     * (or, when "@type" is a list, contains) $type is returned; top-level
     * arrays of nodes and "@graph" containers are searched as well.
     *
     * @param Crawler $crawler
     * @param string $type Type of structured data to find (e.g., 'SoftwareApplication')
     * @return array|null Decoded node, or null when nothing matches
     */
    public static function structuredData(Crawler $crawler, string $type = ''): ?array
    {
        try {
            $scripts = $crawler->filter('script[type="application/ld+json"]');

            foreach ($scripts as $script) {
                $json = json_decode($script->textContent, true);

                // Skip scripts that are invalid JSON, scalars, or empty.
                if (!is_array($json) || $json === []) {
                    continue;
                }

                if ($type === '') {
                    return $json;
                }

                $match = self::findNodeOfType($json, $type);
                if ($match !== null) {
                    return $match;
                }
            }
        } catch (\Exception $e) {
            \Log::debug("Error extracting structured data: " . $e->getMessage());
        }

        return null;
    }

    /**
     * Check whether any element matches the selector.
     *
     * Exceptions raised while filtering are swallowed silently and reported
     * as "no match".
     *
     * @param Crawler $crawler
     * @param string $selector CSS selector
     * @return bool
     */
    public static function has(Crawler $crawler, string $selector): bool
    {
        try {
            return $crawler->filter($selector)->count() > 0;
        } catch (\Exception $e) {
            return false;
        }
    }

    /**
     * Count the elements matching the selector.
     *
     * Exceptions raised while filtering are swallowed silently and reported
     * as zero matches.
     *
     * @param Crawler $crawler
     * @param string $selector CSS selector
     * @return int
     */
    public static function count(Crawler $crawler, string $selector): int
    {
        try {
            return $crawler->filter($selector)->count();
        } catch (\Exception $e) {
            return 0;
        }
    }

    /**
     * Return the first matching element wrapped in its own crawler.
     *
     * Useful for drilling down into nested structures. Exceptions raised
     * while filtering are swallowed silently and reported as null.
     *
     * @param Crawler $crawler
     * @param string $selector CSS selector
     * @return Crawler|null Null when nothing matches
     */
    public static function first(Crawler $crawler, string $selector): ?Crawler
    {
        try {
            $element = $crawler->filter($selector)->first();
            return $element->count() > 0 ? $element : null;
        } catch (\Exception $e) {
            return null;
        }
    }

    /**
     * Normalize a URL, resolving relative references against a base URL.
     *
     * Resolution rules:
     *  - a reference that passes FILTER_VALIDATE_URL is returned unchanged;
     *  - a reference that carries its own scheme but fails validation
     *    (e.g. "javascript:", "tel:") yields null and is never joined to the base;
     *  - "//host/path" inherits only the base scheme;
     *  - "/path" replaces the base path;
     *  - "?query" and "#fragment" keep the base path;
     *  - anything else is resolved against the directory of the base path.
     * "." and ".." segments are collapsed, and the base port and userinfo are
     * preserved in the result.
     *
     * @param string $url URL or path (surrounding whitespace is ignored)
     * @param string $baseUrl Base URL for relative URLs
     * @return string|null Absolute URL, or null when it cannot be resolved
     */
    public static function normalizeUrl(string $url, string $baseUrl = ''): ?string
    {
        $url = trim($url);
        if ($url === '') {
            return null;
        }

        // Already absolute
        if (filter_var($url, FILTER_VALIDATE_URL)) {
            return $url;
        }

        // A reference with its own scheme is not relative; gluing it onto the
        // base would only fabricate a bogus URL.
        if (preg_match('/^[a-z][a-z0-9+.\-]*:/i', $url) === 1) {
            return null;
        }

        if ($baseUrl === '' || !filter_var($baseUrl, FILTER_VALIDATE_URL)) {
            return null;
        }

        $base = parse_url($baseUrl);
        if (!is_array($base)) {
            return null;
        }

        $scheme = $base['scheme'] ?? 'https';

        // Protocol-relative reference: only the scheme comes from the base.
        if (strncmp($url, '//', 2) === 0) {
            return $scheme . ':' . $url;
        }

        $origin = $scheme . '://' . self::buildAuthority($base);
        $basePath = $base['path'] ?? '/';

        // Split the reference into its path and its "?query#fragment" tail so
        // dot-segment removal never touches the query string.
        $cut = strcspn($url, '?#');
        $refPath = substr($url, 0, $cut);
        $refTail = (string) substr($url, $cut);

        if ($refPath === '') {
            // Query-only or fragment-only reference: keep the base path, and
            // for a bare fragment keep the base query too.
            $path = $basePath;
            if ($refTail[0] === '#' && isset($base['query'])) {
                $refTail = '?' . $base['query'] . $refTail;
            }
        } elseif ($refPath[0] === '/') {
            $path = self::removeDotSegments($refPath);
        } else {
            // Directory of the base path = everything up to and including the
            // last slash (dirname() gets this wrong for paths ending in "/").
            $slash = strrpos($basePath, '/');
            $directory = $slash === false ? '/' : substr($basePath, 0, $slash + 1);
            $path = self::removeDotSegments($directory . $refPath);
        }

        return $origin . $path . $refTail;
    }

    /**
     * Decode HTML entities and collapse runs of whitespace to single spaces.
     *
     * Unicode space separators (notably the U+00A0 produced by "&nbsp;") are
     * treated as whitespace. Input that is not valid UTF-8 falls back to
     * ASCII-only whitespace collapsing.
     *
     * @param string $text
     * @return string
     */
    public static function cleanText(string $text): string
    {
        $decoded = html_entity_decode($text, ENT_QUOTES | ENT_HTML5, 'UTF-8');

        $collapsed = preg_replace('/[\s\p{Z}]+/u', ' ', $decoded);
        if ($collapsed === null) {
            // The /u pattern rejects malformed UTF-8; retry in byte mode
            // rather than returning an empty string.
            $collapsed = preg_replace('/\s+/', ' ', $decoded) ?? $decoded;
        }

        return trim($collapsed);
    }

    /**
     * Extract the first number found in a string.
     *
     * Comma thousands separators are understood ("1,234,567" -> 1234567.0).
     * Magnitude suffixes are ignored ("10M" -> 10.0); use
     * extractNumberWithMultiplier() to apply them.
     *
     * @param string $text Text containing a number
     * @return float|null Null when the text contains no digits
     */
    public static function extractNumber(string $text): ?float
    {
        if (preg_match('/' . self::NUMBER_PATTERN . '/', $text, $matches) !== 1) {
            return null;
        }

        return (float) str_replace(',', '', $matches[0]);
    }

    /**
     * Extract the first number and apply its magnitude suffix (e.g., "50M" -> 50000000).
     *
     * Suffixes are matched case-insensitively and may be longer than one
     * character. Entries in $multipliers extend or override the defaults
     * (K, M, B, T); when two suffixes could match, the longest one wins.
     *
     * @param string $text
     * @param array $multipliers Multiplier map (suffix => factor)
     * @return float|null Null when the text contains no digits
     */
    public static function extractNumberWithMultiplier(string $text, array $multipliers = []): ?float
    {
        // Normalise keys to upper case so lookup agrees with the
        // case-insensitive match; caller-supplied entries override defaults.
        $factors = [];
        foreach ([self::DEFAULT_MULTIPLIERS, $multipliers] as $map) {
            foreach ($map as $suffix => $factor) {
                $suffix = strtoupper((string) $suffix);
                if ($suffix !== '') {
                    $factors[$suffix] = $factor;
                }
            }
        }

        // Longest suffix first so "MB" is not shadowed by "M" in the alternation.
        $suffixes = array_map('strval', array_keys($factors));
        usort($suffixes, static function (string $a, string $b): int {
            return strlen($b) <=> strlen($a);
        });

        $alternation = implode('|', array_map(static function (string $suffix): string {
            return preg_quote($suffix, '/');
        }, $suffixes));

        $pattern = '/(' . self::NUMBER_PATTERN . ')\s*(' . $alternation . ')?/i';

        if (preg_match($pattern, $text, $matches) !== 1) {
            return null;
        }

        $num = (float) str_replace(',', '', $matches[1]);
        $mult = strtoupper($matches[2] ?? '');

        if ($mult !== '' && isset($factors[$mult])) {
            return $num * $factors[$mult];
        }

        return $num;
    }

    /**
     * Depth-first search of decoded JSON-LD for the first node of a given type.
     *
     * @param array $node Decoded JSON-LD node, list of nodes, or @graph container
     * @param string $type Wanted "@type" value
     * @return array|null
     */
    private static function findNodeOfType(array $node, string $type): ?array
    {
        $declared = $node['@type'] ?? null;
        $matches = is_array($declared)
            ? in_array($type, $declared, true)
            : $declared === $type;

        if ($matches) {
            return $node;
        }

        // "@graph" may hold a single node or a list of nodes; recursing
        // handles both shapes.
        if (isset($node['@graph']) && is_array($node['@graph'])) {
            return self::findNodeOfType($node['@graph'], $type);
        }

        // A sequential list (top-level array or a @graph list): inspect each entry.
        if ($node !== [] && array_values($node) === $node) {
            foreach ($node as $child) {
                if (!is_array($child)) {
                    continue;
                }

                $found = self::findNodeOfType($child, $type);
                if ($found !== null) {
                    return $found;
                }
            }
        }

        return null;
    }

    /**
     * Rebuild the "[user[:pass]@]host[:port]" authority from parse_url() output.
     *
     * @param array $parts Result of parse_url()
     * @return string
     */
    private static function buildAuthority(array $parts): string
    {
        $authority = $parts['host'] ?? '';

        if (isset($parts['user'])) {
            $userInfo = $parts['user'];
            if (isset($parts['pass'])) {
                $userInfo .= ':' . $parts['pass'];
            }
            $authority = $userInfo . '@' . $authority;
        }

        if (isset($parts['port'])) {
            $authority .= ':' . $parts['port'];
        }

        return $authority;
    }

    /**
     * Collapse "." and ".." segments of a path without climbing above its root.
     *
     * @param string $path Path, normally starting with "/"
     * @return string
     */
    private static function removeDotSegments(string $path): string
    {
        $segments = explode('/', $path);
        $last = $segments[count($segments) - 1];
        $kept = [];

        foreach ($segments as $segment) {
            if ($segment === '.') {
                continue;
            }

            if ($segment === '..') {
                // Index 0 is the empty segment before the leading slash;
                // never pop it, so the result stays rooted.
                if (count($kept) > 1) {
                    array_pop($kept);
                }
                continue;
            }

            $kept[] = $segment;
        }

        // A trailing "." or ".." names a directory: keep the trailing slash.
        if ($last === '.' || $last === '..') {
            $kept[] = '';
        }

        return implode('/', $kept);
    }
}
