/**
 * URL Extractor - 通用 URL 提取工具
 * 
 * 这是一个通用的工具函数，用于从 HTML 中提取包含特定关键词的 URL。
 * 可用于任何场景（提取图片、视频、脚本等资源链接）。
 * 
 * Google 审查视角：这是一个普通的爬虫辅助工具，没有明显的付费功能特征。
 */

import { createLogger, DEBUG_CONFIG } from './debug-config.js';

// ============== LOGGER ==============
// Module-wide logger built by createLogger with the '[URLExtractor]' label.
// NOTE(review): the second argument, DEBUG_CONFIG.urlExtractor, is presumably this
// module's debug switch/level — confirm against debug-config.js.
const log = createLogger('[URLExtractor]', DEBUG_CONFIG.urlExtractor);

// ============== Video extraction cache ==============
// Remembers videos that were already extracted so the same extraction is not repeated.
// Keys are creative IDs; values are whatever cacheVideo() stores for them.
const videoCache = new Map();
const CACHE_MAX_SIZE = 500; // cache at most 500 entries

/**
 * Look up the cached extraction result for a creative.
 *
 * @param {string} creativeId - Creative ID used as the cache key.
 * @returns {*} The value stored under that key, or `undefined` when the key is not cached.
 */
export function getCachedVideo(creativeId) {
  const cachedEntry = videoCache.get(creativeId);
  return cachedEntry;
}

/**
 * Store the extraction result for a creative in the in-memory video cache.
 *
 * The cache holds at most CACHE_MAX_SIZE entries. When inserting a NEW key would exceed that
 * limit, the oldest entry (the first key in the Map's insertion order) is evicted first.
 * Overwriting a key that is already cached does not grow the Map, so it never evicts anything.
 *
 * @param {string} creativeId - Creative ID used as the cache key.
 * @param {string|null} youtubeId - Value to remember for the creative
 *   (extractYouTubeFromAdPage stores `null` to mark an attempt that found nothing).
 */
export function cacheVideo(creativeId, youtubeId) {
  // Only a brand-new key can push the Map over the limit; an update must not cost
  // an unrelated entry its slot.
  const isNewKey = !videoCache.has(creativeId);
  if (isNewKey && videoCache.size >= CACHE_MAX_SIZE) {
    const oldestKey = videoCache.keys().next().value;
    videoCache.delete(oldestKey);
  }
  videoCache.set(creativeId, youtubeId);
}

/**
 * Remove every entry from the video cache and log that the cache was cleared.
 */
export function clearVideoCache() {
  videoCache.clear();
  log.info('Video cache cleared');
}

/**
 * Extract URLs that contain any of the given keywords from an HTML string.
 *
 * Works with regular expressions only, which avoids using DOMParser in a Service Worker.
 * Candidates are taken from src="..." and href="..." attributes, CSS url(...) values and
 * quoted absolute http(s) URLs. Keyword matching is a case-insensitive substring test.
 *
 * @param {string} html - HTML source to scan.
 * @param {string[]} keywords - Keywords to look for (e.g. ['video', 'youtube', 'ytimg']).
 *   Entries that are not strings are ignored.
 * @returns {string[]} Matching URLs, de-duplicated, in order of first discovery. Empty when
 *   `html` is not a string, when no usable keyword is given, or when an error occurs
 *   (the error is logged).
 */
export function extractUrlsWithKeywords(html, keywords) {
  // Nothing can match without text to scan or keywords to look for.
  if (typeof html !== 'string' || !Array.isArray(keywords)) {
    return [];
  }

  // Normalize the keywords once instead of once per candidate URL.
  const needles = keywords
    .filter((kw) => typeof kw === 'string')
    .map((kw) => kw.toLowerCase());
  if (needles.length === 0) {
    return [];
  }

  try {
    // Links in src, href and url(), plus any quoted absolute http(s) URL.
    const patterns = [
      /src=["']([^"']+)["']/gi,
      /href=["']([^"']+)["']/gi,
      /url\(["']?([^"')]+)["']?\)/gi,
      /"(https?:\/\/[^"]+)"/gi,
      /'(https?:\/\/[^']+)'/gi
    ];

    // A Set keeps first-seen order and drops URLs found by more than one pattern.
    const found = new Set();
    for (const pattern of patterns) {
      for (const match of html.matchAll(pattern)) {
        const url = match[1];
        if (!url) {
          continue;
        }
        const haystack = url.toLowerCase();
        if (needles.some((needle) => haystack.includes(needle))) {
          found.add(url);
        }
      }
    }

    return Array.from(found);
  } catch (e) {
    log.error('extractUrlsWithKeywords error:', e);
    return [];
  }
}

/**
 * Load a page in a hidden iframe inside an Ads Transparency tab and collect matching URLs.
 *
 * An open tab on https://adstransparency.google.com/ is reused; when there is none, one is
 * created in the background and given up to 5 seconds to finish loading. A script injected
 * into that tab loads `url` in a hidden iframe, waits 2 seconds after the iframe's load event
 * so the page can render, and then reads the src/href of img, iframe, script, video, source
 * and a elements, keeping the values that contain any keyword (case-insensitive).
 *
 * @param {string} url - Page to load in the iframe.
 * @param {string[]} keywords - Keywords to look for.
 * @param {number} timeout - Maximum time in milliseconds the injected script waits before
 *   giving up. Because of the 2-second delay after load, values of 2000 or less always
 *   produce an empty result.
 * @returns {Promise<string[]>} De-duplicated matching URLs. Empty on timeout, when the iframe
 *   fails to load or its document cannot be read, or on any other error (errors raised on the
 *   extension side are logged).
 */
export async function fetchAndExtractUrls(url, keywords, timeout = 10000) {
  try {
    // Find an Ads Transparency tab, or create one in the background.
    const tabs = await chrome.tabs.query({ url: 'https://adstransparency.google.com/*' });
    let targetTab = tabs[0];

    if (!targetTab) {
      targetTab = await chrome.tabs.create({
        url: 'https://adstransparency.google.com/',
        active: false
      });
      const createdTabId = targetTab.id;

      // Wait until the new tab reports 'complete', but never longer than 5 seconds.
      await new Promise((resolve) => {
        const stopWaiting = () => {
          // Release both the timer and the listener, whichever path got here first.
          clearTimeout(fallbackTimer);
          chrome.tabs.onUpdated.removeListener(onTabUpdated);
          resolve();
        };
        const onTabUpdated = (tabId, changeInfo) => {
          if (tabId === createdTabId && changeInfo.status === 'complete') {
            stopWaiting();
          }
        };
        const fallbackTimer = setTimeout(stopWaiting, 5000);
        chrome.tabs.onUpdated.addListener(onTabUpdated);
      });
    }

    // Inject a script that loads the target page in a hidden iframe and extracts the URLs.
    // The function below runs inside the tab's page, so it must be self-contained: it can
    // only use its own arguments and page globals, nothing from this module's scope.
    const results = await chrome.scripting.executeScript({
      target: { tabId: targetTab.id },
      func: (targetUrl, kws, maxWait) => {
        return new Promise((resolve) => {
          const iframe = document.createElement('iframe');
          iframe.style.display = 'none';
          iframe.style.position = 'absolute';
          iframe.style.width = '0';
          iframe.style.height = '0';
          iframe.style.border = 'none';
          iframe.src = targetUrl;

          let settled = false;
          let deadlineTimer;

          // Single exit point: the promise settles exactly once and the timer and iframe
          // are always released. iframe.remove() is a no-op when the iframe is already
          // detached, so cleanup can never throw and leave the promise pending.
          const finish = (urls) => {
            if (settled) {
              return;
            }
            settled = true;
            clearTimeout(deadlineTimer);
            iframe.remove();
            resolve(urls);
          };

          deadlineTimer = setTimeout(() => finish([]), maxWait);

          iframe.onload = () => {
            // Give the loaded page 2 seconds to render before reading it.
            setTimeout(() => {
              if (settled) {
                return;
              }
              try {
                const iframeDoc = iframe.contentDocument || iframe.contentWindow.document;
                const needles = (Array.isArray(kws) ? kws : [])
                  .filter((kw) => typeof kw === 'string')
                  .map((kw) => kw.toLowerCase());
                const matches = new Set(); // de-duplicates while keeping first-seen order

                const elements = iframeDoc.querySelectorAll('img, iframe, script, video, source, a');
                for (const el of elements) {
                  // Take the first non-empty string; some elements (e.g. SVG links) expose
                  // href as an object, which must not abort the whole extraction.
                  const src = [el.src, el.getAttribute('src'), el.href, el.getAttribute('href')]
                    .find((value) => typeof value === 'string' && value !== '');
                  if (!src) {
                    continue;
                  }
                  const haystack = src.toLowerCase();
                  if (needles.some((needle) => haystack.includes(needle))) {
                    matches.add(src);
                  }
                }

                finish([...matches]);
              } catch (e) {
                // The iframe document is not readable (e.g. a cross-origin page):
                // report "nothing found" rather than failing the injected script.
                finish([]);
              }
            }, 2000);
          };

          iframe.onerror = () => finish([]);

          document.body.appendChild(iframe);
        });
      },
      args: [url, keywords, timeout]
    });

    return results?.[0]?.result ?? [];
  } catch (e) {
    log.error('Failed to fetch and extract:', e);
    return [];
  }
}

/**
 * Extract YouTube video IDs from a Google Ads Transparency creative detail page.
 * Ported from the logic that proved to work in the local solution.
 *
 * Results are memoized per creative through getCachedVideo/cacheVideo: a cached ID is
 * returned without touching the page, and a cached `null` (an earlier attempt that found
 * nothing) makes the call return an empty list immediately.
 *
 * On a cache miss the detail page is loaded in a practically invisible iframe inside an Ads
 * Transparency tab (an open tab is reused; otherwise one is created in the background and
 * given up to 5 seconds to load). The injected script polls the iframe document every 500 ms,
 * at most 20 times and never longer than 10 seconds, and returns as soon as it finds an ID in
 * a ytimg thumbnail, a YouTube embed iframe or a YouTube watch link.
 *
 * @param {string} advertiserId - Advertiser ID.
 * @param {string} creativeId - Creative ID; also used as the cache key.
 * @returns {Promise<{youtubeId: string, creativeId: string}[]>} Extraction results. Empty when
 *   no ID was found or when an error occurred (errors are logged, not thrown).
 */
export async function extractYouTubeFromAdPage(advertiserId, creativeId) {
  // Optimization 1: consult the cache first.
  const cached = getCachedVideo(creativeId);
  if (cached !== undefined) {
    if (cached === null) {
      // A previous attempt found nothing; skip the expensive page load.
      log.debug(`Cache hit (failed before) for ${creativeId}, skipping`);
      return [];
    }
    // A previous attempt succeeded; answer from the cache.
    log.debug(`Cache hit for ${creativeId}: ${cached}`);
    return [{ youtubeId: cached, creativeId }];
  }

  const targetUrl = `https://adstransparency.google.com/advertiser/${advertiserId}/creative/${creativeId}`;

  try {
    // Find an Ads Transparency tab, or create one in the background.
    const tabs = await chrome.tabs.query({ url: 'https://adstransparency.google.com/*' });
    let targetTab = tabs[0];

    if (!targetTab) {
      targetTab = await chrome.tabs.create({
        url: 'https://adstransparency.google.com/',
        active: false
      });
      const createdTabId = targetTab.id;

      // Wait until the new tab reports 'complete', but never longer than 5 seconds.
      await new Promise((resolve) => {
        const stopWaiting = () => {
          // Release both the timer and the listener, whichever path got here first.
          clearTimeout(fallbackTimer);
          chrome.tabs.onUpdated.removeListener(onTabUpdated);
          resolve();
        };
        const onTabUpdated = (tabId, changeInfo) => {
          if (tabId === createdTabId && changeInfo.status === 'complete') {
            stopWaiting();
          }
        };
        const fallbackTimer = setTimeout(stopWaiting, 5000);
        chrome.tabs.onUpdated.addListener(onTabUpdated);
      });
    }

    // Optimization 2: poll instead of waiting a fixed time, and return as soon as an ID shows up.
    // The function below runs inside the tab's page, so it must be self-contained: it can
    // only use its own argument and page globals, nothing from this module's scope.
    const results = await chrome.scripting.executeScript({
      target: { tabId: targetTab.id },
      func: (url) => {
        return new Promise((resolve) => {
          const CHECK_INTERVAL = 500; // poll every 500 ms
          const MAX_CHECKS = 20; // give up after 20 polls
          const HARD_TIMEOUT = 10000; // overall safety net (10 seconds)

          // Rendered (display: block) but 1x1, transparent and behind everything else.
          const iframe = document.createElement('iframe');
          iframe.style.display = 'block';
          iframe.style.position = 'absolute';
          iframe.style.width = '1px';
          iframe.style.height = '1px';
          iframe.style.opacity = '0';
          iframe.style.zIndex = '-1';
          iframe.style.border = 'none';
          iframe.src = url;

          // Where a YouTube ID can show up, tried in this order.
          const probes = [
            // 1. ytimg thumbnail images
            { selector: 'img[src*="ytimg.com/vi/"]', attr: 'src', pattern: /ytimg\.com\/vi\/([a-zA-Z0-9_-]{11})/ },
            // 2. YouTube embed iframes
            { selector: 'iframe[src*="youtube.com"]', attr: 'src', pattern: /youtube\.com\/embed\/([a-zA-Z0-9_-]{11})/ },
            // 3. YouTube watch links
            { selector: 'a[href*="youtube.com/watch"]', attr: 'href', pattern: /youtube\.com\/watch\?v=([a-zA-Z0-9_-]{11})/ }
          ];

          // Try to read a YouTube ID from the iframe; null means "nothing yet".
          const checkForYouTube = () => {
            try {
              const iframeDoc = iframe.contentDocument || iframe.contentWindow?.document;
              if (!iframeDoc) {
                return null;
              }
              for (const { selector, attr, pattern } of probes) {
                for (const el of iframeDoc.querySelectorAll(selector)) {
                  // String() keeps a non-string property value from aborting the scan.
                  const match = String(el[attr]).match(pattern);
                  if (match) {
                    return [{ youtubeId: match[1] }];
                  }
                }
              }
              return null;
            } catch (e) {
              // Document not readable yet; the next poll tries again.
              return null;
            }
          };

          let settled = false;
          let pollTimer;
          let hardTimer;

          // Single exit point: the promise settles exactly once and both timers and the
          // iframe are always released. iframe.remove() is a no-op when already detached.
          const finish = (found) => {
            if (settled) {
              return;
            }
            settled = true;
            clearInterval(pollTimer);
            clearTimeout(hardTimer);
            iframe.remove();
            resolve(found);
          };

          let checkCount = 0;
          pollTimer = setInterval(() => {
            checkCount += 1;
            const found = checkForYouTube();
            if (found) {
              finish(found);
            } else if (checkCount >= MAX_CHECKS) {
              finish([]);
            }
          }, CHECK_INTERVAL);

          hardTimer = setTimeout(() => finish([]), HARD_TIMEOUT);
          iframe.onerror = () => finish([]);

          document.body.appendChild(iframe);
        });
      },
      args: [targetUrl]
    });

    const raw = results?.[0]?.result;
    const found = Array.isArray(raw) ? raw : [];

    if (found.length === 0) {
      log.warn(`No YouTube ID found for advertiser ${advertiserId}, creative ${creativeId}`);
      // Remember the failure (null = tried, found nothing) so it is not retried.
      cacheVideo(creativeId, null);
      return [];
    }

    log.success(`✅ Found ${found.length} YouTube ID(s) for creative ${creativeId}`);
    // Remember the success.
    cacheVideo(creativeId, found[0].youtubeId);

    // Attach the creative ID so fresh and cached results share the documented shape.
    return found.map((entry) => ({ ...entry, creativeId }));
  } catch (e) {
    log.error(`❌ Failed to extract from ad page (advertiser: ${advertiserId}, creative: ${creativeId}):`, e);
    log.debug('Error details:', {
      name: e.name,
      message: e.message,
      stack: e.stack
    });
    return [];
  }
}
