﻿/**
 * Ads Insight Pro - Content Script
 * Runs on Google Ads Transparency Center pages
 * 
 * Purpose: Detect page info (domain/advertiser) and communicate with popup
 * API calls are now handled by Service Worker directly
 */

// ============== LOGGER ==============
const LOG_PREFIX = '[AdsInsightPro][Content]';

/**
 * Console logger for this content script.
 *
 * Every level prints a `%c`-styled tag made of LOG_PREFIX plus the level name,
 * followed by the caller's arguments. `debug`, `info` and `success` go through
 * `console.log`; `warn` and `error` use the matching console method.
 */
const log = (() => {
  // Builds one level: `method` is the console method name, `tag` the level label,
  // `css` the style applied to the tag via the %c directive.
  const makeLevel = (method, tag, css) =>
    (...args) => console[method](`%c${LOG_PREFIX}[${tag}]`, css, ...args);

  return {
    debug: makeLevel('log', 'DEBUG', 'color: #888'),
    info: makeLevel('log', 'INFO', 'color: #4285f4; font-weight: bold'),
    warn: makeLevel('warn', 'WARN', 'color: #fbbc04; font-weight: bold'),
    error: makeLevel('error', 'ERROR', 'color: #ea4335; font-weight: bold'),
    success: makeLevel('log', 'SUCCESS', 'color: #34a853; font-weight: bold'),
  };
})();

// ============== INITIALIZATION ==============
// Announce that the script has been evaluated on this page and record the
// full URL it is running against (useful when debugging page detection).
log.info('Content script loaded');
log.debug('URL:', window.location.href);

// ============== MESSAGE HANDLERS ==============

/**
 * Runtime message handler.
 *
 * Only `GET_PAGE_INFO` and `PING` are answered here. Any other message type is
 * left untouched (the handler returns false) so that other listeners, e.g. the
 * OCR one, can handle it.
 */
browser.runtime.onMessage.addListener((message, _sender, sendResponse) => {
  switch (message.type) {
    case 'GET_PAGE_INFO': {
      log.debug('Received message:', message.type);
      const pageInfo = getPageInfo();
      log.debug('Sending page info:', pageInfo);
      sendResponse(pageInfo);
      return true;
    }

    case 'PING':
      log.debug('Received PING');
      sendResponse({ success: true });
      return true;

    default:
      // Not ours: do not intercept, let other listeners deal with it.
      return false;
  }
});

// ============== PAGE INFO ==============

/**
 * Classifies the current page from its URL (and, for brand pages, its DOM).
 *
 * Detection order: single ad detail page, advertiser page, domain search,
 * brand aggregation page (two or more advertiser links), then any other page
 * on adstransparency.google.com ("home").
 *
 * @returns {object} Always contains `success`, `mode`, `domain`, `advertiserId`,
 *   `advertiserName`, `adsCount`, `region` and `url`. `creativeId` is added in
 *   single-ad mode; `advertiserIds` and `brandName` are added in brand mode.
 *   `success` stays false when no page type is recognized or an error is thrown.
 */
function getPageInfo() {
  const url = window.location.href;
  log.debug('Analyzing URL:', url);

  const info = {
    success: false,
    mode: null,
    domain: null,
    advertiserId: null,
    advertiserName: null,
    adsCount: 0,
    region: '', // replaced below by 'ANYWHERE' or an upper-cased region code
    url,
  };

  try {
    const query = new URLSearchParams(window.location.search);

    // Region comes from ?region=...; a missing value or "anywhere" means no filter.
    const regionParam = query.get('region');
    if (!regionParam || regionParam === 'anywhere') {
      info.region = 'ANYWHERE';
      log.debug('Using "anywhere" region');
    } else {
      // Keep the raw code (e.g. "HK", "US", "DE"); it is turned into a
      // human-readable name later, at export time.
      info.region = regionParam.toUpperCase();
      log.debug('Detected region:', regionParam, '→', info.region);
    }

    // Single ad detail page: /advertiser/AR.../creative/CR...
    const singleAd = /\/advertiser\/(AR\d+)\/creative\/(CR\d+)/.exec(url);
    if (singleAd !== null) {
      const [, advertiserId, creativeId] = singleAd;
      info.success = true;
      info.mode = 'single_ad';
      info.advertiserId = advertiserId;
      info.creativeId = creativeId;
      info.advertiserName = extractAdvertiserName();
      info.adsCount = 1;
      log.info('Detected single ad page:', info.advertiserId, info.creativeId);
      return info;
    }

    // Advertiser page: /advertiser/AR123456789
    const advertiser = /\/advertiser\/(AR\d+)/.exec(url);
    if (advertiser !== null) {
      info.success = true;
      info.mode = 'advertiser';
      [, info.advertiserId] = advertiser;
      info.advertiserName = extractAdvertiserName();
      info.adsCount = extractAdsCount();
      log.info('Detected advertiser page:', info.advertiserId, info.advertiserName);
      return info;
    }

    // Domain search: ?domain=amazon.de
    const domain = query.get('domain');
    if (domain) {
      info.success = true;
      info.mode = 'domain';
      info.domain = domain;
      info.adsCount = extractAdsCount();
      log.info('Detected domain search:', domain);
      return info;
    }

    // Brand aggregation page: the DOM links to at least two distinct advertisers.
    const advertiserIds = extractAdvertiserIds();
    if (advertiserIds.length >= 2) {
      info.success = true;
      info.mode = 'brand';
      info.advertiserIds = advertiserIds;
      info.brandName = extractBrandName();
      info.adsCount = extractAdsCount();
      log.info('Detected brand aggregation page:', advertiserIds.length, 'accounts');
      log.info('Brand name extracted:', info.brandName);
      log.debug('Advertiser IDs:', advertiserIds);
      return info;
    }

    // Anything else on the Transparency Center counts as the home page.
    if (url.includes('adstransparency.google.com')) {
      info.success = true;
      info.mode = 'home';
      log.info('Detected home page');
      return info;
    }
  } catch (e) {
    log.error('Error getting page info:', e);
  }

  return info;
}

/**
 * Reads the (possibly approximate) number of ads from the visible page text.
 *
 * Three patterns are tried in order against `document.body.innerText`:
 *   1. an abbreviated count followed by a localized word for "ads"
 *      (e.g. "~300K ads", "~1.5M 個廣告", "2万个广告");
 *   2. a plain number followed by a localized word for "ads"
 *      (e.g. "1,234 ads", "1.234 Anzeigen");
 *   3. any "~<number>K" / "~<number>M" token on the page.
 *
 * @returns {number} The parsed count, or 0 when nothing matches or an error is thrown.
 */
function extractAdsCount() {
  try {
    const pageText = document.body.innerText;

    // Localized nouns for "ads". These fragments are spliced into `new RegExp(...)`,
    // so regex escapes must be written with a doubled backslash: inside a plain
    // string literal "\s" is just "s" and "\w" is just "w", which silently broke
    // the multi-word entries (Korean, Vietnamese).
    const adWords = [
      'ads?',           // English
      '个广告',         // Simplified Chinese
      '個广告',         // Simplified Chinese variant
      '个廣告',         // mixed script
      '個廣告',         // Traditional Chinese
      '廣告',           // Traditional Chinese, short form
      '广告',           // Simplified Chinese, short form
      '件の広告',       // Japanese
      '広告',           // Japanese, short form
      '개\\s*광고',     // Korean
      '광고',           // Korean, short form
      'Anzeigen',       // German
      'annonces?',      // French
      'anuncios?',      // Spanish
      'anúncios?',      // Portuguese
      'объявлени\\w*',  // Russian
      'إعلان\\w*',      // Arabic
      'annunci',        // Italian
      'advertenties?',  // Dutch
      'reklam\\w*',     // Polish / Turkish
      'quảng\\s*cáo',   // Vietnamese
      'โฆษณา',          // Thai
      'iklan'           // Indonesian
    ].join('|');

    // Scales an abbreviated number: M = millions, 万 = ten-thousands, anything
    // else (K) = thousands. Yields NaN when the numeric text is not parseable.
    const scaleByUnit = (numText, unitText) => {
      const unit = unitText.toUpperCase();
      let multiplier = 1000;
      if (unit === 'M') {
        multiplier = 1000000;
      } else if (unit === '万') {
        multiplier = 10000;
      }
      return Math.round(parseFloat(numText) * multiplier);
    };

    // Pattern 1: abbreviated count, e.g. "~300K ads", "~1.5M 個廣告".
    const kmPattern = new RegExp(`~?([\\d.]+)\\s*([KMkm万])\\s*(?:[个個件]\\s*)?(?:${adWords})`, 'i');
    const kmMatch = pageText.match(kmPattern);
    if (kmMatch) {
      const count = scaleByUnit(kmMatch[1], kmMatch[2]);
      // "[\d.]+" can capture a lone "."; never hand NaN back to the caller.
      if (Number.isFinite(count)) {
        log.debug('Extracted ads count (K/M format):', count);
        return count;
      }
    }

    // Pattern 2: plain number, e.g. "1,234 ads" or "1.234 Anzeigen".
    // Both comma and dot are accepted as thousands separators.
    const numPattern = new RegExp(`~?([\\d.,]+)\\s*(?:[个個件]\\s*)?(?:${adWords})`, 'i');
    const numMatch = pageText.match(numPattern);
    if (numMatch) {
      let numStr = numMatch[1];
      if (numStr.includes(',') && numStr.includes('.')) {
        // Both separators present: whichever comes last is the decimal mark.
        if (numStr.lastIndexOf('.') > numStr.lastIndexOf(',')) {
          numStr = numStr.replace(/,/g, '');
        } else {
          numStr = numStr.replace(/\./g, '').replace(',', '.');
        }
      } else if (numStr.includes('.') && numStr.indexOf('.') < numStr.length - 3) {
        // A dot with at least three characters after it is a thousands
        // separator (e.g. "1.234").
        numStr = numStr.replace(/\./g, '');
      } else {
        numStr = numStr.replace(/,/g, '');
      }
      const count = Number.parseInt(numStr, 10);
      if (!Number.isNaN(count) && count > 0) {
        log.debug('Extracted ads count (number format):', count);
        return count;
      }
    }

    // Fallback: any "~XXXK" / "~X.XM" token, which is how the page usually
    // shows large approximate totals.
    const fallbackMatch = pageText.match(/~([\d.]+)\s*([KMkm])/i);
    if (fallbackMatch) {
      const count = scaleByUnit(fallbackMatch[1], fallbackMatch[2]);
      if (Number.isFinite(count)) {
        log.debug('Extracted ads count (fallback K/M):', count);
        return count;
      }
    }
  } catch (e) {
    log.error('Error extracting ads count:', e);
  }
  return 0;
}

/**
 * Finds the advertiser's display name on the current page.
 *
 * Sources are tried in order and the first acceptable value wins:
 *   1. the first <h1>;
 *   2. the part of document.title before " - ", then before " | ";
 *   3. an element whose class contains "advertiser-name";
 *   4. the first element with role="heading" (shorter than 100 characters).
 * Candidates that look like the site's own title (containing "Ads Transparency"
 * or "Google", and for the <h1> also "广告透明度") are rejected, except for
 * source 3 which accepts any non-empty text.
 *
 * @returns {string|null} The trimmed name, or null when none is found or an error is thrown.
 */
function extractAdvertiserName() {
  try {
    const SITE_WORDS = ['Ads Transparency', 'Google'];
    const mentionsAny = (text, words) => words.some((word) => text.includes(word));

    // Source 1: <h1>
    const h1 = document.querySelector('h1');
    if (h1) {
      const candidate = h1.textContent.trim();
      if (candidate && !mentionsAny(candidate, ['Ads Transparency', '广告透明度', 'Google'])) {
        log.debug('Extracted advertiser name from h1:', candidate);
        return candidate;
      }
    }

    // Source 2: page title, e.g. "Adidas America, Inc - Google Ads Transparency Center"
    // or "Adidas America, Inc | Google Ads Transparency Center".
    const title = document.title;
    if (title) {
      const titleFormats = [
        [' - ', 'Extracted advertiser name from title:'],
        [' | ', 'Extracted advertiser name from title (pipe):'],
      ];
      for (const [separator, message] of titleFormats) {
        if (!title.includes(separator)) {
          continue;
        }
        const candidate = title.split(separator)[0].trim();
        if (candidate && !mentionsAny(candidate, SITE_WORDS)) {
          log.debug(message, candidate);
          return candidate;
        }
      }
    }

    // Source 3: element carrying an advertiser-name class
    const classed = document.querySelector('.advertiser-name, [class*="advertiser-name"]');
    if (classed) {
      const candidate = classed.textContent.trim();
      if (candidate) {
        log.debug('Extracted advertiser name from class:', candidate);
        return candidate;
      }
    }

    // Source 4: first element exposed as a heading
    const heading = document.querySelector('[role="heading"]');
    if (heading) {
      const candidate = heading.textContent.trim();
      if (candidate && !mentionsAny(candidate, SITE_WORDS) && candidate.length < 100) {
        log.debug('Extracted advertiser name from heading role:', candidate);
        return candidate;
      }
    }

    log.debug('Could not extract advertiser name');
  } catch (e) {
    log.error('Error extracting advertiser name:', e);
  }
  return null;
}

/**
 * Collects the unique advertiser IDs referenced by links on the page.
 *
 * An ID is "AR" followed by exactly 20 digits, taken from the href of every
 * `<a>` whose href contains "/advertiser/". IDs are returned in the order they
 * are first encountered.
 *
 * @returns {string[]} Unique advertiser IDs; an empty array when none are found
 *   or an error is thrown.
 */
function extractAdvertiserIds() {
  try {
    const ids = new Set();
    const links = document.querySelectorAll('a[href*="/advertiser/"]');

    // Exactly 20 digits, then a URL delimiter or the end of the string. Besides
    // a further path segment (/creative/CR...), the ID may be followed by a
    // query string (?region=US) or a fragment; those links used to be dropped
    // because only "/" and end-of-string were accepted.
    const idPattern = /\/advertiser\/(AR\d{20})(?:[/?#]|$)/;

    for (const link of links) {
      const match = idPattern.exec(link.href);
      if (match) {
        ids.add(match[1]);
      }
    }

    const result = [...ids];
    log.debug('Extracted advertiser IDs:', result.length, result);
    return result;
  } catch (e) {
    log.error('Error extracting advertiser IDs:', e);
    return [];
  }
}

/**
 * Extracts the brand name shown on a brand aggregation page.
 *
 * Sources are tried in order and the first acceptable value wins:
 *   1. `.multi-advertiser-title`;
 *   2. `.advertiser-name`;
 *   3. the first <h1> (rejected when it looks like the site's own title);
 *   4. the part of document.title before " - " or " | ";
 *   5. the line of visible page text directly above the "这些结果涵盖" /
 *      "These results cover" notice;
 *   6. the text of the first advertiser link, or of its enclosing heading.
 *
 * @returns {string} The brand name, or 'Unknown Brand' when nothing usable is
 *   found or an error is thrown.
 */
function extractBrandName() {
  try {
    // Method 1: title element of the aggregation page (highest priority).
    const multiAdvertiserTitle = document.querySelector('.multi-advertiser-title');
    if (multiAdvertiserTitle) {
      const name = multiAdvertiserTitle.textContent.trim();
      if (name && name.length > 1) {
        log.debug('Extracted brand name from multi-advertiser-title:', name);
        return name;
      }
    }

    // Method 2: advertiser name element.
    const advertiserName = document.querySelector('.advertiser-name');
    if (advertiserName) {
      const name = advertiserName.textContent.trim();
      if (name && name.length > 1) {
        log.debug('Extracted brand name from advertiser-name:', name);
        return name;
      }
    }

    // Method 3: <h1>. No minimum length is enforced here.
    const h1 = document.querySelector('h1');
    if (h1) {
      const name = h1.textContent.trim();
      if (name && !name.includes('Ads Transparency') && !name.includes('Google') && !name.includes('广告透明度')) {
        log.debug('Extracted brand name from h1:', name);
        return name;
      }
    }

    // Method 4: page title, split on " - " or, failing that, " | ".
    const title = document.title;
    if (title) {
      let name = null;
      if (title.includes(' - ')) {
        name = title.split(' - ')[0].trim();
      } else if (title.includes(' | ')) {
        name = title.split(' | ')[0].trim();
      }
      if (name && !name.includes('Ads Transparency') && !name.includes('Google') && !name.includes('广告透明度') && name.length > 1) {
        log.debug('Extracted brand name from title:', name);
        return name;
      }
    }

    // Method 5: the text right above the aggregation notice in the page text.
    // The capture classes contain \s, which also matches "\n", so the leftmost
    // match can swallow several preceding lines (e.g. "Home\nFAQ\nNike").
    // Only the last non-empty captured line is the brand name.
    const pageText = document.body.innerText;
    const lastNonEmptyLine = (text) =>
      text.split('\n').map((line) => line.trim()).filter(Boolean).pop() ?? '';
    const noticePatterns = [
      [/([\u4e00-\u9fa5a-zA-Z0-9_\s]+?)\n这些结果涵盖/, 'Extracted brand name from page text (Chinese):'],
      [/([\w\s]+?)\nThese results cover/i, 'Extracted brand name from page text (English):'],
    ];
    for (const [pattern, message] of noticePatterns) {
      const match = pageText.match(pattern);
      if (!match) {
        continue;
      }
      const name = lastNonEmptyLine(match[1]);
      if (name.length > 1 && name.length < 50) {
        log.debug(message, name);
        return name;
      }
    }

    // Method 6: first advertiser link, then the heading that contains it.
    const firstAdvertiserLink = document.querySelector('a[href*="/advertiser/"]');
    if (firstAdvertiserLink) {
      const linkText = firstAdvertiserLink.textContent.trim();
      // Text containing "AR0" is skipped, presumably because it is a raw
      // advertiser ID rather than a name (TODO confirm).
      if (linkText && linkText.length > 1 && !linkText.includes('AR0')) {
        log.debug('Extracted brand name from first advertiser link text:', linkText);
        return linkText;
      }

      let parent = firstAdvertiserLink.closest('div[role="heading"]');
      if (!parent) {
        parent = firstAdvertiserLink.closest('[class*="heading"]');
      }
      if (parent) {
        const name = parent.textContent.trim();
        if (name && !name.includes('Ads Transparency') && name.length > 1) {
          log.debug('Extracted brand name from advertiser parent:', name);
          return name;
        }
      }
    }

    // Nothing worked: dump what the page looked like to help debugging.
    log.debug('Brand name extraction failed. Page state:', {
      multiAdvertiserTitle: !!document.querySelector('.multi-advertiser-title'),
      advertiserName: !!document.querySelector('.advertiser-name'),
      h1: document.querySelector('h1')?.textContent?.substring(0, 50),
      title: document.title?.substring(0, 50),
      advertiserLinks: document.querySelectorAll('a[href*="/advertiser/"]').length,
      readyState: document.readyState
    });

    log.warn('Could not extract brand name, using default');
    return 'Unknown Brand';
  } catch (e) {
    log.error('Error extracting brand name:', e);
    return 'Unknown Brand';
  }
}

// ============== READY ==============
// Last statement of the script: reaching it means everything above was
// evaluated without throwing.
log.success('Content script ready');
