/**
 * Ads Insight Pro - OCR Injector (Content Script)
 *
 * Approach A: inject Tesseract.js into the target page's context and run OCR there.
 *
 * Rationale:
 * - Extension pages of a Chrome Extension MV3 forbid unsafe-eval.
 * - The CSP of adstransparency.google.com, however, allows unsafe-eval.
 * - So the content script injects the code into the page context and relies on the page's CSP.
 */

// Tag placed at the start of every console message emitted by this script.
const OCR_LOG_PREFIX = '[AdsInsightPro][OCR-Injector]';

/**
 * Thin console wrapper. Every level prints the shared prefix plus a level tag,
 * colored through the `%c` format directive, followed by the caller's arguments.
 * `info` and `success` go to console.log, `warn` to console.warn and `error`
 * to console.error.
 */
const ocrLog = {
  info(...parts) {
    return console.log(`%c${OCR_LOG_PREFIX}[INFO]`, 'color: #4285f4', ...parts);
  },
  success(...parts) {
    return console.log(`%c${OCR_LOG_PREFIX}[SUCCESS]`, 'color: #34a853', ...parts);
  },
  warn(...parts) {
    return console.warn(`%c${OCR_LOG_PREFIX}[WARN]`, 'color: #fbbc04', ...parts);
  },
  error(...parts) {
    return console.error(`%c${OCR_LOG_PREFIX}[ERROR]`, 'color: #ea4335', ...parts);
  }
};

// ---- State ----
// Set once the loader <script> element has been added to the page.
let tesseractInjected = false;
// Becomes true when the page context dispatches 'AdsInsightOCR_Ready'.
let tesseractReady = false;
// In-flight OCR requests keyed by requestId; each entry exposes resolve/reject callbacks.
let pendingRequests = new Map();

// Key point: the response listeners are registered as soon as this script
// loads, so they are already in place before any OCR request is sent.

/**
 * Settles the pending request that matches the response's requestId.
 * Responses for unknown ids are ignored.
 */
window.addEventListener('AdsInsightOCR_Response', function (e) {
  // The event travels over the shared window object, so nothing guarantees a
  // well-formed payload: bail out instead of throwing on a missing detail.
  const detail = e && e.detail;
  if (!detail || typeof detail !== 'object') {
    ocrLog.warn('Ignoring OCR response without a detail payload');
    return;
  }

  const { requestId, success, text, confidence, error } = detail;
  ocrLog.info('Received OCR response for:', requestId, success ? 'success' : 'failed');

  const pending = pendingRequests.get(requestId);
  if (!pending) return;

  // Remove the entry first so a duplicate response cannot settle it twice.
  pendingRequests.delete(requestId);
  if (success) {
    pending.resolve({ text, confidence });
  } else {
    pending.reject(new Error(error || 'Unknown OCR error'));
  }
});

// Page context signals that Tesseract.js finished loading.
window.addEventListener('AdsInsightOCR_Ready', function () {
  ocrLog.success('Tesseract.js loaded in page context');
  tesseractReady = true;
});

// Page context signals that Tesseract.js could not be loaded.
window.addEventListener('AdsInsightOCR_LoadError', function () {
  ocrLog.error('Failed to load Tesseract.js in page context');
  tesseractReady = false;
});

/**
 * Inject the OCR page script into the page context.
 *
 * The code is loaded from an external extension file (resolved with
 * `chrome.runtime.getURL`) rather than an inline script, to comply with CSP.
 *
 * Calls made after a successful injection are no-ops. If building or appending
 * the element throws, or the script later fails to load, the "injected" flag is
 * left/reset to false so that a later call can try again.
 */
function injectTesseractScript() {
  if (tesseractInjected) return;

  ocrLog.info('Injecting Tesseract.js into page context...');

  // External script file instead of an inline script, to comply with CSP.
  const script = document.createElement('script');
  script.id = 'ads-insight-tesseract-loader';
  script.src = chrome.runtime.getURL('content/ocr-page-script.js');
  script.onload = () => {
    ocrLog.info('OCR page script loaded');
  };
  script.onerror = (e) => {
    ocrLog.error('Failed to load OCR page script:', e);
    tesseractReady = false;
    // Drop the dead element and clear the guard so the next call can retry.
    tesseractInjected = false;
    script.remove();
  };

  // document.head is null until <head> has been parsed; fall back to the root element.
  const parent = document.head || document.documentElement;
  parent.appendChild(script);

  // Mark as injected only once the element is actually in the document, so an
  // exception above does not permanently block later attempts.
  tesseractInjected = true;
}

/**
 * Wait until Tesseract is ready.
 *
 * Returns immediately when the ready flag is already set. Otherwise triggers
 * the script injection and polls the flag every 100 ms.
 *
 * @param {number} [timeoutMs=30000] Maximum time to keep polling, in milliseconds.
 * @returns {Promise<boolean>} true once ready, false when the timeout elapsed first.
 */
async function waitForTesseract(timeoutMs = 30000) {
  if (!tesseractReady) {
    // Inject the script (if that has not happened yet).
    injectTesseractScript();

    return new Promise((settle) => {
      const begunAt = Date.now();

      const pollId = setInterval(function poll() {
        if (tesseractReady) {
          clearInterval(pollId);
          settle(true);
          return;
        }

        // Still within the allowed window: keep polling.
        if (!(Date.now() - begunAt > timeoutMs)) return;

        clearInterval(pollId);
        ocrLog.warn('Tesseract.js load timeout');
        settle(false);
      }, 100);
    });
  }

  return true;
}

/**
 * Run OCR on an image in the page context.
 *
 * Waits for Tesseract, registers a pending entry under a fresh request id and
 * dispatches an 'AdsInsightOCR_Request' event on window. The returned promise
 * is settled through that pending entry, or rejected after 60 seconds.
 *
 * @param {string} imageUrl URL of the image, forwarded as-is in the event detail.
 * @returns {Promise<*>} Resolves with whatever value the pending entry is resolved with.
 * @throws {Error} When Tesseract is not available, the request is rejected, or it times out.
 */
async function performOCRInPage(imageUrl) {
  if (!(await waitForTesseract())) {
    throw new Error('Tesseract.js not available');
  }

  const stamp = Date.now();
  const suffix = Math.random().toString(36).slice(2, 11);
  const requestId = `ocr_${stamp}_${suffix}`;
  ocrLog.info('Sending OCR request:', requestId);

  return new Promise((resolve, reject) => {
    // Give up if no response arrives in time, and forget the request.
    const timer = setTimeout(() => {
      pendingRequests.delete(requestId);
      reject(new Error('OCR timeout (60s)'));
    }, 60000);

    // Wrap a settle function so that the timeout is cancelled before settling.
    const cancelTimerThen = (settle) => (value) => {
      clearTimeout(timer);
      settle(value);
    };

    pendingRequests.set(requestId, {
      resolve: cancelTimerThen(resolve),
      reject: cancelTimerThen(reject)
    });

    // Send the OCR request to the page context.
    const requestEvent = new CustomEvent('AdsInsightOCR_Request', {
      detail: { requestId, imageUrl }
    });
    window.dispatchEvent(requestEvent);
  });
}

/**
 * Parse raw OCR text into an advertiser domain, a title and a description.
 *
 * The text is split on line breaks; lines are trimmed and lines of two
 * characters or fewer are discarded. Then:
 * - domain: the first domain-looking token (scheme and leading "www." removed,
 *   lowercased) that does not contain "google" or "gstatic". Every token on a
 *   line is considered, not only the first one.
 * - title: the first line that contains no domain, is not made up solely of
 *   digits/punctuation, and is not exactly an ad label (case-insensitive);
 *   capped at 200 characters.
 * - description: all further such lines joined with a space, capped at 500
 *   characters.
 *
 * @param {string} text Raw OCR output. Empty or non-string input yields empty fields.
 * @returns {{domain: string, title: string, description: string}}
 */
function parseOCRText(text) {
  const result = { domain: '', title: '', description: '' };
  if (typeof text !== 'string' || !text) return result;

  // Global variant: used only with String.prototype.match to list every candidate.
  const DOMAIN_REGEX = /(?:https?:\/\/)?(?:www\.)?([a-zA-Z0-9][-a-zA-Z0-9]*(?:\.[a-zA-Z0-9][-a-zA-Z0-9]*)+)/gi;
  // Non-global twin for yes/no checks. A /g regex keeps `lastIndex` between
  // .test() calls, which made the result for one line depend on the previous line.
  const HAS_DOMAIN_REGEX = new RegExp(DOMAIN_REGEX.source, 'i');
  const AD_LABELS = [
    'sponsored', 'ad', 'advertisement', 'promoted', 'learn more',
    'shop now', 'buy now', 'sign up', 'get started', 'click here'
  ];

  const lines = text.split(/[\n\r]+/).map(l => l.trim()).filter(l => l.length > 2);

  // Reduce a matched token to a bare lowercase host name.
  const toHost = (candidate) => candidate
    .replace(/^https?:\/\//i, '')
    .replace(/^www\./i, '')
    .split('/')[0]
    .toLowerCase();

  // Google's own hosts appear in the creative chrome and are not the advertiser.
  const isAdvertiserHost = (host) =>
    Boolean(host) && !host.includes('google') && !host.includes('gstatic') && host.includes('.');

  // Extract the domain: first acceptable host, scanning lines in order.
  for (const line of lines) {
    const host = (line.match(DOMAIN_REGEX) || []).map(toHost).find(isAdvertiserHost);
    if (host) {
      result.domain = host;
      break;
    }
  }

  // Extract title and description, skipping domain lines, numeric/punctuation
  // noise and ad labels.
  const textLines = lines.filter((line) => {
    if (HAS_DOMAIN_REGEX.test(line)) return false;
    if (/^[0-9\s\-\.\,\!\?\@\#\$\%\^\&\*\(\)]+$/.test(line)) return false;
    return !AD_LABELS.includes(line.toLowerCase());
  });

  if (textLines.length > 0) {
    result.title = textLines[0].substring(0, 200);
  }
  if (textLines.length > 1) {
    result.description = textLines.slice(1).join(' ').substring(0, 500);
  }

  return result;
}

/**
 * Handle a batch OCR request.
 *
 * Ads are processed one after another. Each ad contributes exactly one entry
 * to the results, in input order; when OCR fails for an ad, its entry carries
 * empty fields and confidence 0. After every ad (successful or not) a
 * best-effort 'OCR_PROGRESS_FROM_CONTENT' message is sent; a failure to send
 * it never affects the results.
 *
 * @param {Array<{creative_id: *, image_url: string}>} ads Ads to process.
 * @param {function(Object): void} sendResponse Called once with
 *   `{ success: true, results }`, or with `{ success: false, error }` when
 *   `ads` is not an array.
 * @returns {Promise<void>}
 */
async function processBatchOCR(ads, sendResponse) {
  if (!Array.isArray(ads)) {
    ocrLog.error('Batch OCR aborted: ads is not an array');
    sendResponse({ success: false, error: 'Invalid batch OCR request: ads must be an array' });
    return;
  }

  const total = ads.length;
  ocrLog.info('Starting batch OCR for', total, 'images');

  // Progress is advisory. It is sent outside the OCR try/catch so that a
  // synchronous throw from sendMessage cannot be mistaken for an OCR failure
  // and add a second result for the same ad.
  const reportProgress = (processed) => {
    try {
      const sent = chrome.runtime.sendMessage({
        type: 'OCR_PROGRESS_FROM_CONTENT',
        processed,
        total
      });
      // Deliberately ignore async delivery failures (e.g. nobody listening).
      if (sent && typeof sent.catch === 'function') {
        sent.catch(() => {});
      }
    } catch (err) {
      ocrLog.warn('Failed to report OCR progress:', err && err.message);
    }
  };

  const results = [];

  for (let i = 0; i < total; i++) {
    const ad = ads[i];
    const position = `${i + 1}/${total}`;
    ocrLog.info(`Processing ${position}: ${ad.creative_id}`);

    let entry;
    try {
      const result = await performOCRInPage(ad.image_url);
      const parsed = parseOCRText(result.text);

      entry = {
        creative_id: ad.creative_id,
        ad_domain: parsed.domain,
        ad_title: parsed.title,
        ad_description: parsed.description,
        confidence: result.confidence / 100,
        raw_text: result.text
      };

      ocrLog.success(`OCR ${position} completed, confidence: ${result.confidence}`);
    } catch (err) {
      ocrLog.error(`OCR ${position} failed:`, err && err.message);
      entry = {
        creative_id: ad.creative_id,
        ad_domain: '',
        ad_title: '',
        ad_description: '',
        confidence: 0,
        raw_text: ''
      };
    }

    results.push(entry);
    // Report after failures too, so the count reaches `total` at the end.
    reportProgress(i + 1);
  }

  ocrLog.success('Batch OCR complete:', results.length, 'results');
  sendResponse({ success: true, results });
}

/**
 * Listen for messages from the Service Worker.
 *
 * Handled message types:
 * - 'OCR_PERFORM':     OCR a single image (`message.imageUrl`); replies with
 *                      `{ success: true, result }` or `{ success: false, error }`.
 * - 'OCR_BATCH':       OCR every ad in `message.ads`; the reply is produced by
 *                      processBatchOCR, or `{ success: false, error }` when the
 *                      request is invalid or the batch rejects.
 * - 'OCR_CHECK_READY': replies immediately with the ready/injected flags.
 *
 * Returning true keeps the message channel open for an asynchronous reply.
 */
chrome.runtime.onMessage.addListener((message, _sender, sendResponse) => {
  // A null/undefined message cannot carry a type; leave it for other listeners.
  if (!message) return false;

  if (message.type === 'OCR_PERFORM') {
    ocrLog.info('Received single OCR request');

    performOCRInPage(message.imageUrl)
      .then(result => {
        const parsed = parseOCRText(result.text);
        sendResponse({
          success: true,
          result: {
            ad_domain: parsed.domain,
            ad_title: parsed.title,
            ad_description: parsed.description,
            confidence: result.confidence / 100,
            raw_text: result.text
          }
        });
      })
      .catch(err => {
        ocrLog.error('Single OCR failed:', err.message);
        sendResponse({ success: false, error: err.message });
      });

    return true; // Keep the message channel open.
  }

  if (message.type === 'OCR_BATCH') {
    // Answer malformed requests right away instead of throwing inside the listener.
    if (!Array.isArray(message.ads)) {
      ocrLog.error('Batch OCR request rejected: ads is not an array');
      sendResponse({ success: false, error: 'Invalid OCR_BATCH request: ads must be an array' });
      return false;
    }

    ocrLog.info('Received batch OCR request:', message.ads.length, 'images');

    // Key fix: the batch runs in a separate function so that sendResponse is
    // called at the right time. A rejection is turned into a failure reply so
    // the sender is never left waiting.
    processBatchOCR(message.ads, sendResponse).catch((err) => {
      const reason = err && err.message ? err.message : String(err);
      ocrLog.error('Batch OCR failed:', reason);
      try {
        sendResponse({ success: false, error: reason });
      } catch (sendErr) {
        ocrLog.warn('Could not deliver batch OCR failure:', sendErr && sendErr.message);
      }
    });

    return true; // Keep the message channel open.
  }

  if (message.type === 'OCR_CHECK_READY') {
    // The reply is sent synchronously here.
    sendResponse({ ready: tesseractReady, injected: tesseractInjected });
    return true;
  }

  // Other message types are not handled here.
  return false;
});

// Pre-inject Tesseract once the page has loaded (optional; speeds up the first OCR).
// In both cases the injection itself is deferred by one second.
if (document.readyState !== 'complete') {
  // Page still loading: schedule the injection from the load event.
  window.addEventListener('load', function () {
    setTimeout(injectTesseractScript, 1000);
  });
} else {
  setTimeout(injectTesseractScript, 1000);
}

ocrLog.info('OCR Injector loaded');
