import dotenv from "dotenv";
import sharp from "sharp";
import pLimit from "p-limit";
import fs from "fs";
import path from "path";
import { load as cheerioLoad } from "cheerio";
import { createClient } from "@supabase/supabase-js";

// Load variables from a local .env file into process.env before the settings below are read.
dotenv.config();

/**
 * Reads an integer setting from the environment and clamps it to [min, max].
 *
 * An unset or empty variable yields `fallback`. A non-numeric value also yields
 * `fallback` (with a warning) instead of propagating NaN into pagination or the
 * concurrency limiter. Fractional values are truncated because every consumer
 * of these settings expects a whole number.
 *
 * @param {string} name - Environment variable name.
 * @param {number} fallback - Value used when the variable is unset, empty or invalid.
 * @param {number} min - Lowest accepted value (inclusive).
 * @param {number} max - Highest accepted value (inclusive).
 * @returns {number} Integer within [min, max], or `fallback`.
 */
function readIntEnv(name, fallback, min, max) {
  const raw = process.env[name];
  if (raw === undefined || raw === "") return fallback;
  const parsed = Number(raw);
  if (Number.isNaN(parsed)) {
    console.warn(`[LogoBackfill] ignoring non-numeric ${name}=${JSON.stringify(raw)}; using ${fallback}`);
    return fallback;
  }
  return Math.trunc(Math.max(min, Math.min(max, parsed)));
}

// Supabase connection settings; empty strings mark "not configured".
const SUPABASE_URL = process.env.SUPABASE_URL || "";
const SUPABASE_SERVICE_ROLE_KEY = process.env.SUPABASE_SERVICE_ROLE_KEY || "";
const SUPABASE_BUCKET = process.env.SUPABASE_BUCKET || "company-assets";

// NOTE(review): because of the `||` fallback an empty BACKFILL_MARKET still resolves to
// "BE-WAL", so code paths that treat a falsy MARKET as "all markets" are never taken.
const MARKET = process.env.BACKFILL_MARKET || "BE-WAL";
// Page size for reading companies, kept between 50 and 1000 rows.
const BATCH_SIZE = readIntEnv("BACKFILL_BATCH_SIZE", 300, 50, 1000);
// Optional cap on the number of rows processed; 0 means no cap.
const MAX_ROWS = readIntEnv("BACKFILL_MAX_ROWS", 0, 0, Number.MAX_SAFE_INTEGER);
// Only the exact value "1" enables dry-run mode.
const DRY_RUN = String(process.env.BACKFILL_DRY_RUN || "0") === "1";
// Number of companies processed in parallel, kept between 1 and 8.
const CONCURRENCY = readIntEnv("BACKFILL_CONCURRENCY", 3, 1, 8);
// SQL seed dump scanned for previously known logo URLs.
const BACKFILL_SEED_PATH =
  process.env.BACKFILL_SEED_PATH || path.join(process.cwd(), "out", "seed_companies.sql.bak_2026-02-16T11-01-49-868Z");

// Sent with every outgoing HTTP request made by this script.
const USER_AGENT = "Mozilla/5.0 (compatible; RevelysLogoBackfill/1.0; +contact@revelys.be)";
// Upper bound (in bytes) for a downloaded logo image.
const MAX_LOGO_BYTES = 3_000_000;

// Refuse to start without both Supabase credentials.
if (!(SUPABASE_URL && SUPABASE_SERVICE_ROLE_KEY)) {
  console.error("Missing SUPABASE_URL or SUPABASE_SERVICE_ROLE_KEY in .env");
  process.exit(1);
}

// Lower-cased hostname of the configured Supabase project, or null when the URL cannot be parsed.
const SUPABASE_HOST = (() => {
  let projectUrl;
  try {
    projectUrl = new URL(SUPABASE_URL);
  } catch {
    return null;
  }
  return projectUrl.hostname.toLowerCase();
})();

// Service-role client; session persistence is switched off.
const supabase = createClient(SUPABASE_URL, SUPABASE_SERVICE_ROLE_KEY, {
  auth: { persistSession: false },
});

/**
 * Normalizes a value into an absolute URL string.
 *
 * @param {*} raw - Candidate URL (stringified before parsing).
 * @returns {string|null} The serialized URL, or null when `raw` is falsy or not a valid absolute URL.
 */
function safeUrl(raw) {
  if (raw) {
    try {
      const parsed = new URL(String(raw));
      return parsed.href;
    } catch {
      // Not parseable as an absolute URL: fall through to the null result.
    }
  }
  return null;
}

/**
 * Tells whether a URL points at a public storage object of the configured Supabase project.
 *
 * @param {*} raw - Candidate URL.
 * @returns {boolean} True when the hostname equals SUPABASE_HOST and the path starts with
 *   `/storage/v1/object/public/`; false for falsy or unparseable input, or when SUPABASE_HOST is unknown.
 */
function isSupabaseStorageUrl(raw) {
  if (!raw || !SUPABASE_HOST) return false;

  let parsed;
  try {
    parsed = new URL(String(raw));
  } catch {
    return false;
  }

  const sameHost = parsed.hostname.toLowerCase() === SUPABASE_HOST;
  return sameHost && /^\/storage\/v1\/object\/public\//i.test(parsed.pathname);
}

/**
 * Detects URLs whose path ends in `/cover.webp` (case-insensitive), i.e. a cover image
 * stored where a logo is expected.
 *
 * @param {*} raw - Candidate URL.
 * @returns {boolean} True for such a URL; false for anything else, including invalid URLs.
 */
function isCoverLikeLogoUrl(raw) {
  if (!raw) return false;

  let pathname;
  try {
    ({ pathname } = new URL(String(raw)));
  } catch {
    return false;
  }
  return /\/cover\.webp$/i.test(pathname);
}

/**
 * Returns the distinct truthy items of an array, keeping first-seen order.
 *
 * @param {*} list - Expected to be an array; anything else is treated as empty.
 * @returns {Array} New array without falsy values or duplicates.
 */
function uniq(list) {
  const seen = new Set();
  if (Array.isArray(list)) {
    for (const item of list) {
      if (item) seen.add(item);
    }
  }
  return [...seen];
}

/**
 * Resolves a possibly relative URL against a base URL.
 *
 * @param {string} url - Absolute or relative reference.
 * @param {string} base - Base URL used for relative references.
 * @returns {string|null} The resolved absolute URL, or null when it cannot be resolved.
 */
function absolutize(url, base) {
  let resolved;
  try {
    resolved = new URL(url, base);
  } catch {
    return null;
  }
  return resolved.href;
}

/**
 * Picks the largest image candidate from an HTML `srcset` attribute value.
 *
 * Width descriptors (`480w`) are ranked first; pixel-density descriptors (`2x`) are
 * used when widths are absent or equal. A candidate without a descriptor counts as
 * `1x`. On a tie the candidate listed first wins.
 *
 * @param {*} srcset - Raw attribute value (may be undefined or empty).
 * @returns {string|null} URL of the preferred candidate, or null when none can be parsed.
 */
function pickFromSrcset(srcset) {
  const raw = String(srcset || "").trim();
  if (!raw) return null;

  let best = null;
  for (const chunk of raw.split(",")) {
    // "<url>" optionally followed by "<number>w" or "<number>x".
    const m = chunk.trim().match(/^(\S+)(?:\s+(\d+(?:\.\d+)?)([wx]))?/);
    if (!m) continue; // empty chunk, e.g. from a trailing comma

    const value = m[2] ? Number(m[2]) : 0;
    const candidate = {
      url: m[1],
      width: m[3] === "w" ? value : 0,
      density: m[3] === "x" ? value : 1,
    };

    const isBetter =
      best === null ||
      candidate.width > best.width ||
      (candidate.width === best.width && candidate.density > best.density);
    if (isBetter) best = candidate;
  }

  return best ? best.url : null;
}

/**
 * Turns a loosely formatted website value into an absolute URL string.
 *
 * Protocol-relative values (`//host`) and values without any `scheme://` prefix are
 * given an `https` scheme before validation.
 *
 * @param {*} raw - Website value as stored on the company row.
 * @returns {string|null} Normalized URL, or null when empty or unparseable.
 */
function normalizeWebsiteUrl(raw) {
  if (!raw) return null;

  const trimmed = String(raw).trim();
  if (trimmed === "") return null;

  const withProtocol = trimmed.startsWith("//") ? `https:${trimmed}` : trimmed;
  const hasScheme = /^[a-z][a-z0-9+.-]*:\/\//i.test(withProtocol);
  const candidate = hasScheme ? withProtocol : `https://${withProtocol.replace(/^\/+/, "")}`;
  return safeUrl(candidate);
}

/**
 * Converts a SQL literal token (`NULL` or a single-quoted string) into a URL.
 *
 * @param {*} token - Token text as captured from the seed SQL.
 * @returns {string|null} Validated URL, or null for `NULL`, unquoted text or an invalid URL.
 */
function parseSqlUrlToken(token) {
  const text = String(token || "");
  if (text === "" || text === "NULL") return null;

  const quoted = /^'(.*)'$/s.exec(text);
  if (quoted === null) return null;

  // SQL escapes a single quote inside a string literal by doubling it.
  const unescaped = quoted[1].replace(/''/g, "'");
  return safeUrl(unescaped);
}

/**
 * Builds a slug -> logo URL lookup from a SQL seed file.
 *
 * The file is scanned for `SELECT '<slug>' ... , <int>, <url|NULL>, <url|NULL>, ARRAY[`
 * fragments; the first of the two URL tokens is used as the logo URL.
 *
 * @param {string} filePath - Path of the seed file.
 * @returns {Map<string, string>} Lookup of slugs with a valid logo URL; empty when the
 *   path is falsy or the file does not exist.
 */
function loadSeedLogoMap(filePath) {
  const logosBySlug = new Map();
  const fileAvailable = Boolean(filePath) && fs.existsSync(filePath);
  if (!fileAvailable) return logosBySlug;

  const sqlText = fs.readFileSync(filePath, "utf8");
  const rowPattern = /SELECT\s+'([^']+)'.*?,\s*(\d+)\s*,\s*(NULL|'https?:\/\/[^']+')\s*,\s*(NULL|'https?:\/\/[^']+')\s*,\s*ARRAY\[/gs;

  // Capture group 1 is the slug, group 3 the logo token; groups 2 and 4 are not needed.
  for (const [, slugToken, , logoToken] of sqlText.matchAll(rowPattern)) {
    const slug = String(slugToken || "");
    const logoUrl = parseSqlUrlToken(logoToken);
    if (slug && logoUrl) logosBySlug.set(slug, logoUrl);
  }
  return logosBySlug;
}

/**
 * Collects image URL candidates from a JSON-LD value.
 *
 * The structure is walked recursively. Once a key named `image`, `logo`,
 * `thumbnailUrl` or `contentUrl` (case-insensitive) has been entered, every string
 * found beneath it that parses as an absolute URL is collected.
 *
 * @param {*} seoJsonld - Parsed JSON-LD (object, array, or anything else).
 * @returns {string[]} Distinct candidate URLs in discovery order.
 */
function extractSeoImageCandidates(seoJsonld) {
  const IMAGE_KEYS = new Set(["image", "logo", "thumbnailurl", "contenturl"]);
  const found = [];

  const collect = (candidate) => {
    const normalized = safeUrl(candidate);
    if (normalized) found.push(normalized);
  };

  const visit = (node, insideImage = false) => {
    if (node === null || node === undefined) return;

    if (typeof node === "string") {
      if (insideImage) collect(node);
      return;
    }

    if (Array.isArray(node)) {
      node.forEach((child) => visit(child, insideImage));
      return;
    }

    if (typeof node !== "object") return;

    for (const [rawKey, child] of Object.entries(node)) {
      const key = String(rawKey || "").toLowerCase();
      const childInsideImage = insideImage || IMAGE_KEYS.has(key);
      const isUrlKey = key === "url" || key === "contenturl";
      // URLs under image/logo objects are valid candidates.
      if (isUrlKey && childInsideImage && typeof child === "string") collect(child);
      visit(child, childInsideImage);
    }
  };

  visit(seoJsonld, false);
  return uniq(found);
}

/**
 * Fetches a page and returns its body when it is an HTML/XHTML document.
 *
 * The request is aborted after `timeoutMs` so a stalled server cannot hold up a
 * worker for long; the timeout also covers reading the body. This is best-effort:
 * every failure is reported as null.
 *
 * @param {string} url - Page URL.
 * @param {number} [timeoutMs=15000] - Maximum time for the whole request, in milliseconds.
 * @returns {Promise<string|null>} HTML text, or null on a network error, timeout,
 *   non-2xx status or non-HTML content type.
 */
async function fetchHtml(url, timeoutMs = 15_000) {
  try {
    const res = await fetch(url, {
      redirect: "follow",
      headers: {
        "User-Agent": USER_AGENT,
        Accept: "text/html,application/xhtml+xml,*/*;q=0.8",
      },
      signal: AbortSignal.timeout(timeoutMs),
    });
    if (!res.ok) return null;
    const ct = String(res.headers.get("content-type") || "").toLowerCase();
    if (!ct.includes("text/html") && !ct.includes("application/xhtml+xml")) return null;
    return await res.text();
  } catch {
    // Network failure, timeout or body read error: the page is simply unavailable.
    return null;
  }
}

/**
 * Scrapes a company website for likely logo image URLs.
 *
 * Candidates come from meta tags, icon links, JSON-LD blocks, image-looking URLs in
 * the raw HTML and `<img>` elements. Each source adds a score to the (absolutized)
 * URL; scores for the same URL accumulate.
 *
 * @param {string} websiteUrl - Absolute URL of the page to inspect; also the base for relative URLs.
 * @returns {Promise<string[]>} Up to 20 candidate URLs, highest score first; empty when
 *   the page could not be fetched.
 */
async function extractLogoCandidatesFromWebsite(websiteUrl) {
  const html = await fetchHtml(websiteUrl);
  if (!html) return [];
  const $ = cheerioLoad(html);
  const candidates = new Map();

  // Registers a candidate (ignoring inline data: URIs) and adds `bonus` to its score.
  const add = (raw, bonus = 0) => {
    if (!raw || /^data:/i.test(String(raw))) return;
    const abs = absolutize(raw, websiteUrl);
    const safe = safeUrl(abs);
    if (!safe) return;
    const prev = candidates.get(safe) || 0;
    candidates.set(safe, prev + bonus);
  };

  // Explicit logo metadata outranks the generic social-preview image.
  const ogLogo = $('meta[property="og:logo"]').attr("content");
  const itempropLogo = $('meta[itemprop="logo"]').attr("content");
  const ogImage = $('meta[property="og:image"]').attr("content");
  if (ogLogo) add(ogLogo, 30);
  if (itempropLogo) add(itempropLogo, 25);
  if (ogImage) add(ogImage, 8);

  // Favicons and touch icons.
  $("link[rel]").each((_, el) => {
    const rel = String($(el).attr("rel") || "").toLowerCase();
    if (!/(icon|apple-touch-icon|mask-icon)/.test(rel)) return;
    const href = $(el).attr("href");
    if (href) add(href, 22);
  });

  // JSON-LD: visit every nested object and read its `logo` / `image` properties.
  $('script[type="application/ld+json"]').each((_, el) => {
    const raw = $(el).contents().text();
    if (!raw) return;
    try {
      const parsed = JSON.parse(raw);
      const stack = [parsed];
      while (stack.length) {
        const v = stack.pop();
        if (!v) continue;
        if (typeof v === "string") continue;
        if (Array.isArray(v)) {
          for (const x of v) stack.push(x);
          continue;
        }
        if (typeof v !== "object") continue;
        if (typeof v.logo === "string") add(v.logo, 26);
        if (typeof v.image === "string") add(v.image, 18);
        if (v.logo && typeof v.logo === "object") {
          if (typeof v.logo.url === "string") add(v.logo.url, 24);
          if (typeof v.logo.contentUrl === "string") add(v.logo.contentUrl, 24);
        }
        if (v.image && typeof v.image === "object") {
          if (typeof v.image.url === "string") add(v.image.url, 16);
          if (typeof v.image.contentUrl === "string") add(v.image.contentUrl, 16);
        }
        for (const x of Object.values(v)) stack.push(x);
      }
    } catch {
      // ignore malformed JSON-LD
    }
  });

  // Image-looking URLs anywhere in the markup (inline CSS, scripts, data attributes).
  // The character classes must exclude whitespace (`\s`). The previous `\\s` excluded a
  // literal backslash and the letter "s" instead, which cut URLs at every "s" and let
  // matches run across whitespace.
  const imageUrlPattern = /(?:https?:\/\/|\/)[^"'\s<>]+\.(?:svg|png|webp|jpg|jpeg)(?:\?[^"'\s<>]*)?/gi;
  for (const m of html.matchAll(imageUrlPattern)) {
    const raw = String(m[0] || "");
    if (/logo|brand|favicon|site-logo|header/i.test(raw)) add(raw, 20);
  }

  // <img> elements, scored by naming hints and by placement in a header/nav area.
  $("img").each((_, el) => {
    const src =
      $(el).attr("src") ||
      $(el).attr("data-src") ||
      $(el).attr("data-lazy-src") ||
      pickFromSrcset($(el).attr("srcset"));
    if (!src) return;

    const hint = [
      src,
      $(el).attr("alt") || "",
      $(el).attr("title") || "",
      $(el).attr("class") || "",
      $(el).attr("id") || "",
    ]
      .join(" ")
      .toLowerCase();

    let bonus = 0;
    if (/(logo|site-logo|brand|header-logo|navbar-brand|custom-logo)/.test(hint)) bonus += 26;
    if (/favicon|sprite|icon/.test(hint)) bonus -= 22;
    if ($(el).closest("header, nav, .site-header, .main-header, .navbar, .topbar").length) bonus += 16;

    if (bonus >= 8 || /logo|brand/.test(hint)) add(src, bonus);
  });

  return Array.from(candidates.entries())
    .sort((a, b) => b[1] - a[1])
    .map(([url]) => url)
    .slice(0, 20);
}

/**
 * Downloads an image and re-encodes it as WebP (quality 80).
 *
 * The request is aborted after `timeoutMs` so a stalled server cannot hold up a
 * worker; the timeout also covers reading the body. This is best-effort: every
 * failure is reported as null.
 *
 * @param {string} url - Image URL.
 * @param {number} [maxBytes=MAX_LOGO_BYTES] - Largest accepted download size in bytes.
 * @param {number} [timeoutMs=20000] - Maximum time for the whole download, in milliseconds.
 * @returns {Promise<Buffer|null>} WebP bytes, or null on a network error, timeout, non-2xx
 *   status, non-image content type, empty or oversized body, or undecodable image.
 */
async function downloadAndConvertWebp(url, maxBytes = MAX_LOGO_BYTES, timeoutMs = 20_000) {
  try {
    const res = await fetch(url, {
      redirect: "follow",
      headers: { "User-Agent": USER_AGENT, Accept: "image/*,*/*;q=0.8" },
      signal: AbortSignal.timeout(timeoutMs),
    });
    if (!res.ok) return null;

    // A missing content type is tolerated; an explicit non-image type is not.
    const ct = String(res.headers.get("content-type") || "").toLowerCase();
    if (ct && !ct.startsWith("image/")) return null;

    // Reject early when the server announces an oversized body.
    const len = Number(res.headers.get("content-length") || 0);
    if (len && len > maxBytes) return null;

    // Re-check after download: content-length may be absent or wrong.
    const buf = Buffer.from(await res.arrayBuffer());
    if (!buf.length || buf.length > maxBytes) return null;

    try {
      return await sharp(buf).webp({ quality: 80 }).toBuffer();
    } catch {
      // Not an image sharp can decode.
      return null;
    }
  } catch {
    // Network failure, timeout or body read error.
    return null;
  }
}

/**
 * Heuristically detects images that would be invisible on a light background.
 *
 * Pixels with alpha below 16 are ignored. The image is flagged when fewer than 150
 * pixels remain, or when at least 92% of them are near-white (luminance >= 242 with
 * a channel spread <= 20) while at most 2% are dark (luminance <= 120).
 *
 * @param {Buffer} buffer - Encoded image bytes.
 * @returns {Promise<boolean>} True when the image looks white/empty; false otherwise
 *   or when it cannot be decoded.
 */
async function isLikelyWhiteLogoBuffer(buffer) {
  try {
    const decoded = await sharp(buffer).ensureAlpha().raw().toBuffer({ resolveWithObject: true });
    const pixels = decoded.data;
    const stride = Number(decoded.info?.channels || 4);
    if (!pixels?.length || stride < 3) return false;

    let visibleCount = 0;
    let nearWhiteCount = 0;
    let darkCount = 0;

    let offset = 0;
    while (offset + 2 < pixels.length) {
      const red = pixels[offset];
      const green = pixels[offset + 1];
      const blue = pixels[offset + 2];
      const alpha = stride >= 4 ? pixels[offset + 3] : 255;

      // Skip (almost) fully transparent pixels.
      if (!(alpha < 16)) {
        visibleCount += 1;

        const spread = Math.max(red, green, blue) - Math.min(red, green, blue);
        const luminance = 0.2126 * red + 0.7152 * green + 0.0722 * blue;
        if (luminance >= 242 && spread <= 20) nearWhiteCount += 1;
        if (luminance <= 120) darkCount += 1;
      }

      offset += stride;
    }

    // Too little visible content to be a usable logo.
    if (visibleCount < 150) return true;

    return nearWhiteCount / visibleCount >= 0.92 && darkCount / visibleCount <= 0.02;
  } catch {
    return false;
  }
}

/**
 * Returns a copy of a JSON-LD object with its top-level `image` and `logo` set to `logoUrl`.
 *
 * @param {*} seoJsonld - Existing JSON-LD value.
 * @param {string} logoUrl - URL to store in both properties.
 * @returns {*} A patched deep copy (made via a JSON round trip) when `seoJsonld` is a
 *   non-array object; otherwise `seoJsonld` itself, unchanged.
 */
function patchSeoJsonldLogo(seoJsonld, logoUrl) {
  const isPlainRecord = Boolean(seoJsonld) && typeof seoJsonld === "object" && !Array.isArray(seoJsonld);
  if (!isPlainRecord) return seoJsonld;

  // Copy first so the caller's row data is never mutated.
  const patched = JSON.parse(JSON.stringify(seoJsonld));
  for (const key of ["image", "logo"]) {
    patched[key] = logoUrl;
  }
  return patched;
}

/**
 * Stores a WebP logo at `companies/<slug>/logo.webp` in the configured bucket,
 * replacing any existing object, and returns its public URL.
 *
 * @param {string} slug - Company slug used in the object path.
 * @param {Buffer} bufferWebp - WebP image bytes.
 * @returns {Promise<string|null>} Public URL, or null when the upload reports an error
 *   or no public URL is available.
 */
async function uploadLogo(slug, bufferWebp) {
  const objectPath = `companies/${slug}/logo.webp`;
  const bucket = supabase.storage.from(SUPABASE_BUCKET);

  const uploadResult = await bucket.upload(objectPath, bufferWebp, {
    contentType: "image/webp",
    upsert: true,
  });
  if (uploadResult.error) return null;

  const publicUrlResult = bucket.getPublicUrl(objectPath);
  return publicUrlResult.data?.publicUrl || null;
}

/**
 * Loads company rows page by page, ordered by slug.
 *
 * Rows are restricted to MARKET when it is set. Paging stops at the first empty or
 * short page, or once MAX_ROWS rows (when greater than 0) have been collected.
 *
 * @returns {Promise<object[]>} Rows with slug, market, logo_url, cover_image_url,
 *   seo_jsonld and website; at most MAX_ROWS of them when that cap is set.
 * @throws The error object reported by Supabase when a page query fails.
 */
async function fetchRows() {
  const collected = [];
  const hasRowCap = MAX_ROWS > 0;
  let offset = 0;
  let morePages = true;

  do {
    let pageQuery = supabase
      .from("companies")
      .select("slug, market, logo_url, cover_image_url, seo_jsonld, website")
      .order("slug", { ascending: true })
      .range(offset, offset + BATCH_SIZE - 1);

    if (MARKET) pageQuery = pageQuery.eq("market", MARKET);

    const { data: page, error } = await pageQuery;
    if (error) throw error;

    const pageSize = page ? page.length : 0;
    if (pageSize === 0) break;

    collected.push(...page);
    offset += pageSize;

    const capReached = hasRowCap && collected.length >= MAX_ROWS;
    // A short page means the table has been exhausted.
    morePages = !capReached && !(pageSize < BATCH_SIZE);
  } while (morePages);

  return hasRowCap ? collected.slice(0, MAX_ROWS) : collected;
}

/**
 * Downloads one candidate image, rejects unusable ones and stores it as the company logo.
 *
 * In dry-run mode nothing is written to storage; the source URL of the image that
 * would have been uploaded is returned instead.
 *
 * @param {string} slug - Company slug.
 * @param {string} sourceUrl - Candidate image URL.
 * @returns {Promise<string|null>} Public URL of the stored logo (or `sourceUrl` in a dry
 *   run), or null when the candidate cannot be downloaded, looks white/empty, or the upload fails.
 */
async function storeLogoFromUrl(slug, sourceUrl) {
  const buf = await downloadAndConvertWebp(sourceUrl);
  if (!buf) return null;
  if (await isLikelyWhiteLogoBuffer(buf)) return null;
  // A dry run must stay free of side effects: uploading with upsert would overwrite stored logos.
  if (DRY_RUN) return sourceUrl;
  return uploadLogo(slug, buf);
}

/**
 * Finds and stores a logo for one company row, then updates the row.
 *
 * Sources are tried in order: the seed file entry, the row's current non-Supabase
 * logo URL, image URLs in the row's JSON-LD, and finally candidates scraped from
 * the company website.
 *
 * @param {object} row - Company row with slug, logo_url, seo_jsonld and website.
 * @returns {Promise<object>} `{ slug, status, ... }` where status is one of
 *   `skip_has_logo`, `fail_no_logo_source`, `dry_run_update`, `db_update_error` or `updated`.
 */
async function processRow(row) {
  const slug = row.slug;
  const currentLogo = safeUrl(row.logo_url);
  const website = normalizeWebsiteUrl(row.website);

  // Already points at a proper logo in our own storage: nothing to do.
  if (isSupabaseStorageUrl(currentLogo) && !isCoverLikeLogoUrl(currentLogo)) {
    return { slug, status: "skip_has_logo" };
  }

  const seedLogo = safeUrl(SEED_LOGO_MAP.get(String(slug)));

  const candidates = uniq([
    seedLogo,
    currentLogo && !isSupabaseStorageUrl(currentLogo) ? currentLogo : null,
    ...extractSeoImageCandidates(row.seo_jsonld),
  ]);

  let finalLogo = null;
  if (seedLogo && isSupabaseStorageUrl(seedLogo) && !isCoverLikeLogoUrl(seedLogo)) {
    finalLogo = seedLogo;
  }

  if (!finalLogo) {
    for (const c of candidates) {
      if (!c) continue;
      if (isSupabaseStorageUrl(c)) {
        // A logo already in our storage can be reused as is; cover images are not logos.
        if (isCoverLikeLogoUrl(c)) continue;
        finalLogo = c;
        break;
      }
      finalLogo = await storeLogoFromUrl(slug, c);
      if (finalLogo) break;
    }
  }

  // Last resort: scrape the company website.
  if (!finalLogo && website) {
    const websiteCandidates = await extractLogoCandidatesFromWebsite(website);
    for (const c of websiteCandidates) {
      finalLogo = await storeLogoFromUrl(slug, c);
      if (finalLogo) break;
    }
  }

  if (!finalLogo) {
    return { slug, status: "fail_no_logo_source", candidates: candidates.length };
  }

  if (DRY_RUN) {
    return { slug, status: "dry_run_update", logo_url: finalLogo };
  }

  const patch = {
    logo_url: finalLogo,
    seo_jsonld: patchSeoJsonldLogo(row.seo_jsonld, finalLogo),
  };

  const { error } = await supabase.from("companies").update(patch).eq("slug", slug);
  if (error) {
    return { slug, status: "db_update_error", error: String(error.message || error) };
  }
  return { slug, status: "updated", logo_url: finalLogo };
}

/**
 * Entry point: fetches the company rows, processes them with bounded concurrency
 * and prints a per-status summary followed by up to 20 failed rows.
 *
 * @returns {Promise<void>}
 */
async function main() {
  const marketLabel = MARKET || "ALL";
  const dryRunLabel = DRY_RUN ? "yes" : "no";
  console.log(`[LogoBackfill] seed source: ${BACKFILL_SEED_PATH} (${SEED_LOGO_MAP.size} logo candidates)`);
  console.log(
    `[LogoBackfill] market=${marketLabel} dry_run=${dryRunLabel} batch=${BATCH_SIZE} concurrency=${CONCURRENCY}`
  );

  const rows = await fetchRows();
  console.log(`[LogoBackfill] rows fetched: ${rows.length}`);

  // Run at most CONCURRENCY rows at a time.
  const limit = pLimit(CONCURRENCY);
  const results = await Promise.all(rows.map((row) => limit(() => processRow(row))));

  // Tally outcomes per status.
  const statusCounts = new Map();
  results.forEach(({ status }) => {
    statusCounts.set(status, (statusCounts.get(status) || 0) + 1);
  });

  console.log("[LogoBackfill] summary:");
  const mostFrequentFirst = [...statusCounts].sort(([, countA], [, countB]) => countB - countA);
  mostFrequentFirst.forEach(([status, count]) => {
    console.log(`  - ${status}: ${count}`);
  });

  // A row counts as failed when its status starts with "fail" or mentions "error".
  const isFailure = (result) => {
    const status = String(result.status || "");
    return status.startsWith("fail") || status.includes("error");
  };
  const failed = results.filter(isFailure);
  if (failed.length) {
    console.log("[LogoBackfill] failed sample:");
    failed.slice(0, 20).forEach((r) => {
      console.log(`  - ${r.slug}: ${r.status}${r.error ? ` (${r.error})` : ""}`);
    });
  }
}

// Slug -> logo URL lookup parsed once from the seed SQL file. It must be initialized
// before main() is invoked below, because main() and processRow() read it.
const SEED_LOGO_MAP = loadSeedLogoMap(BACKFILL_SEED_PATH);

// Start the backfill; any error that escapes main() is logged and the process exits with code 1.
main().catch((err) => {
  console.error("Fatal:", err);
  process.exit(1);
});
