"""Merge audited feature/finding CSVs with the DnC task-tracker spreadsheets.

Joins the audit feature matrix and findings CSVs (under output/pdf/) with the
"KBH remaining tasks" CSV exports (under audit/input/) and writes one combined
JSON payload to tmp/excel-corrected-audit/corrected-audit-data.json.
"""

from __future__ import annotations

import csv
import json
import re
from pathlib import Path

# Repository root (two directory levels above this file) and the directory
# main() writes the merged JSON payload into.
ROOT = Path(__file__).resolve().parents[2]
OUT_DIR = ROOT / "tmp" / "excel-corrected-audit"

# Audit feature id -> DnC task names considered related to that feature.
# These names are matched by exact dict-key lookup against the parsed task
# names, so they must mirror the "Task name" cells of the DnC CSVs.
FEATURE_TASK_MAP: dict[str, list[str]] = {
    "F002": ["User password change", "User change email?", "Sign-up issues"],
    "F003": ["View draft articles"],
    "F005": ["Static pages column(FE)", "Donate"],
    "F006": [
        # NOTE(review): this entry retains embedded newlines, but parsed task
        # names are .strip()-ed, so it likely never matches a parsed task —
        # verify the real CSV task name and update this key. TODO confirm.
        "Comments don't use\n\n",
        "Comments, margin",
        "Opinion comments, styling",
    ],
    "F007": ["Rules for mailing"],
    "F008": ["Move Google Analytics"],
    "F009": ["Bulk upload images"],
    "F010": ["Related articles"],
    "F011": ["Location obligatory", "Maps"],
    "F013": [
        "Integrations page",
        "Hidden for newsletter subscribers? Sign-up block",
        "Newsletter modal",
    ],
    "F015": ["Mobile pay", "Rules for Stripe"],
    "F017": ["Subscriber management?", "Personal account page", "Organization Invitation"],
    "F018": ["Mobile pay"],
    "F022": [
        "Main image functionality",
        "Terminology",
        "View draft articles",
        "Set inline link target",
    ],
    "F023": ["Subscriber management?", "Move Google Analytics"],
    "F024": ["Rules for mailing", "User subscription to the project", "Sign-up issues"],
    "F027": ["Opinion Author Photo", "Main image functionality"],
    "F028": ["Maps", "Is /kort working?"],
    "F030": ["Opinion section", "Name of opinoin writer"],
    "F031": ["Visions, tasks", "Visioner cover", "Captions on Visioner"],
    "F033": ["Wrong content shown in section", "Anmeldelser (reviews)"],
    "F034": ["Content type (Photo)"],
    "F035": ["Donate", "Hidden for subscribers?", 'Hide "become member" for journalists etc.'],
    "F036": ["Organization Invitation"],
    "F039": ["Personal account page", "/profile", "User change email?"],
    "F040": ['The "Author"', "Journalist page"],
    "F041": ["Set paywall access?"],
    "F044": ["Polls (not working)"],
    "F045": ["CityChange inline widget"],
    "F046": ["Before / After slider"],
    "F047": ["Tags issues"],
    "F048": ["Basic deselect"],
    "F049": ['"Project" content type', "Terminology"],
    "F050": ['" before primary topic', "Topics are suddenly not all-caps"],
    "F052": [
        "URL structure (FE)",
        "Danish characters in slug",
        "SiteMap.xml",
        "robots.txt",
    ],
    "F056": ["Captions connected to images", "Inline captions are now messed up"],
    "F058": ["Search not working"],
    "F061": ["CityChange page?"],
    "F062": ["Set paywall access?"],
    "F063": ["/profile"],
    "F065": ["User subscription to the project", "Rules for mailing"],
    "F066": ["Set paywall access?"],
}

# Audit/mock finding id -> related DnC task names (same exact-match rule as
# FEATURE_TASK_MAP). Typos such as "googke" mirror the source sheet — do not
# "fix" them here or the lookup breaks.
FINDING_TASK_MAP: dict[str, list[str]] = {
    "A002": ["Set paywall access?"],
    "A003": ["Subscriber management?", "Personal account page", "Organization Invitation"],
    "A004": ["Bulk upload images", "Captions connected to images"],
    "A010": ["Integrations page"],
    "A017": ["Integrations page", "Hidden for newsletter subscribers? Sign-up block", "Newsletter modal"],
    "A018": ["Move Google Analytics"],
    "A020": ["Rules for Stripe"],
    "A024": ["Integrations page", "Google Ads? (FE)", "Added googke ads api_id to config BE"],
    "M002": ["Integrations page", "Hidden for newsletter subscribers? Sign-up block", "Newsletter modal"],
    "M004": ["User subscription to the project", "Rules for mailing", '"Project" content type'],
    "M006": ["Donate"],
    "M008": ["Set paywall access?"],
    "M010": ["Move Google Analytics"],
}

# Hand-written acceptance criteria per feature id; default_acceptance() falls
# back to area-keyword templates for ids not listed here.
ACCEPTANCE_OVERRIDES: dict[str, str] = {
    "F001": "Sensitive APIs are role-protected, destructive actions require authorization, and bypass/admin data is not publicly exposed.",
    "F002": "Users can register, sign in, recover/change credentials, and manage their own account data without support-side workarounds.",
    "F003": "Editors can create, edit, schedule, preview, and publish articles end to end from the dashboard.",
    "F005": "Admins can edit static pages in the CMS and the public pages render with the expected content and styling.",
    "F006": "Users can read, write, and reply to comments with correct formatting and stable frontend behavior.",
    "F007": "Commenters receive the expected notification and unsubscribe flows when comment activity affects them.",
    "F008": "Article statistics are collected correctly and exposed in a usable reporting surface with trustworthy counts.",
    "F009": "Editors can upload, reuse, organize, and manage media assets without broken CRUD or deployment gaps.",
    "F010": "Each article shows genuinely related content using the intended relevance logic rather than placeholder matching.",
    "F011": "Posts can store and use map/location data correctly, and the public map data reflects the real geodata behind the content.",
    "F013": "Newsletter sign-up surfaces submit real subscriptions, hide correctly for already-subscribed users, and expose the needed management controls.",
    "F015": "Stripe and MobilePay payments complete reliably, access changes happen immediately, and provider sync does not leave subscribers in the wrong state.",
    "F016": "Subscribers can upgrade, cancel, restore, and otherwise manage recurring billing without stubbed or broken self-service paths.",
    "F017": "Business subscribers can manage organizations, seats, invitations, and employee access without insecure or missing admin flows.",
    "F018": "Stripe and MobilePay webhook handling is robust enough that payment state stays synchronized even when provider callbacks are delayed.",
    "F022": "Editors can manage articles from the dashboard, use the writing editor, and configure supported widgets without broken UI gaps.",
    "F023": "Admins can review user, subscriber, and business/content metrics in a usable reporting view rather than raw or incomplete plumbing.",
    "F024": "Automated emails fire for the required account, payment, business-seat, comment, and project-alert events using approved templates.",
    "F027": "Front-page layout variants and featured-image controls can be switched intentionally and the change is reflected correctly on the site.",
    "F028": "The public map page and inline map widget both render correct data and link back to the mapped content as expected.",
    "F030": "Opinion pages render correctly, including their specific author/presentation rules, on list and detail views.",
    "F031": "Visions pages render the right content, labels, widgets, and styling on both list and detail views.",
    "F033": "Reviews pages render the correct review content and metadata instead of mixing in the wrong section data.",
    "F034": "Photo stories support the expected multi-image content flow and remain readable/styled correctly on the live site.",
    "F035": "Personal subscription entry points show the right offers and CTAs, hide when inappropriate, and do not expose obsolete purchase UI.",
    "F036": "Business subscription purchase and invite flows work for master and employee subscribers from invite through access grant.",
    "F039": "The profile/account area lets users manage their details, organization context, subscription details, and related profile surfaces safely.",
    "F040": "Author, district, and tag pages exist with correct routing/data so users can actually browse those taxonomy surfaces.",
    "F041": "KBH+ and paywall gating behave correctly for each user state without unsafe access shortcuts or broken control surfaces.",
    "F044": "Editors can configure polls and visitors can use all supported answer types without the widget breaking.",
    "F045": "Editors can place the CityChange single-suggestion widget with the expected ID-driven behavior and any promised override fields.",
    "F046": "The before/after slider renders and behaves as designed, including the required split-line presentation.",
    "F047": "Editors can replace or merge tags globally without manual article-by-article cleanup.",
    "F048": "Tag-selection UX supports repeated add/remove work cleanly, including obvious deselect behavior.",
    "F049": "The editorial content-type model and creation menu reflect the intended taxonomy and not just a flattened approximation.",
    "F050": "Primary-topic rendering behaves correctly on the frontend, including the special kommentar treatment and quote/label behavior.",
    "F052": "Public URLs match the legacy structure, old paths redirect correctly, and the SEO basics around discovery surfaces are in place.",
    "F056": "Editors can adjust inline-image caption data from the image UI and the frontend renders those captions cleanly.",
    "F058": "Search matches actual post content and uses AND semantics for multi-term queries.",
    "F061": "CityChange suggestions generate the intended deep links so users land in the app correctly from the site.",
    "F062": "Guest/share links bypass the paywall only in the controlled ways specified, with working generation and safe invalidation behavior.",
    "F063": "The profile page shows the user's comments with working deep links and clear reply markers.",
    "F065": "Users can subscribe to project updates, receive the right one-time publish alerts, and unsubscribe cleanly.",
    "F066": "Paywall-bypass admin tools let staff manage tokens/IPs without broken fields, missing edit routes, or unsafe data exposure.",
}

# Lower-cased mock-finding "type" -> recommendation text; load_code_review_rows
# falls back to a generic sentence for unknown types.
MOCK_RECOMMENDATION_MAP: dict[str, str] = {
    "hardcoded data": "Replace the hardcoded sample with real content/data loading and remove any misleading copy until that works.",
    "dead submit handler": "Implement the real submit flow or remove the form until it actually performs the promised action.",
    "stubbed backend": "Implement the backend path before exposing the CTA or label the flow as unavailable.",
    "misrepresented behavior": "Either implement the promised behavior or change the UI copy so it stops lying to the user.",
}


def clean_text(value: str) -> str:
    """Collapse all whitespace runs (including CR/LF) in *value* to single spaces and strip the ends."""
    return re.sub(r"\s+", " ", (value or "").replace("\r", " ").replace("\n", " ")).strip()


def compact_text(value: str, max_len: int = 220) -> str:
    """Clean *value* and truncate it to at most *max_len* characters.

    Truncation happens on a word boundary, trailing punctuation is dropped,
    and "..." is appended (the ellipsis is counted within *max_len*).
    """
    text = clean_text(value)
    if len(text) <= max_len:
        return text
    clipped = text[: max_len - 3].rsplit(" ", 1)[0].rstrip(" ,;:")
    return f"{clipped}..."
def split_acceptance_blob(value: str) -> tuple[str, str]:
    """Split a raw Acceptance-cell blob into (client comment, acceptance criteria).

    By sheet convention, free-form client commentary precedes the word
    "ACCEPTANCE" and the actual criteria follow it. When the keyword is
    absent, the whole blob counts as acceptance criteria.
    """
    blob = (value or "").replace("\r", "")
    marker = re.search(r"\bACCEPTANCE\b", blob, re.IGNORECASE)
    if marker is None:
        return "", clean_text(blob)
    comment_part = blob[: marker.start()].strip(" :-\n")
    criteria_part = blob[marker.end() :].strip(" :-\n")
    return clean_text(comment_part), clean_text(criteria_part)


def infer_task_flag(task: dict) -> str:
    """Classify a task as "duplicate/remove", "discussion", "in progress", or "open".

    Scans the combined dev/client/acceptance text for marker phrases.
    Groups are checked in priority order: duplicate markers win over
    discussion markers, which win over progress markers.
    """
    haystack = " ".join(
        [
            task.get("dev_comment", ""),
            task.get("client_comment", ""),
            task.get("acceptance_criteria", ""),
        ]
    ).lower()
    marker_groups: list[tuple[str, list[str]]] = [
        (
            "duplicate/remove",
            [
                "duplicate",
                "can be removed",
                "should be removed",
                "forget this task",
                "forget,",
                "this should not be a task",
                "so we can forget",
                "can be deleted",
                "this one can be deleted",
                "should be skipped",
                "skip this task",
                "no longer",
            ],
        ),
        (
            "discussion",
            [
                "discussion is needed",
                "need to discuss",
                "pending discussion",
                "discussion before",
                "discussion",
                "unclear",
                "i don't understand",
                "not fixed",
                "not done",
                "cannot test",
                "can't test",
                "needs recheck",
                "recheck",
                "confusing",
                "difficult",
            ],
        ),
        (
            "in progress",
            [
                "almost completed",
                "almost done",
                "pending a final test",
                "backend is ready",
                "done, but not deployed",
                "works but need",
                "already completed",
                "already implemented",
                "it is ready",
                "needs to be checked",
                "done.",
                "implemented",
                "fixed",
            ],
        ),
    ]
    for label, markers in marker_groups:
        if any(phrase in haystack for phrase in markers):
            return label
    return "open"


def task_sentiment(task: dict) -> str:
    """Map a task's inferred flag onto a coarse sentiment bucket."""
    sentiment_by_flag = {"duplicate/remove": "won't-do", "discussion": "blocked"}
    return sentiment_by_flag.get(task["flag"], "working")


def load_feature_rows() -> list[dict]:
    """Read the audited feature matrix CSV into a list of dict rows."""
    matrix_path = ROOT / "output" / "pdf" / "magasinet-kbh-feature-matrix.csv"
    with matrix_path.open(newline="", encoding="utf-8-sig") as fh:
        return list(csv.DictReader(fh))


def parse_task_file(path: Path, bucket_label: str) -> list[dict]:
    """Parse one DnC task-tracking CSV into normalized task dicts.

    Rows before the "Task name" header row are treated as section titles
    (only surfaced in output for the "Other backlog" file); "Total" rows,
    blank rows, and known pseudo-task names are skipped.
    """
    with path.open(newline="", encoding="utf-8-sig") as fh:
        raw_rows = list(csv.reader(fh))

    parsed: list[dict] = []
    header_row: list[str] | None = None
    section_title = ""
    pseudo_tasks = {"In development", "Mobile styling", "Frontend issues", "Added 26.03 Anders"}

    for raw in raw_rows:
        trimmed = [cell.strip() for cell in raw]
        if not any(trimmed):
            continue  # fully blank row
        if trimmed[0] == "Task name":
            # Header cells are intentionally kept unstripped: the lookup
            # below relies on the literal "Acceptance " header variant.
            header_row = raw
            continue
        if header_row is None:
            # Any pre-header row names the current section.
            section_title = trimmed[0]
            continue
        if "Total" in trimmed[:2]:
            continue  # summary row

        padding = [""] * (len(header_row) - len(raw))
        record = dict(zip(header_row, raw + padding))
        name = (record.get("Task name") or "").strip()
        if not name or name in pseudo_tasks:
            continue

        dev_comment = (
            record.get("Dev coments")  # sheet-typo header, kept as-is
            or record.get("Comment dev")
            or record.get("Dev comments")
            or ""
        ).strip()
        anders_comment = (record.get("Comment from Anders") or "").strip()
        acceptance_blob = (record.get("Acceptance") or record.get("Acceptance ") or "").strip()
        client_comment, acceptance_criteria = split_acceptance_blob(acceptance_blob)
        if anders_comment:
            client_comment = clean_text(f"{anders_comment} {client_comment}")

        show_section = bucket_label == "Other backlog" and section_title not in {"", "Task name"}
        task = {
            "task_name": name,
            "source_file": path.name,
            "bucket": bucket_label,
            "section": section_title if show_section else "",
            "trello_link": (record.get("Trello link") or "").strip(),
            "fe": (record.get("FE") or "").strip(),
            "be": (record.get("BE") or "").strip(),
            "dev_comment": clean_text(dev_comment),
            "client_comment": clean_text(client_comment),
            "acceptance_criteria": clean_text(acceptance_criteria),
        }
        task["flag"] = infer_task_flag(task)
        label = f"{bucket_label}: {task['section']}" if task["section"] else bucket_label
        if task["flag"] not in {"open", "in progress"}:
            label = f"{label} ({task['flag']})"
        task["status_label"] = label
        parsed.append(task)
    return parsed


def load_tasks() -> dict[str, dict]:
    """Load every DnC task CSV and return the tasks keyed by task name.

    NOTE(review): duplicate task names across files collapse to the
    last-parsed row — presumably intentional; confirm if provenance matters.
    """
    input_dir = ROOT / "audit" / "input"
    sources = [
        ("KBH remaining tasks - In development (almost done).csv", "In development (almost done)"),
        ("KBH remaining tasks - Needed for launch tasks from DnC.csv", "Needed for launch"),
        ("KBH remaining tasks - Task From Anders.csv", "Task from Anders"),
        ("KBH remaining tasks - Other.csv", "Other backlog"),
    ]
    all_tasks: list[dict] = []
    for filename, bucket in sources:
        all_tasks.extend(parse_task_file(input_dir / filename, bucket))
    return {task["task_name"]: task for task in all_tasks}


def default_acceptance(feature: dict) -> str:
    """Return the acceptance-criteria sentence for a feature row.

    Explicit ACCEPTANCE_OVERRIDES win; otherwise a template is chosen from
    keywords in the feature's area, with a generic parity fallback.
    """
    override = ACCEPTANCE_OVERRIDES.get(feature["id"])
    if override is not None:
        return override
    capability = clean_text(feature["capability"])
    area = clean_text(feature["area"]).lower()
    if "widget" in area:
        return f"Accepted when editors can configure {capability.lower()} and the frontend rendering matches the intended design and behavior."
    if any(word in area for word in ("dashboard", "editorial", "cms")):
        return f"Accepted when the product provides {capability.lower()} in the dashboard without broken workflows."
    if "payment" in area:
        return f"Accepted when {capability.lower()} works end to end and keeps billing/access state synchronized without manual repair."
    if any(word in area for word in ("frontend", "profile", "paywall", "archive")):
        return f"Accepted when the live site provides {capability.lower()} with correct data, routing, and styling."
    return f"Accepted when {capability.lower()} works with current-site parity and without blocking gaps."
def build_feature_notes(feature: dict) -> str:
    """Compose the human-readable notes column for one feature row.

    A "full" verdict prefers the curated note (falling back to compacted
    evidence); other verdicts concatenate note and evidence, with a stock
    sentence when both are empty. A scope-modifier suffix is appended for
    optional/withdrawn/out-of-scope features.
    """
    verdict = feature["verdict"].strip().lower()
    curated = clean_text(feature["notes"])
    evidence = clean_text(feature["new_evidence"])
    if verdict == "full":
        note = curated or compact_text(evidence, 280)
    else:
        fragments = []
        if curated:
            fragments.append(curated)
        # Skip evidence already quoted verbatim inside the curated note.
        if evidence and evidence not in curated:
            fragments.append(compact_text(evidence, 280))
        note = " ".join(fragments).strip()
        if not note:
            note = "Audit evidence did not support an acceptance-grade implementation."
    scope = feature["scope_modifier"].strip().lower()
    scope_suffixes = {
        "optional": "Scope note: optional item.",
        "withdrawn": "Scope note: withdrawn from pass/fail scoring.",
        "out_of_scope": "Scope note: out of scope for launch scoring.",
    }
    suffix = scope_suffixes.get(scope)
    if suffix:
        note = f"{note} {suffix}"
    return note.strip()


def task_priority(task: dict) -> tuple[int, str]:
    """Sort key: launch-critical buckets first, then alphabetical task name."""
    bucket_rank = {
        "Needed for launch": 0,
        "In development (almost done)": 1,
        "Task from Anders": 2,
        "Other backlog": 3,
    }
    return bucket_rank.get(task["bucket"], 99), task["task_name"]


def build_task_note(task: dict) -> str:
    """Render one task as "name: Dev: ...; Client: ..." for the notes column.

    The client comment takes precedence over raw acceptance text.
    """
    details = []
    if task["dev_comment"]:
        details.append(f"Dev: {compact_text(task['dev_comment'], 150)}")
    if task["client_comment"]:
        details.append(f"Client: {compact_text(task['client_comment'], 150)}")
    elif task["acceptance_criteria"]:
        details.append(f"Acceptance: {compact_text(task['acceptance_criteria'], 150)}")
    joined = "; ".join(details)
    if not joined:
        return task["task_name"]
    return f"{task['task_name']}: {joined}"


def build_feature_rows(features: list[dict], tasks_by_name: dict[str, dict]) -> list[dict]:
    """Join audited features with their mapped DnC tasks into report rows."""
    report_rows = []
    for feature in features:
        related = [
            tasks_by_name[task_name]
            for task_name in FEATURE_TASK_MAP.get(feature["id"], [])
            if task_name in tasks_by_name
        ]
        related.sort(key=task_priority)
        if related:
            # De-duplicate status labels while keeping priority order.
            dnc_status = "; ".join(dict.fromkeys(t["status_label"] for t in related))
            note_parts = [build_task_note(t) for t in related[:4]]
            overflow = len(related) - 4
            if overflow > 0:
                note_parts.append(f"+{overflow} more related DnC task(s)")
            dnc_notes = " | ".join(note_parts)
            sentiments = {task_sentiment(t) for t in related}
            if sentiments == {"won't-do"}:
                dnc_sentiment = "won't-do"
            elif "blocked" in sentiments:
                dnc_sentiment = "blocked"
            else:
                dnc_sentiment = "working"
            dnc_signal = "Mentioned in DnC"
        else:
            dnc_signal = "Not mentioned by DnC"
            dnc_status = "Not mentioned by DnC"
            dnc_sentiment = ""
            dnc_notes = ""
        report_rows.append(
            {
                "id": feature["id"],
                "area": clean_text(feature["area"]),
                "description": clean_text(feature["capability"]),
                "acceptance": default_acceptance(feature),
                "status": feature["verdict"].strip().lower(),
                "dnc_signal": dnc_signal,
                "dnc_sentiment": dnc_sentiment,
                "dnc_status": dnc_status,
                "dnc_notes": dnc_notes,
                "notes": build_feature_notes(feature),
            }
        )
    return report_rows


def finding_dnc_context(finding_id: str, tasks_by_name: dict[str, dict]) -> tuple[str, str]:
    """Return (signal, reference string) tying a finding to DnC tasks, if any."""
    related = [
        tasks_by_name[task_name]
        for task_name in FINDING_TASK_MAP.get(finding_id, [])
        if task_name in tasks_by_name
    ]
    related.sort(key=task_priority)
    if not related:
        return ("New discovered issue", "")
    references = [
        f"{t['task_name']} [{t['bucket']}]" if t["bucket"] else t["task_name"]
        for t in related
    ]
    return ("Mentioned/related in DnC", "; ".join(references))


def load_code_review_rows(tasks_by_name: dict[str, dict]) -> list[dict]:
    """Load audit and mock findings into one normalized, sorted list.

    Rows are sorted by descending severity, then by finding id.
    """
    findings: list[dict] = []

    audit_csv = ROOT / "output" / "pdf" / "magasinet-kbh-audit-findings.csv"
    with audit_csv.open(newline="", encoding="utf-8-sig") as fh:
        for record in csv.DictReader(fh):
            signal, reference = finding_dnc_context(record["id"], tasks_by_name)
            findings.append(
                {
                    "id": record["id"],
                    "source": "Audit Findings",
                    "category": clean_text(record["category"]),
                    "title": clean_text(record["title"]),
                    "severity_1_10": int(record["impact_1_10"]),
                    "dnc_signal": signal,
                    "dnc_reference": reference,
                    "surface_or_reachability": clean_text(record["reachable"]),
                    "evidence": clean_text(record["evidence"]),
                    "impact": clean_text(record["why_it_matters"]),
                    "recommendation": clean_text(record["recommendation"]),
                }
            )

    mock_csv = ROOT / "output" / "pdf" / "magasinet-kbh-mock-hardcode-findings.csv"
    with mock_csv.open(newline="", encoding="utf-8-sig") as fh:
        for record in csv.DictReader(fh):
            kind = clean_text(record["type"]).lower()
            recommendation = MOCK_RECOMMENDATION_MAP.get(
                kind,
                "Replace the fake behavior with a real implementation and re-test the affected surface.",
            )
            signal, reference = finding_dnc_context(record["id"], tasks_by_name)
            findings.append(
                {
                    "id": record["id"],
                    "source": "Mock Findings",
                    "category": f"Mock / {clean_text(record['type'])}",
                    "title": clean_text(record["surface"]),
                    "severity_1_10": int(record["severity"]),
                    "dnc_signal": signal,
                    "dnc_reference": reference,
                    "surface_or_reachability": clean_text(record["surface"]),
                    "evidence": clean_text(record["evidence"]),
                    "impact": clean_text(record["user_visible_effect"]),
                    "recommendation": recommendation,
                }
            )

    findings.sort(key=lambda f: (-f["severity_1_10"], f["id"]))
    return findings


def main() -> None:
    """Build the corrected-audit JSON payload and write it under OUT_DIR."""
    OUT_DIR.mkdir(parents=True, exist_ok=True)
    features = load_feature_rows()
    tasks_by_name = load_tasks()
    feature_rows = build_feature_rows(features, tasks_by_name)
    code_review_rows = load_code_review_rows(tasks_by_name)

    def count_features(key: str, value: str) -> int:
        # Count feature rows whose column `key` equals `value`.
        return sum(1 for row in feature_rows if row[key] == value)

    def count_findings(key: str, value: str) -> int:
        # Count finding rows whose column `key` equals `value`.
        return sum(1 for row in code_review_rows if row[key] == value)

    payload = {
        "feature_rows": feature_rows,
        "code_review_rows": code_review_rows,
        "summary": {
            "total_features": len(feature_rows),
            "full_count": count_features("status", "full"),
            "partial_count": count_features("status", "partial"),
            "skipped_count": count_features("status", "skipped"),
            "dnc_mentioned_count": count_features("dnc_signal", "Mentioned in DnC"),
            "dnc_not_mentioned_count": count_features("dnc_signal", "Not mentioned by DnC"),
            "dnc_working_count": count_features("dnc_sentiment", "working"),
            "dnc_blocked_count": count_features("dnc_sentiment", "blocked"),
            "dnc_wont_do_count": count_features("dnc_sentiment", "won't-do"),
            "audit_findings_count": count_findings("source", "Audit Findings"),
            "mock_findings_count": count_findings("source", "Mock Findings"),
            "findings_mentioned_in_dnc": count_findings("dnc_signal", "Mentioned/related in DnC"),
            "findings_new_issue": count_findings("dnc_signal", "New discovered issue"),
        },
    }
    target = OUT_DIR / "corrected-audit-data.json"
    target.write_text(json.dumps(payload, indent=2, ensure_ascii=False), encoding="utf-8")
    print(target)


if __name__ == "__main__":
    main()