"""Merge audited feature/finding CSVs with the DnC task-tracker spreadsheets.

Joins the audit feature matrix and findings CSVs (under output/pdf/) with the
"KBH remaining tasks" CSV exports (under audit/input/) and writes one combined
JSON payload to tmp/excel-corrected-audit/corrected-audit-data.json.
"""

from __future__ import annotations

import csv
import json
import re
from pathlib import Path

# Repository root (two directory levels above this file) and the directory
# main() writes the merged JSON payload into.
ROOT = Path(__file__).resolve().parents[2]
OUT_DIR = ROOT / "tmp" / "excel-corrected-audit"

# Audit feature id -> DnC task names considered related to that feature.
# These names are matched by exact dict-key lookup against the parsed task
# names, so they must mirror the "Task name" cells of the DnC CSVs.
FEATURE_TASK_MAP: dict[str, list[str]] = {
    "F002": ["User password change", "User change email?", "Sign-up issues"],
    "F003": ["View draft articles"],
    "F005": ["Static pages column(FE)", "Donate"],
    "F006": [
        # NOTE(review): this entry retains embedded newlines, but parsed task
        # names are .strip()-ed, so it likely never matches a parsed task —
        # verify the real CSV task name and update this key. TODO confirm.
        "Comments don't use\n\n",
        "Comments, margin",
        "Opinion comments, styling",
    ],
    "F007": ["Rules for mailing"],
    "F008": ["Move Google Analytics"],
    "F009": ["Bulk upload images"],
    "F010": ["Related articles"],
    "F011": ["Location obligatory", "Maps"],
    "F013": [
        "Integrations page",
        "Hidden for newsletter subscribers? Sign-up block",
        "Newsletter modal",
    ],
    "F015": ["Mobile pay", "Rules for Stripe"],
    "F017": ["Subscriber management?", "Personal account page", "Organization Invitation"],
    "F018": ["Mobile pay"],
    "F022": [
        "Main image functionality",
        "Terminology",
        "View draft articles",
        "Set inline link target",
    ],
    "F023": ["Subscriber management?", "Move Google Analytics"],
    "F024": ["Rules for mailing", "User subscription to the project", "Sign-up issues"],
    "F027": ["Opinion Author Photo", "Main image functionality"],
    "F028": ["Maps", "Is /kort working?"],
    "F030": ["Opinion section", "Name of opinoin writer"],
    "F031": ["Visions, tasks", "Visioner cover", "Captions on Visioner"],
    "F033": ["Wrong content shown in section", "Anmeldelser (reviews)"],
    "F034": ["Content type (Photo)"],
    "F035": ["Donate", "Hidden for subscribers?", 'Hide "become member" for journalists etc.'],
    "F036": ["Organization Invitation"],
    "F039": ["Personal account page", "/profile", "User change email?"],
    "F040": ['The "Author"', "Journalist page"],
    "F041": ["Set paywall access?"],
    "F044": ["Polls (not working)"],
    "F045": ["CityChange inline widget"],
    "F046": ["Before / After slider"],
    "F047": ["Tags issues"],
    "F048": ["Basic deselect"],
    "F049": ['"Project" content type', "Terminology"],
    "F050": ['" before primary topic', "Topics are suddenly not all-caps"],
    "F052": [
        "URL structure (FE)",
        "Danish characters in slug",
        "SiteMap.xml",
        "robots.txt",
    ],
    "F056": ["Captions connected to images", "Inline captions are now messed up"],
    "F058": ["Search not working"],
    "F061": ["CityChange page?"],
    "F062": ["Set paywall access?"],
    "F063": ["/profile"],
    "F065": ["User subscription to the project", "Rules for mailing"],
    "F066": ["Set paywall access?"],
}

# Audit/mock finding id -> related DnC task names (same exact-match rule as
# FEATURE_TASK_MAP). Typos such as "googke" mirror the source sheet — do not
# "fix" them here or the lookup breaks.
FINDING_TASK_MAP: dict[str, list[str]] = {
    "A002": ["Set paywall access?"],
    "A003": ["Subscriber management?", "Personal account page", "Organization Invitation"],
    "A004": ["Bulk upload images", "Captions connected to images"],
    "A010": ["Integrations page"],
    "A017": ["Integrations page", "Hidden for newsletter subscribers? Sign-up block", "Newsletter modal"],
    "A018": ["Move Google Analytics"],
    "A020": ["Rules for Stripe"],
    "A024": ["Integrations page", "Google Ads? (FE)", "Added googke ads api_id to config BE"],
    "M002": ["Integrations page", "Hidden for newsletter subscribers? Sign-up block", "Newsletter modal"],
    "M004": ["User subscription to the project", "Rules for mailing", '"Project" content type'],
    "M006": ["Donate"],
    "M008": ["Set paywall access?"],
    "M010": ["Move Google Analytics"],
}

# Hand-written acceptance criteria per feature id; default_acceptance() falls
# back to area-keyword templates for ids not listed here.
ACCEPTANCE_OVERRIDES: dict[str, str] = {
    "F001": "Sensitive APIs are role-protected, destructive actions require authorization, and bypass/admin data is not publicly exposed.",
    "F002": "Users can register, sign in, recover/change credentials, and manage their own account data without support-side workarounds.",
    "F003": "Editors can create, edit, schedule, preview, and publish articles end to end from the dashboard.",
    "F005": "Admins can edit static pages in the CMS and the public pages render with the expected content and styling.",
    "F006": "Users can read, write, and reply to comments with correct formatting and stable frontend behavior.",
    "F007": "Commenters receive the expected notification and unsubscribe flows when comment activity affects them.",
    "F008": "Article statistics are collected correctly and exposed in a usable reporting surface with trustworthy counts.",
    "F009": "Editors can upload, reuse, organize, and manage media assets without broken CRUD or deployment gaps.",
    "F010": "Each article shows genuinely related content using the intended relevance logic rather than placeholder matching.",
    "F011": "Posts can store and use map/location data correctly, and the public map data reflects the real geodata behind the content.",
    "F013": "Newsletter sign-up surfaces submit real subscriptions, hide correctly for already-subscribed users, and expose the needed management controls.",
    "F015": "Stripe and MobilePay payments complete reliably, access changes happen immediately, and provider sync does not leave subscribers in the wrong state.",
    "F016": "Subscribers can upgrade, cancel, restore, and otherwise manage recurring billing without stubbed or broken self-service paths.",
    "F017": "Business subscribers can manage organizations, seats, invitations, and employee access without insecure or missing admin flows.",
    "F018": "Stripe and MobilePay webhook handling is robust enough that payment state stays synchronized even when provider callbacks are delayed.",
    "F022": "Editors can manage articles from the dashboard, use the writing editor, and configure supported widgets without broken UI gaps.",
    "F023": "Admins can review user, subscriber, and business/content metrics in a usable reporting view rather than raw or incomplete plumbing.",
    "F024": "Automated emails fire for the required account, payment, business-seat, comment, and project-alert events using approved templates.",
    "F027": "Front-page layout variants and featured-image controls can be switched intentionally and the change is reflected correctly on the site.",
    "F028": "The public map page and inline map widget both render correct data and link back to the mapped content as expected.",
    "F030": "Opinion pages render correctly, including their specific author/presentation rules, on list and detail views.",
    "F031": "Visions pages render the right content, labels, widgets, and styling on both list and detail views.",
    "F033": "Reviews pages render the correct review content and metadata instead of mixing in the wrong section data.",
    "F034": "Photo stories support the expected multi-image content flow and remain readable/styled correctly on the live site.",
    "F035": "Personal subscription entry points show the right offers and CTAs, hide when inappropriate, and do not expose obsolete purchase UI.",
    "F036": "Business subscription purchase and invite flows work for master and employee subscribers from invite through access grant.",
    "F039": "The profile/account area lets users manage their details, organization context, subscription details, and related profile surfaces safely.",
    "F040": "Author, district, and tag pages exist with correct routing/data so users can actually browse those taxonomy surfaces.",
    "F041": "KBH+ and paywall gating behave correctly for each user state without unsafe access shortcuts or broken control surfaces.",
    "F044": "Editors can configure polls and visitors can use all supported answer types without the widget breaking.",
    "F045": "Editors can place the CityChange single-suggestion widget with the expected ID-driven behavior and any promised override fields.",
    "F046": "The before/after slider renders and behaves as designed, including the required split-line presentation.",
    "F047": "Editors can replace or merge tags globally without manual article-by-article cleanup.",
    "F048": "Tag-selection UX supports repeated add/remove work cleanly, including obvious deselect behavior.",
    "F049": "The editorial content-type model and creation menu reflect the intended taxonomy and not just a flattened approximation.",
    "F050": "Primary-topic rendering behaves correctly on the frontend, including the special kommentar treatment and quote/label behavior.",
    "F052": "Public URLs match the legacy structure, old paths redirect correctly, and the SEO basics around discovery surfaces are in place.",
    "F056": "Editors can adjust inline-image caption data from the image UI and the frontend renders those captions cleanly.",
    "F058": "Search matches actual post content and uses AND semantics for multi-term queries.",
    "F061": "CityChange suggestions generate the intended deep links so users land in the app correctly from the site.",
    "F062": "Guest/share links bypass the paywall only in the controlled ways specified, with working generation and safe invalidation behavior.",
    "F063": "The profile page shows the user's comments with working deep links and clear reply markers.",
    "F065": "Users can subscribe to project updates, receive the right one-time publish alerts, and unsubscribe cleanly.",
    "F066": "Paywall-bypass admin tools let staff manage tokens/IPs without broken fields, missing edit routes, or unsafe data exposure.",
}

# Lower-cased mock-finding "type" -> recommendation text; load_code_review_rows
# falls back to a generic sentence for unknown types.
MOCK_RECOMMENDATION_MAP: dict[str, str] = {
    "hardcoded data": "Replace the hardcoded sample with real content/data loading and remove any misleading copy until that works.",
    "dead submit handler": "Implement the real submit flow or remove the form until it actually performs the promised action.",
    "stubbed backend": "Implement the backend path before exposing the CTA or label the flow as unavailable.",
    "misrepresented behavior": "Either implement the promised behavior or change the UI copy so it stops lying to the user.",
}


def clean_text(value: str) -> str:
    """Collapse all whitespace runs (including CR/LF) in *value* to single spaces and strip the ends."""
    return re.sub(r"\s+", " ", (value or "").replace("\r", " ").replace("\n", " ")).strip()


def compact_text(value: str, max_len: int = 220) -> str:
    """Clean *value* and truncate it to at most *max_len* characters.

    Truncation happens on a word boundary, trailing punctuation is dropped,
    and "..." is appended (the ellipsis is counted within *max_len*).
    """
    text = clean_text(value)
    if len(text) <= max_len:
        return text
    clipped = text[: max_len - 3].rsplit(" ", 1)[0].rstrip(" ,;:")
    return f"{clipped}..."
def split_acceptance_blob(value: str) -> tuple[str, str]:
    """Split a raw Acceptance-cell blob into (client comment, acceptance criteria).

    By sheet convention, free-form client commentary precedes the word
    "ACCEPTANCE" and the actual criteria follow it. When the keyword is
    absent, the whole blob counts as acceptance criteria.
    """
    blob = (value or "").replace("\r", "")
    marker = re.search(r"\bACCEPTANCE\b", blob, re.IGNORECASE)
    if marker is None:
        return "", clean_text(blob)
    comment_part = blob[: marker.start()].strip(" :-\n")
    criteria_part = blob[marker.end() :].strip(" :-\n")
    return clean_text(comment_part), clean_text(criteria_part)


def infer_task_flag(task: dict) -> str:
    """Classify a task as "duplicate/remove", "discussion", "in progress", or "open".

    Scans the combined dev/client/acceptance text for marker phrases.
    Groups are checked in priority order: duplicate markers win over
    discussion markers, which win over progress markers.
    """
    haystack = " ".join(
        [
            task.get("dev_comment", ""),
            task.get("client_comment", ""),
            task.get("acceptance_criteria", ""),
        ]
    ).lower()
    marker_groups: list[tuple[str, list[str]]] = [
        (
            "duplicate/remove",
            [
                "duplicate",
                "can be removed",
                "should be removed",
                "forget this task",
                "forget,",
                "this should not be a task",
                "so we can forget",
                "can be deleted",
                "this one can be deleted",
                "should be skipped",
                "skip this task",
                "no longer",
            ],
        ),
        (
            "discussion",
            [
                "discussion is needed",
                "need to discuss",
                "pending discussion",
                "discussion before",
                "discussion",
                "unclear",
                "i don't understand",
                "not fixed",
                "not done",
                "cannot test",
                "can't test",
                "needs recheck",
                "recheck",
                "confusing",
                "difficult",
            ],
        ),
        (
            "in progress",
            [
                "almost completed",
                "almost done",
                "pending a final test",
                "backend is ready",
                "done, but not deployed",
                "works but need",
                "already completed",
                "already implemented",
                "it is ready",
                "needs to be checked",
                "done.",
                "implemented",
                "fixed",
            ],
        ),
    ]
    for label, markers in marker_groups:
        if any(phrase in haystack for phrase in markers):
            return label
    return "open"


def task_sentiment(task: dict) -> str:
    """Map a task's inferred flag onto a coarse sentiment bucket."""
    sentiment_by_flag = {"duplicate/remove": "won't-do", "discussion": "blocked"}
    return sentiment_by_flag.get(task["flag"], "working")


def load_feature_rows() -> list[dict]:
    """Read the audited feature matrix CSV into a list of dict rows."""
    matrix_path = ROOT / "output" / "pdf" / "magasinet-kbh-feature-matrix.csv"
    with matrix_path.open(newline="", encoding="utf-8-sig") as fh:
        return list(csv.DictReader(fh))


def parse_task_file(path: Path, bucket_label: str) -> list[dict]:
    """Parse one DnC task-tracking CSV into normalized task dicts.

    Rows before the "Task name" header row are treated as section titles
    (only surfaced in output for the "Other backlog" file); "Total" rows,
    blank rows, and known pseudo-task names are skipped.
    """
    with path.open(newline="", encoding="utf-8-sig") as fh:
        raw_rows = list(csv.reader(fh))

    parsed: list[dict] = []
    header_row: list[str] | None = None
    section_title = ""
    pseudo_tasks = {"In development", "Mobile styling", "Frontend issues", "Added 26.03 Anders"}

    for raw in raw_rows:
        trimmed = [cell.strip() for cell in raw]
        if not any(trimmed):
            continue  # fully blank row
        if trimmed[0] == "Task name":
            # Header cells are intentionally kept unstripped: the lookup
            # below relies on the literal "Acceptance " header variant.
            header_row = raw
            continue
        if header_row is None:
            # Any pre-header row names the current section.
            section_title = trimmed[0]
            continue
        if "Total" in trimmed[:2]:
            continue  # summary row

        padding = [""] * (len(header_row) - len(raw))
        record = dict(zip(header_row, raw + padding))
        name = (record.get("Task name") or "").strip()
        if not name or name in pseudo_tasks:
            continue

        dev_comment = (
            record.get("Dev coments")  # sheet-typo header, kept as-is
            or record.get("Comment dev")
            or record.get("Dev comments")
            or ""
        ).strip()
        anders_comment = (record.get("Comment from Anders") or "").strip()
        acceptance_blob = (record.get("Acceptance") or record.get("Acceptance ") or "").strip()
        client_comment, acceptance_criteria = split_acceptance_blob(acceptance_blob)
        if anders_comment:
            client_comment = clean_text(f"{anders_comment} {client_comment}")

        show_section = bucket_label == "Other backlog" and section_title not in {"", "Task name"}
        task = {
            "task_name": name,
            "source_file": path.name,
            "bucket": bucket_label,
            "section": section_title if show_section else "",
            "trello_link": (record.get("Trello link") or "").strip(),
            "fe": (record.get("FE") or "").strip(),
            "be": (record.get("BE") or "").strip(),
            "dev_comment": clean_text(dev_comment),
            "client_comment": clean_text(client_comment),
            "acceptance_criteria": clean_text(acceptance_criteria),
        }
        task["flag"] = infer_task_flag(task)
        label = f"{bucket_label}: {task['section']}" if task["section"] else bucket_label
        if task["flag"] not in {"open", "in progress"}:
            label = f"{label} ({task['flag']})"
        task["status_label"] = label
        parsed.append(task)
    return parsed


def load_tasks() -> dict[str, dict]:
    """Load every DnC task CSV and return the tasks keyed by task name.

    NOTE(review): duplicate task names across files collapse to the
    last-parsed row — presumably intentional; confirm if provenance matters.
    """
    input_dir = ROOT / "audit" / "input"
    sources = [
        ("KBH remaining tasks - In development (almost done).csv", "In development (almost done)"),
        ("KBH remaining tasks - Needed for launch tasks from DnC.csv", "Needed for launch"),
        ("KBH remaining tasks - Task From Anders.csv", "Task from Anders"),
        ("KBH remaining tasks - Other.csv", "Other backlog"),
    ]
    all_tasks: list[dict] = []
    for filename, bucket in sources:
        all_tasks.extend(parse_task_file(input_dir / filename, bucket))
    return {task["task_name"]: task for task in all_tasks}


def default_acceptance(feature: dict) -> str:
    """Return the acceptance-criteria sentence for a feature row.

    Explicit ACCEPTANCE_OVERRIDES win; otherwise a template is chosen from
    keywords in the feature's area, with a generic parity fallback.
    """
    override = ACCEPTANCE_OVERRIDES.get(feature["id"])
    if override is not None:
        return override
    capability = clean_text(feature["capability"])
    area = clean_text(feature["area"]).lower()
    if "widget" in area:
        return f"Accepted when editors can configure {capability.lower()} and the frontend rendering matches the intended design and behavior."
    if any(word in area for word in ("dashboard", "editorial", "cms")):
        return f"Accepted when the product provides {capability.lower()} in the dashboard without broken workflows."
    if "payment" in area:
        return f"Accepted when {capability.lower()} works end to end and keeps billing/access state synchronized without manual repair."
    if any(word in area for word in ("frontend", "profile", "paywall", "archive")):
        return f"Accepted when the live site provides {capability.lower()} with correct data, routing, and styling."
    return f"Accepted when {capability.lower()} works with current-site parity and without blocking gaps."
def build_feature_notes(feature: dict) -> str:
    """Compose the human-readable notes column for one feature row.

    A "full" verdict prefers the curated note (falling back to compacted
    evidence); other verdicts concatenate note and evidence, with a stock
    sentence when both are empty. A scope-modifier suffix is appended for
    optional/withdrawn/out-of-scope features.
    """
    verdict = feature["verdict"].strip().lower()
    curated = clean_text(feature["notes"])
    evidence = clean_text(feature["new_evidence"])
    if verdict == "full":
        note = curated or compact_text(evidence, 280)
    else:
        fragments = []
        if curated:
            fragments.append(curated)
        # Skip evidence already quoted verbatim inside the curated note.
        if evidence and evidence not in curated:
            fragments.append(compact_text(evidence, 280))
        note = " ".join(fragments).strip()
        if not note:
            note = "Audit evidence did not support an acceptance-grade implementation."
    scope = feature["scope_modifier"].strip().lower()
    scope_suffixes = {
        "optional": "Scope note: optional item.",
        "withdrawn": "Scope note: withdrawn from pass/fail scoring.",
        "out_of_scope": "Scope note: out of scope for launch scoring.",
    }
    suffix = scope_suffixes.get(scope)
    if suffix:
        note = f"{note} {suffix}"
    return note.strip()


def task_priority(task: dict) -> tuple[int, str]:
    """Sort key: launch-critical buckets first, then alphabetical task name."""
    bucket_rank = {
        "Needed for launch": 0,
        "In development (almost done)": 1,
        "Task from Anders": 2,
        "Other backlog": 3,
    }
    return bucket_rank.get(task["bucket"], 99), task["task_name"]


def build_task_note(task: dict) -> str:
    """Render one task as "name: Dev: ...; Client: ..." for the notes column.

    The client comment takes precedence over raw acceptance text.
    """
    details = []
    if task["dev_comment"]:
        details.append(f"Dev: {compact_text(task['dev_comment'], 150)}")
    if task["client_comment"]:
        details.append(f"Client: {compact_text(task['client_comment'], 150)}")
    elif task["acceptance_criteria"]:
        details.append(f"Acceptance: {compact_text(task['acceptance_criteria'], 150)}")
    joined = "; ".join(details)
    if not joined:
        return task["task_name"]
    return f"{task['task_name']}: {joined}"


def build_feature_rows(features: list[dict], tasks_by_name: dict[str, dict]) -> list[dict]:
    """Join audited features with their mapped DnC tasks into report rows."""
    report_rows = []
    for feature in features:
        related = [
            tasks_by_name[task_name]
            for task_name in FEATURE_TASK_MAP.get(feature["id"], [])
            if task_name in tasks_by_name
        ]
        related.sort(key=task_priority)
        if related:
            # De-duplicate status labels while keeping priority order.
            dnc_status = "; ".join(dict.fromkeys(t["status_label"] for t in related))
            note_parts = [build_task_note(t) for t in related[:4]]
            overflow = len(related) - 4
            if overflow > 0:
                note_parts.append(f"+{overflow} more related DnC task(s)")
            dnc_notes = " | ".join(note_parts)
            sentiments = {task_sentiment(t) for t in related}
            if sentiments == {"won't-do"}:
                dnc_sentiment = "won't-do"
            elif "blocked" in sentiments:
                dnc_sentiment = "blocked"
            else:
                dnc_sentiment = "working"
            dnc_signal = "Mentioned in DnC"
        else:
            dnc_signal = "Not mentioned by DnC"
            dnc_status = "Not mentioned by DnC"
            dnc_sentiment = ""
            dnc_notes = ""
        report_rows.append(
            {
                "id": feature["id"],
                "area": clean_text(feature["area"]),
                "description": clean_text(feature["capability"]),
                "acceptance": default_acceptance(feature),
                "status": feature["verdict"].strip().lower(),
                "dnc_signal": dnc_signal,
                "dnc_sentiment": dnc_sentiment,
                "dnc_status": dnc_status,
                "dnc_notes": dnc_notes,
                "notes": build_feature_notes(feature),
            }
        )
    return report_rows


def finding_dnc_context(finding_id: str, tasks_by_name: dict[str, dict]) -> tuple[str, str]:
    """Return (signal, reference string) tying a finding to DnC tasks, if any."""
    related = [
        tasks_by_name[task_name]
        for task_name in FINDING_TASK_MAP.get(finding_id, [])
        if task_name in tasks_by_name
    ]
    related.sort(key=task_priority)
    if not related:
        return ("New discovered issue", "")
    references = [
        f"{t['task_name']} [{t['bucket']}]" if t["bucket"] else t["task_name"]
        for t in related
    ]
    return ("Mentioned/related in DnC", "; ".join(references))


def load_code_review_rows(tasks_by_name: dict[str, dict]) -> list[dict]:
    """Load audit and mock findings into one normalized, sorted list.

    Rows are sorted by descending severity, then by finding id.
    """
    findings: list[dict] = []

    audit_csv = ROOT / "output" / "pdf" / "magasinet-kbh-audit-findings.csv"
    with audit_csv.open(newline="", encoding="utf-8-sig") as fh:
        for record in csv.DictReader(fh):
            signal, reference = finding_dnc_context(record["id"], tasks_by_name)
            findings.append(
                {
                    "id": record["id"],
                    "source": "Audit Findings",
                    "category": clean_text(record["category"]),
                    "title": clean_text(record["title"]),
                    "severity_1_10": int(record["impact_1_10"]),
                    "dnc_signal": signal,
                    "dnc_reference": reference,
                    "surface_or_reachability": clean_text(record["reachable"]),
                    "evidence": clean_text(record["evidence"]),
                    "impact": clean_text(record["why_it_matters"]),
                    "recommendation": clean_text(record["recommendation"]),
                }
            )

    mock_csv = ROOT / "output" / "pdf" / "magasinet-kbh-mock-hardcode-findings.csv"
    with mock_csv.open(newline="", encoding="utf-8-sig") as fh:
        for record in csv.DictReader(fh):
            kind = clean_text(record["type"]).lower()
            recommendation = MOCK_RECOMMENDATION_MAP.get(
                kind,
                "Replace the fake behavior with a real implementation and re-test the affected surface.",
            )
            signal, reference = finding_dnc_context(record["id"], tasks_by_name)
            findings.append(
                {
                    "id": record["id"],
                    "source": "Mock Findings",
                    "category": f"Mock / {clean_text(record['type'])}",
                    "title": clean_text(record["surface"]),
                    "severity_1_10": int(record["severity"]),
                    "dnc_signal": signal,
                    "dnc_reference": reference,
                    "surface_or_reachability": clean_text(record["surface"]),
                    "evidence": clean_text(record["evidence"]),
                    "impact": clean_text(record["user_visible_effect"]),
                    "recommendation": recommendation,
                }
            )

    findings.sort(key=lambda f: (-f["severity_1_10"], f["id"]))
    return findings


def main() -> None:
    """Build the corrected-audit JSON payload and write it under OUT_DIR."""
    OUT_DIR.mkdir(parents=True, exist_ok=True)
    features = load_feature_rows()
    tasks_by_name = load_tasks()
    feature_rows = build_feature_rows(features, tasks_by_name)
    code_review_rows = load_code_review_rows(tasks_by_name)

    def count_features(key: str, value: str) -> int:
        # Count feature rows whose column `key` equals `value`.
        return sum(1 for row in feature_rows if row[key] == value)

    def count_findings(key: str, value: str) -> int:
        # Count finding rows whose column `key` equals `value`.
        return sum(1 for row in code_review_rows if row[key] == value)

    payload = {
        "feature_rows": feature_rows,
        "code_review_rows": code_review_rows,
        "summary": {
            "total_features": len(feature_rows),
            "full_count": count_features("status", "full"),
            "partial_count": count_features("status", "partial"),
            "skipped_count": count_features("status", "skipped"),
            "dnc_mentioned_count": count_features("dnc_signal", "Mentioned in DnC"),
            "dnc_not_mentioned_count": count_features("dnc_signal", "Not mentioned by DnC"),
            "dnc_working_count": count_features("dnc_sentiment", "working"),
            "dnc_blocked_count": count_features("dnc_sentiment", "blocked"),
            "dnc_wont_do_count": count_features("dnc_sentiment", "won't-do"),
            "audit_findings_count": count_findings("source", "Audit Findings"),
            "mock_findings_count": count_findings("source", "Mock Findings"),
            "findings_mentioned_in_dnc": count_findings("dnc_signal", "Mentioned/related in DnC"),
            "findings_new_issue": count_findings("dnc_signal", "New discovered issue"),
        },
    }
    target = OUT_DIR / "corrected-audit-data.json"
    target.write_text(json.dumps(payload, indent=2, ensure_ascii=False), encoding="utf-8")
    print(target)


if __name__ == "__main__":
    main()