1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61import { JSDOM } from "jsdom";
/**
 * Produce a flattened, text-only view of an HTML chunk that approximates what a
 * screen reader announces, in document order (PRD §7.8). The Reader cross-checks
 * this against the HTML structure to surface reading-order problems.
 *
 * The chunk is parsed with jsdom inside a synthetic `<body>` and walked
 * depth-first. Each recognised element (headings, images, links, list items,
 * tables, labels, form fields, block quotes, captions) becomes one
 * bracket-prefixed line and is NOT descended into; every other element is
 * transparent and only its children are visited. Bare text nodes are emitted
 * with runs of whitespace collapsed to a single space; whitespace-only text is
 * dropped.
 *
 * `<script>` and `<style>` subtrees are skipped entirely: their text children
 * are source code, not announced content.
 *
 * @param html - HTML fragment to flatten; interpolated into the `<body>` wrapper as-is.
 * @returns The emitted lines joined with a newline; an empty string when nothing is emitted.
 */
export function flatten(html: string): string {
  // DOM nodeType values for the two kinds of node the walk cares about.
  const ELEMENT_NODE = 1;
  const TEXT_NODE = 3;

  // Collapse every whitespace run to one space and strip both ends.
  const collapse = (raw: string | null | undefined): string =>
    (raw ?? "").replace(/\s+/g, " ").trim();

  const dom = new JSDOM(`<!DOCTYPE html><body>${html}</body>`);
  try {
    const lines: string[] = [];

    const walk = (node: Node): void => {
      if (node.nodeType === TEXT_NODE) {
        const content = collapse(node.textContent);
        if (content) lines.push(content);
        return;
      }
      // Anything that is neither text nor an element (e.g. a comment) is ignored.
      if (node.nodeType !== ELEMENT_NODE) return;

      const el = node as Element;
      const tag = el.tagName.toLowerCase();

      // textContent is read only inside the branches that print it, so plain
      // containers (div, p, section, ...) do not re-serialise their whole
      // subtree at every nesting level before recursing.
      switch (tag) {
        case "script":
        case "style":
          // Not announced content; without this the raw JS/CSS source would be
          // emitted through the text-node branch above.
          return;
        case "h1":
        case "h2":
        case "h3":
        case "h4":
        case "h5":
        case "h6":
          // Second character of the tag name is the heading level.
          lines.push(`[Heading ${tag.charAt(1)}] ${collapse(el.textContent)}`);
          return;
        case "img":
          // A missing alt attribute is reported explicitly; alt="" prints as an empty alt.
          lines.push(`[Image] alt="${el.getAttribute("alt") ?? "(missing)"}"`);
          return;
        case "a":
          lines.push(`[Link] ${collapse(el.textContent)}`);
          return;
        case "li":
          lines.push(`[List item] ${collapse(el.textContent)}`);
          return;
        case "table":
          // Only the caption is reported; rows and cells are not emitted.
          // The caption text is trimmed but its inner whitespace is kept.
          // NOTE(review): because the walk stops here, the "caption" case below
          // looks unreachable for captions inside a table - confirm whether
          // table contents were meant to be walked.
          lines.push(
            `[Table] ${el.querySelector("caption")?.textContent?.trim() ?? "(no caption)"}`,
          );
          return;
        case "label":
          lines.push(`[Label] ${collapse(el.textContent)}`);
          return;
        case "input":
        case "textarea":
        case "select":
          // trim() removes the trailing space left when there is no type attribute.
          lines.push(`[Field ${tag}] ${el.getAttribute("type") ?? ""}`.trim());
          return;
        case "blockquote":
          lines.push(`[Quote] ${collapse(el.textContent)}`);
          return;
        case "figcaption":
        case "caption":
          lines.push(`[Caption] ${collapse(el.textContent)}`);
          return;
        default:
          // Transparent container: visit children in document order.
          for (const child of Array.from(el.childNodes)) walk(child);
      }
    };

    for (const child of Array.from(dom.window.document.body.childNodes)) walk(child);
    return lines.join("\n");
  } finally {
    // Always release the jsdom window, even if the walk throws.
    dom.window.close();
  }
}