1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
"""Unit tests for SSE init payload splitting.

Tests the _slim_init_payload helper that strips binary images from
the init event so they can be streamed individually.
"""
import pytest
from src.api.pipeline_viewer import _slim_init_payload
from src.services.pipeline_viewer_models import FigureData, PipelineViewerResult
# Module-level pytest marker: applies the ``unit`` mark to every test below.
pytestmark = pytest.mark.unit
def _make_result(
    *,
    page_images: dict[str, str] | None = None,
    figures: list[FigureData] | None = None,
) -> PipelineViewerResult:
    """Build a minimal PipelineViewerResult for testing.

    Args:
        page_images: Mapping of page key to image payload string. ``None``
            (the default) selects a two-entry placeholder mapping. An
            explicitly passed empty dict is kept empty.
        figures: Figures to attach to the result. ``None`` (the default)
            means no figures.

    Returns:
        A PipelineViewerResult for a two-page ``test.pdf`` with one version
        (``v0``), fixed stats and a single warning.
    """
    # Compare against None rather than relying on truthiness: ``{}`` and
    # ``[]`` are falsy, so ``page_images or {...}`` would silently replace
    # a caller's explicit empty dict with the placeholder images.
    if page_images is None:
        page_images = {"1": "AAAA", "2": "BBBB"}
    if figures is None:
        figures = []

    return PipelineViewerResult(
        filename="test.pdf",
        total_pages=2,
        versions={"v0": "# Page 1\nHello\n\n# Page 2\nWorld"},
        page_images=page_images,
        page_markdowns={"v0": {"1": "# Page 1\nHello", "2": "# Page 2\nWorld"}},
        figures=figures,
        stats={"total_chars": 100, "chars_per_page": 50},
        warnings=["Test warning"],
    )
class TestSlimInitPayload:
    """Behavioral checks for the _slim_init_payload helper."""

    def test_strips_page_images(self):
        """The slim payload carries an empty page_images dict."""
        full = _make_result(page_images={"1": "x" * 1000, "2": "y" * 1000})

        payload = _slim_init_payload(full)

        assert payload["page_images"] == {}

    def test_strips_figure_image_base64(self):
        """Each figure's image_base64 is blanked while its ref_id survives."""
        source_figures = [
            FigureData(ref_id="fig-1", caption="A chart", page_number=1, image_base64="IMGDATA1"),
            FigureData(ref_id="fig-2", caption="A photo", page_number=2, image_base64="IMGDATA2"),
        ]

        payload = _slim_init_payload(_make_result(figures=source_figures))
        slimmed = payload["figures"]

        assert len(slimmed) == 2
        assert all(entry["image_base64"] == "" for entry in slimmed)
        assert slimmed[0]["ref_id"] == "fig-1"
        assert slimmed[1]["ref_id"] == "fig-2"

    def test_preserves_figure_metadata(self):
        """A figure's caption and page_number pass through untouched."""
        only_figure = FigureData(
            ref_id="fig-1", caption="A chart", page_number=3, image_base64="DATA"
        )

        payload = _slim_init_payload(_make_result(figures=[only_figure]))
        first = payload["figures"][0]

        assert first["caption"] == "A chart"
        assert first["page_number"] == 3

    def test_preserves_metadata(self):
        """filename, total_pages, versions, stats and warnings are kept."""
        payload = _slim_init_payload(_make_result())

        assert payload["filename"] == "test.pdf"
        assert payload["total_pages"] == 2
        assert payload["versions"] == {"v0": "# Page 1\nHello\n\n# Page 2\nWorld"}
        assert payload["stats"]["total_chars"] == 100
        assert payload["warnings"] == ["Test warning"]

    def test_does_not_mutate_original(self):
        """Slimming leaves the input result object as it was."""
        untouched_figure = FigureData(
            ref_id="f1", caption="c", page_number=1, image_base64="DATA"
        )
        full = _make_result(page_images={"1": "IMG1"}, figures=[untouched_figure])

        # Return value is irrelevant here; only the input's state matters.
        _slim_init_payload(full)

        assert full.page_images == {"1": "IMG1"}
        assert full.figures[0].image_base64 == "DATA"

    def test_empty_figures_and_pages(self):
        """A result without figures or page images is handled."""
        bare = _make_result(page_images={}, figures=[])

        payload = _slim_init_payload(bare)

        assert payload["page_images"] == {}
        assert payload["figures"] == []