1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
"""Unit tests for boundary hint generation (section context, running headers/footers)."""
from __future__ import annotations
import pytest
# Module-level pytest mark: applies the ``unit`` marker to every test in this file.
pytestmark = pytest.mark.unit
from src.services.pipeline_viewer import PipelineViewerService
from src.services.pipeline_viewer_models import (
LayoutType,
OutlineEntry,
PageAttributes,
PipelineViewerResult,
SectionEntry,
SectionMap,
StructureResult,
)
@pytest.fixture
def service():
    """Provide a newly constructed ``PipelineViewerService`` to the requesting test."""
    viewer_service = PipelineViewerService()
    return viewer_service
@pytest.fixture
def structure():
    """Provide a three-page ``StructureResult`` shared by the boundary-hint tests.

    All three pages are single-column. The outline holds a level-1 "Title"
    and a level-2 "Methods" on page 1, plus a level-2 "Results" on page 3.
    There are no footnotes.
    """
    # One independent PageAttributes instance per page number.
    single_column_pages = {
        page_number: PageAttributes(layout=LayoutType.SINGLE_COLUMN)
        for page_number in (1, 2, 3)
    }
    headings = [
        OutlineEntry(level=1, text="Title", page=1),
        OutlineEntry(level=2, text="Methods", page=1),
        OutlineEntry(level=2, text="Results", page=3),
    ]
    return StructureResult(
        page_attributes=single_column_pages,
        outline=headings,
        footnotes=[],
    )
class TestBoundaryHintGeneration:
    """Tests for ``PipelineViewerService._generate_boundary_hints``.

    Each test builds a small ``PipelineViewerResult`` (and, where relevant, a
    ``SectionMap``), calls the method together with the ``structure`` fixture,
    and inspects the ``"hints"`` list of the returned boundary snippets.
    """

    @staticmethod
    def _make_result(pages: dict[str, str]) -> PipelineViewerResult:
        """Build a minimal result whose single version ``"v0"`` holds *pages*.

        Args:
            pages: Mapping of page-number strings to that page's markdown.

        Returns:
            A ``PipelineViewerResult`` with ``total_pages`` equal to
            ``len(pages)`` and every other collection left empty.
        """
        return PipelineViewerResult(
            filename="test.pdf",
            total_pages=len(pages),
            versions={"v0": "combined"},
            page_images={},
            page_markdowns={"v0": pages},
            figures=[],
            steps=[],
            stats={},
        )

    @staticmethod
    def _make_section(
        *,
        index: int,
        heading_text: str,
        pages: list[int],
        markdown: str,
        outline_position: int,
    ) -> SectionEntry:
        """Build a level-2 ``SectionEntry`` from the fields the tests vary."""
        return SectionEntry(
            index=index,
            heading_text=heading_text,
            heading_level=2,
            pages=pages,
            markdown=markdown,
            outline_position=outline_position,
        )

    def test_same_section_hint(self, service, structure):
        """Pages in the same section should get a SAME SECTION hint."""
        result = self._make_result({
            "1": "## Methods\n\nFirst part of methods.",
            "2": "Continuing methods discussion.",
            "3": "## Results\n\nResults here.",
        })
        section_map = SectionMap(sections=[
            self._make_section(
                index=0,
                heading_text="Methods",
                pages=[1, 2],
                markdown="methods content",
                outline_position=1,
            ),
            self._make_section(
                index=1,
                heading_text="Results",
                pages=[3],
                markdown="results content",
                outline_position=2,
            ),
        ])

        snippets = service._generate_boundary_hints(result, structure, section_map)

        assert len(snippets) == 2  # boundary 1-2 and 2-3

        # Boundary 1-2: both pages are listed under the "Methods" section.
        b12 = snippets[0]
        assert b12["page_before"] == 1
        assert b12["page_after"] == 2
        assert any("SAME SECTION" in h for h in b12["hints"])
        assert any("Methods" in h for h in b12["hints"])

    def test_section_transition_hint(self, service, structure):
        """Boundary where one section ends and another begins."""
        result = self._make_result({
            "1": "## Methods\n\nMethods content.",
            "2": "## Results\n\nResults content.",
        })
        section_map = SectionMap(sections=[
            self._make_section(
                index=0,
                heading_text="Methods",
                pages=[1],
                markdown="methods",
                outline_position=1,
            ),
            self._make_section(
                index=1,
                heading_text="Results",
                pages=[2],
                markdown="results",
                outline_position=2,
            ),
        ])

        snippets = service._generate_boundary_hints(result, structure, section_map)

        assert len(snippets) == 1
        hints = snippets[0]["hints"]
        assert any("SECTION TRANSITION" in h for h in hints)
        # A single hint must name both the outgoing and the incoming section.
        assert any("Methods" in h and "Results" in h for h in hints)

    def test_running_header_in_boundary(self, service, structure):
        """Running header at start of page_after should be flagged."""
        result = self._make_result({
            "1": "# Title\n\nContent of page 1.",
            "2": "Title 2\n\nContent of page 2.",
        })
        section_map = SectionMap(sections=[])

        snippets = service._generate_boundary_hints(result, structure, section_map)

        # Fail with a readable message instead of an IndexError below.
        assert snippets, "expected a boundary snippet for a two-page document"
        hints = snippets[0]["hints"]
        assert any("Running header" in h for h in hints)

    def test_running_footer_in_boundary(self, service, structure):
        """Running footer at end of page_before should be flagged."""
        # Page 1 ends with a bare "1" line, which the test expects to be
        # reported as a page-number footer.
        result = self._make_result({
            "1": "# Title\n\nContent.\n\n1",
            "2": "More content.",
        })
        section_map = SectionMap(sections=[])

        snippets = service._generate_boundary_hints(result, structure, section_map)

        # Fail with a readable message instead of an IndexError below.
        assert snippets, "expected a boundary snippet for a two-page document"
        hints = snippets[0]["hints"]
        assert any("Page number footer" in h for h in hints)

    def test_single_page_no_boundaries(self, service, structure):
        """A single-page document should produce no boundary snippets."""
        result = self._make_result({
            "1": "# Title\n\nContent.",
        })
        section_map = SectionMap(sections=[])

        snippets = service._generate_boundary_hints(result, structure, section_map)

        assert len(snippets) == 0