📦 EqualifyEverything / equalify-reflow

📄 test_storage_service.py · 380 lines
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
"""Unit tests for StorageService (core upload/download operations)."""

from io import BytesIO

import pytest
from botocore.exceptions import ClientError
from fastapi import HTTPException, UploadFile
from src.config import settings
from src.services.storage_service import StorageService

from tests.conftest_fixtures.data_factories import create_test_upload_file


@pytest.fixture
def storage_service(mock_s3_client):
    """Build a StorageService around the mocked S3 client.

    The temp and results bucket names are taken from ``settings`` so that
    tests can compare against the same values the service was given.
    """
    service = StorageService(
        results_bucket=settings.s3_results_bucket,
        temp_bucket=settings.s3_temp_bucket,
        s3_client=mock_s3_client,
    )
    return service


@pytest.fixture
def sample_pdf_upload(mocker):
    """Provide an upload-file object named ``test.pdf`` built by the shared factory."""
    upload = create_test_upload_file(mocker, filename="test.pdf")
    return upload


class TestStoreDocument:
    """Covers ``store_document``: the happy path plus its rejection and failure paths."""

    @pytest.mark.asyncio
    async def test_store_document_success(self, storage_service, mock_s3_client, sample_pdf_upload):
        """A PDF upload returns a job id and a ``temp/`` key ending in ``.pdf``."""
        # Arrange: the S3 upload call completes without returning anything.
        mock_s3_client.upload_fileobj.return_value = None

        # Act
        result = await storage_service.store_document(sample_pdf_upload)
        job_id, s3_key = result

        # Assert: identifiers look right and S3 was hit exactly once.
        assert job_id is not None
        assert s3_key.startswith("temp/")
        assert s3_key.endswith(".pdf")
        mock_s3_client.upload_fileobj.assert_called_once()

    @pytest.mark.asyncio
    async def test_store_document_invalid_type(self, storage_service, sample_pdf_upload):
        """An upload whose content type is not PDF is rejected with HTTP 400."""
        sample_pdf_upload.content_type = "text/plain"

        with pytest.raises(HTTPException) as exc_info:
            await storage_service.store_document(sample_pdf_upload)

        error = exc_info.value
        assert error.status_code == 400
        assert "PDF files" in error.detail

    @pytest.mark.asyncio
    async def test_store_document_too_large(self, storage_service, mock_s3_client, mocker):
        """A payload one byte over ``settings.max_upload_size`` is rejected with HTTP 413."""
        # One byte beyond the configured limit is the smallest oversized payload.
        oversized_stream = BytesIO(b"x" * (settings.max_upload_size + 1))

        upload_file = mocker.Mock(spec=UploadFile)
        upload_file.content_type = "application/pdf"
        upload_file.filename = "large.pdf"
        upload_file.file = oversized_stream

        with pytest.raises(HTTPException) as exc_info:
            await storage_service.store_document(upload_file)

        error = exc_info.value
        assert error.status_code == 413
        assert "exceeds maximum" in error.detail

    @pytest.mark.asyncio
    async def test_store_document_upload_failure(self, storage_service, mock_s3_client, sample_pdf_upload):
        """An exception raised by the S3 upload surfaces as HTTP 500."""
        mock_s3_client.upload_fileobj.side_effect = Exception("S3 error")

        with pytest.raises(HTTPException) as exc_info:
            await storage_service.store_document(sample_pdf_upload)

        error = exc_info.value
        assert error.status_code == 500
        assert "Failed to upload" in error.detail


class TestDownloadTempFile:
    """Covers ``download_temp_file``: success, missing key, and unexpected errors."""

    @pytest.mark.asyncio
    async def test_download_success(self, storage_service, mock_s3_client):
        """The body returned by ``get_object`` is handed back unchanged."""
        key = "temp/job123/file.pdf"
        payload = b"PDF content"
        mock_s3_client.get_object.return_value = {"Body": BytesIO(payload)}

        downloaded = await storage_service.download_temp_file(key)

        assert downloaded == payload
        # The read must target the temp bucket with the exact key requested.
        mock_s3_client.get_object.assert_called_once_with(
            Bucket=settings.s3_temp_bucket,
            Key=key,
        )

    @pytest.mark.asyncio
    async def test_download_file_not_found(self, storage_service, mock_s3_client):
        """A ``NoSuchKey`` client error is translated into HTTP 404."""
        no_such_key = ClientError({"Error": {"Code": "NoSuchKey"}}, "GetObject")
        mock_s3_client.get_object.side_effect = no_such_key

        with pytest.raises(HTTPException) as exc_info:
            await storage_service.download_temp_file("temp/missing.pdf")

        error = exc_info.value
        assert error.status_code == 404
        assert "not found" in error.detail

    @pytest.mark.asyncio
    async def test_download_unexpected_error(self, storage_service, mock_s3_client):
        """Any other exception from ``get_object`` is reported as HTTP 500."""
        mock_s3_client.get_object.side_effect = Exception("Network error")

        with pytest.raises(HTTPException) as exc_info:
            await storage_service.download_temp_file("temp/file.pdf")

        assert exc_info.value.status_code == 500


class TestUploadResult:
    """Covers ``upload_result``: returned keys, content types, caching, and failure."""

    @pytest.mark.asyncio
    async def test_upload_markdown_result(self, storage_service, mock_s3_client):
        """A Markdown result is stored as ``<job_id>.md`` and a bare key is returned."""
        mock_s3_client.put_object.return_value = None

        s3_key = await storage_service.upload_result(
            job_id="job123",
            content="# Accessible content",
            format="md",
        )

        # The return value is an object key, never a URL.
        assert s3_key == "job123.md"
        assert "http" not in s3_key
        assert "s3.amazonaws.com" not in s3_key

        mock_s3_client.put_object.assert_called_once()
        put_kwargs = mock_s3_client.put_object.call_args.kwargs
        assert put_kwargs["ContentType"] == "text/markdown"
        assert put_kwargs["Bucket"] == settings.s3_results_bucket

    @pytest.mark.asyncio
    async def test_upload_result_with_suffix(self, storage_service, mock_s3_client):
        """A suffix is appended to the job id with a hyphen in the returned key."""
        mock_s3_client.put_object.return_value = None

        s3_key = await storage_service.upload_result(
            job_id="job456",
            content="# Original markdown",
            format="md",
            suffix="original",
        )

        assert s3_key == "job456-original.md"
        assert "http" not in s3_key

    @pytest.mark.asyncio
    async def test_upload_unsupported_format(self, storage_service, mock_s3_client):
        """A ``txt`` format is stored with the ``text/plain`` content type."""
        mock_s3_client.put_object.return_value = None

        s3_key = await storage_service.upload_result(
            job_id="job456",
            content="# Markdown content",
            format="txt",
        )

        assert s3_key == "job456.txt"
        put_kwargs = mock_s3_client.put_object.call_args.kwargs
        assert put_kwargs["ContentType"] == "text/plain"

    @pytest.mark.asyncio
    async def test_upload_result_with_cache_control(self, storage_service, mock_s3_client):
        """The ``put_object`` call carries a ``CacheControl`` value containing ``max-age``."""
        mock_s3_client.put_object.return_value = None

        await storage_service.upload_result("job789", "content", "md")

        put_kwargs = mock_s3_client.put_object.call_args.kwargs
        assert "CacheControl" in put_kwargs
        assert "max-age" in put_kwargs["CacheControl"]

    @pytest.mark.asyncio
    async def test_upload_result_failure(self, storage_service, mock_s3_client):
        """An exception raised by ``put_object`` surfaces as HTTP 500."""
        mock_s3_client.put_object.side_effect = Exception("S3 error")

        with pytest.raises(HTTPException) as exc_info:
            await storage_service.upload_result("job123", "content", "md")

        error = exc_info.value
        assert error.status_code == 500
        assert "Failed to upload result" in error.detail


class TestUploadImage:
    """Covers ``upload_image``: returned key, ``put_object`` arguments, and failure."""

    @pytest.mark.asyncio
    async def test_upload_image_returns_key(self, storage_service, mock_s3_client):
        """An image is stored under ``<job_id>/images/<name>`` in the results bucket."""
        expected_key = "job123/images/figure-1.png"
        mock_s3_client.put_object.return_value = None

        s3_key = await storage_service.upload_image(
            job_id="job123",
            image_data=b"PNG image data",
            image_name="figure-1.png",
        )

        # The return value is an object key, never a URL.
        assert s3_key == expected_key
        assert "http" not in s3_key
        assert "s3.amazonaws.com" not in s3_key

        mock_s3_client.put_object.assert_called_once()
        put_kwargs = mock_s3_client.put_object.call_args.kwargs
        assert put_kwargs["ContentType"] == "image/png"
        assert put_kwargs["Bucket"] == settings.s3_results_bucket
        assert put_kwargs["Key"] == expected_key

    @pytest.mark.asyncio
    async def test_upload_image_failure(self, storage_service, mock_s3_client):
        """An exception raised by ``put_object`` surfaces as HTTP 500."""
        mock_s3_client.put_object.side_effect = Exception("S3 error")

        with pytest.raises(HTTPException) as exc_info:
            await storage_service.upload_image(
                job_id="job456",
                image_data=b"PNG",
                image_name="table-1.png",
            )

        error = exc_info.value
        assert error.status_code == 500
        assert "Failed to upload image" in error.detail


class TestUploadPageImage:
    """Covers ``upload_page_image``: key layout, target bucket, caching, and failure."""

    @pytest.mark.asyncio
    async def test_upload_page_image_returns_key(self, storage_service, mock_s3_client):
        """A page image is stored under ``<job_id>/pages/page-<n>.png`` in the temp bucket."""
        expected_key = "job123/pages/page-1.png"
        mock_s3_client.put_object.return_value = None

        s3_key = await storage_service.upload_page_image(
            job_id="job123",
            page_num=1,
            image_data=b"PNG image data",
        )

        # The return value is an object key, never a URL.
        assert s3_key == expected_key
        assert "http" not in s3_key
        assert "s3.amazonaws.com" not in s3_key

        mock_s3_client.put_object.assert_called_once()
        put_kwargs = mock_s3_client.put_object.call_args.kwargs
        assert put_kwargs["ContentType"] == "image/png"
        assert put_kwargs["Bucket"] == settings.s3_temp_bucket
        assert put_kwargs["Key"] == expected_key

    @pytest.mark.asyncio
    async def test_upload_page_image_multiple_pages(self, storage_service, mock_s3_client):
        """Single- and double-digit page numbers each produce their own key."""
        mock_s3_client.put_object.return_value = None

        # (page number, image bytes, expected key), uploaded in this order.
        cases = (
            (1, b"Page 1 PNG", "job456/pages/page-1.png"),
            (10, b"Page 10 PNG", "job456/pages/page-10.png"),
        )
        for page_num, image_data, expected_key in cases:
            s3_key = await storage_service.upload_page_image(
                job_id="job456",
                page_num=page_num,
                image_data=image_data,
            )
            assert s3_key == expected_key

    @pytest.mark.asyncio
    async def test_upload_page_image_with_cache_control(self, storage_service, mock_s3_client):
        """The ``put_object`` call carries a ``CacheControl`` value containing ``604800``."""
        mock_s3_client.put_object.return_value = None

        await storage_service.upload_page_image(
            job_id="job789",
            page_num=5,
            image_data=b"PNG data",
        )

        put_kwargs = mock_s3_client.put_object.call_args.kwargs
        assert "CacheControl" in put_kwargs
        # 604800 seconds is seven days.
        assert "604800" in put_kwargs["CacheControl"]

    @pytest.mark.asyncio
    async def test_upload_page_image_failure(self, storage_service, mock_s3_client):
        """An exception raised by ``put_object`` surfaces as HTTP 500 naming the page."""
        mock_s3_client.put_object.side_effect = Exception("S3 error")

        with pytest.raises(HTTPException) as exc_info:
            await storage_service.upload_page_image(
                job_id="job999",
                page_num=1,
                image_data=b"PNG",
            )

        error = exc_info.value
        assert error.status_code == 500
        assert "Failed to upload page 1 image" in error.detail


class TestFileExists:
    """Covers ``file_exists``: present object, missing object, and unexpected errors."""

    @pytest.mark.asyncio
    async def test_file_exists_true(self, storage_service, mock_s3_client):
        """A successful ``head_object`` call reports the file as present."""
        mock_s3_client.head_object.return_value = {"ContentLength": 1024}

        found = await storage_service.file_exists("bucket", "key")

        assert found is True
        mock_s3_client.head_object.assert_called_once_with(Bucket="bucket", Key="key")

    @pytest.mark.asyncio
    async def test_file_exists_false(self, storage_service, mock_s3_client):
        """A ``404`` client error from ``head_object`` reports the file as absent."""
        not_found = ClientError({"Error": {"Code": "404"}}, "HeadObject")
        mock_s3_client.head_object.side_effect = not_found

        found = await storage_service.file_exists("bucket", "key")

        assert found is False

    @pytest.mark.asyncio
    async def test_file_exists_error(self, storage_service, mock_s3_client):
        """Any other exception from ``head_object`` is also reported as absent."""
        mock_s3_client.head_object.side_effect = Exception("Network error")

        found = await storage_service.file_exists("bucket", "key")

        # Errors are expected to fall back to False rather than propagate.
        assert found is False


class TestCheckS3Access:
    """Covers ``check_s3_access``: reachable and unreachable S3."""

    @pytest.mark.asyncio
    async def test_s3_accessible(self, storage_service, mock_s3_client):
        """A successful ``head_bucket`` on the temp bucket reports S3 as accessible."""
        healthy_response = {"ResponseMetadata": {"HTTPStatusCode": 200}}
        mock_s3_client.head_bucket.return_value = healthy_response

        is_accessible = await storage_service.check_s3_access()

        assert is_accessible is True
        mock_s3_client.head_bucket.assert_called_once_with(Bucket=settings.s3_temp_bucket)

    @pytest.mark.asyncio
    async def test_s3_not_accessible(self, storage_service, mock_s3_client):
        """An exception from ``head_bucket`` reports S3 as not accessible."""
        mock_s3_client.head_bucket.side_effect = Exception("Connection refused")

        is_accessible = await storage_service.check_s3_access()

        assert is_accessible is False