📦 EqualifyEverything / equalify-reflow

📄 test_production_environment.py · 364 lines
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
"""Production environment-specific tests.

Tests for bugs that would only appear in production AWS environment:
- Bug #5: Storage URL generation broken when aws_endpoint_url is None
- Bug #6: CleanupService exception handling with botocore.exceptions.ClientError
"""

from unittest.mock import AsyncMock, MagicMock, patch

import pytest
from botocore.exceptions import ClientError
from src.config import settings
from src.services.cleanup_service import CleanupService
from src.services.s3_url_service import S3URLService
from src.services.storage_service import StorageService


class TestStorageUrlGenerationProduction:
    """Tests for Bug #5: Storage URL generation in production (aws_endpoint_url=None).

    The URL tests patch ``src.services.s3_url_service.settings`` and simulate
    production by setting ``s3_public_url`` to ``None``; the upload tests patch
    ``src.services.storage_service.settings`` and vary ``aws_endpoint_url``.
    """

    @pytest.fixture
    def mock_s3_client(self):
        """Create mock S3 client."""
        return MagicMock()

    @pytest.fixture
    def s3_url_service(self, mock_s3_client):
        """Create S3 URL service with mock client."""
        return S3URLService(
            s3_client=mock_s3_client,
            temp_bucket="equalify-temp",
            results_bucket="equalify-results"
        )

    @pytest.fixture
    def storage_service(self, mock_s3_client):
        """Create storage service with mock client."""
        return StorageService(
            s3_client=mock_s3_client,
            temp_bucket="equalify-temp",
            results_bucket="equalify-results"
        )

    def test_get_result_url_production_us_east_1(self, s3_url_service):
        """Test result URL generation for production in us-east-1."""
        with patch('src.services.s3_url_service.settings') as mock_settings:
            mock_settings.s3_public_url = None  # Production
            mock_settings.aws_region = "us-east-1"

            url = s3_url_service.get_result_url("job123", "html")

            assert url == "https://equalify-results.s3.us-east-1.amazonaws.com/job123.html"
            assert "None" not in url  # Bug would produce: None/equalify-results/job123.html
            assert url.startswith("https://")

    def test_get_result_url_production_eu_west_1(self, s3_url_service):
        """Test result URL generation for production in eu-west-1."""
        with patch('src.services.s3_url_service.settings') as mock_settings:
            mock_settings.s3_public_url = None  # Production
            mock_settings.aws_region = "eu-west-1"

            url = s3_url_service.get_result_url("job456", "mdx")

            assert url == "https://equalify-results.s3.eu-west-1.amazonaws.com/job456.mdx"
            assert "eu-west-1" in url

    def test_get_result_url_production_default_region(self, s3_url_service):
        """Test result URL falls back to us-east-1 when region not set."""
        with patch('src.services.s3_url_service.settings') as mock_settings:
            mock_settings.s3_public_url = None  # Production
            mock_settings.aws_region = None  # No region specified

            url = s3_url_service.get_result_url("job789", "html")

            assert url == "https://equalify-results.s3.us-east-1.amazonaws.com/job789.html"
            assert "us-east-1" in url  # Should default to us-east-1

    def test_get_result_url_dev(self, s3_url_service):
        """Test result URL generation for dev environment (Floci path-style)."""
        with patch('src.services.s3_url_service.settings') as mock_settings:
            mock_settings.s3_public_url = "http://floci:4566"  # Floci
            mock_settings.aws_region = "us-east-1"

            url = s3_url_service.get_result_url("job-dev", "html")

            assert url == "http://floci:4566/equalify-results/job-dev.html"
            assert "floci" in url
            assert not url.startswith("https://")

    @pytest.mark.parametrize(
        "region",
        [
            "us-east-1",
            "us-west-2",
            "eu-west-1",
            "eu-central-1",
            "ap-southeast-1",
            "ap-northeast-1",
        ],
    )
    def test_get_result_url_various_regions(self, s3_url_service, region):
        """Test URL generation across different AWS regions.

        Parametrized so that each region is reported as its own test case and
        a failure for one region does not hide failures for the others.
        """
        with patch('src.services.s3_url_service.settings') as mock_settings:
            mock_settings.s3_public_url = None  # Production
            mock_settings.aws_region = region

            url = s3_url_service.get_result_url("test-job", "html")

            expected_url = f"https://equalify-results.s3.{region}.amazonaws.com/test-job.html"
            assert url == expected_url
            assert region in url

    @pytest.mark.asyncio
    async def test_upload_result_production_url(self, storage_service, mock_s3_client):
        """Test upload_result returns the S3 key when aws_endpoint_url is None.

        Phase 3: upload_result stores keys, not URLs.
        """
        mock_s3_client.put_object.return_value = None

        with patch('src.services.storage_service.settings') as mock_settings:
            mock_settings.aws_endpoint_url = None  # Production
            mock_settings.aws_region = "us-east-1"

            key = await storage_service.upload_result(
                job_id="prod-job",
                content="<html>Content</html>",
                format="html"
            )

            # Phase 3: upload_result now returns S3 key instead of URL
            assert key == "prod-job.html"
            assert "None" not in key

    @pytest.mark.asyncio
    async def test_upload_result_dev_url(self, storage_service, mock_s3_client):
        """Test upload_result returns the S3 key when aws_endpoint_url is the Floci URL.

        Phase 3: upload_result stores keys, not URLs.
        """
        mock_s3_client.put_object.return_value = None

        with patch('src.services.storage_service.settings') as mock_settings:
            mock_settings.aws_endpoint_url = "http://floci:4566"
            mock_settings.aws_region = "us-east-1"

            key = await storage_service.upload_result(
                job_id="dev-job",
                content="# MDX Content",
                format="mdx"
            )

            # Phase 3: upload_result now returns S3 key instead of URL
            assert key == "dev-job.mdx"

    def test_url_format_virtual_hosted_style(self, s3_url_service):
        """Test production URLs use virtual-hosted-style (bucket.s3.region.amazonaws.com)."""
        with patch('src.services.s3_url_service.settings') as mock_settings:
            mock_settings.s3_public_url = None  # Production
            mock_settings.aws_region = "us-east-1"

            url = s3_url_service.get_result_url("test", "html")

            # Virtual-hosted-style: https://bucket.s3.region.amazonaws.com/key
            # NOT path-style: https://s3.region.amazonaws.com/bucket/key
            assert url.startswith("https://equalify-results.s3.")
            assert ".amazonaws.com/" in url


class TestCleanupServiceExceptionHandling:
    """Tests for Bug #6: CleanupService exception handling with ClientError.

    Every test drives ``cleanup_job_files`` against an ``AsyncMock`` S3 client
    whose ``delete_object`` either returns ``{}`` or raises.
    """

    @staticmethod
    def _delete_object_error(code, message):
        """Build a ClientError for the DeleteObject operation with the given code and message."""
        return ClientError(
            {"Error": {"Code": code, "Message": message}},
            "DeleteObject",
        )

    @pytest.fixture
    def mock_s3_client(self):
        """Provide an AsyncMock standing in for the async S3 client."""
        return AsyncMock()

    @pytest.fixture
    def cleanup_service(self, mock_s3_client):
        """Provide a CleanupService wired to the mocked S3 client."""
        return CleanupService(mock_s3_client)

    @pytest.mark.asyncio
    async def test_cleanup_handles_nosuchkey_error(self, cleanup_service, mock_s3_client):
        """A NoSuchKey ClientError (object already gone) is reported as success."""
        mock_s3_client.delete_object.side_effect = self._delete_object_error(
            "NoSuchKey", "The specified key does not exist."
        )

        outcome = await cleanup_service.cleanup_job_files("temp/missing.pdf")

        # Deleting a missing object is expected to be idempotent.
        assert outcome is True

    @pytest.mark.asyncio
    async def test_cleanup_handles_404_error(self, cleanup_service, mock_s3_client):
        """A ClientError carrying the alternative "404" code is reported as success."""
        mock_s3_client.delete_object.side_effect = self._delete_object_error(
            "404", "Not Found"
        )

        outcome = await cleanup_service.cleanup_job_files("temp/404.pdf")

        # Deleting a missing object is expected to be idempotent.
        assert outcome is True

    @pytest.mark.asyncio
    async def test_cleanup_handles_access_denied(self, cleanup_service, mock_s3_client):
        """An AccessDenied ClientError is reported as failure."""
        mock_s3_client.delete_object.side_effect = self._delete_object_error(
            "AccessDenied", "Access Denied"
        )

        outcome = await cleanup_service.cleanup_job_files("temp/denied.pdf")

        # A real error, not the idempotent "already deleted" case.
        assert outcome is False

    @pytest.mark.asyncio
    async def test_cleanup_handles_internal_error(self, cleanup_service, mock_s3_client):
        """An InternalError ClientError is reported as failure."""
        mock_s3_client.delete_object.side_effect = self._delete_object_error(
            "InternalError", "We encountered an internal error"
        )

        outcome = await cleanup_service.cleanup_job_files("temp/error.pdf")

        assert outcome is False

    @pytest.mark.asyncio
    async def test_cleanup_idempotent_success(self, cleanup_service, mock_s3_client):
        """Deleting the same file twice succeeds both times."""
        delete_object = mock_s3_client.delete_object

        # First delete: the object exists and the call succeeds.
        delete_object.return_value = {}
        first_outcome = await cleanup_service.cleanup_job_files("temp/file.pdf")
        assert first_outcome is True

        # Second delete: the object is already gone, so S3 answers NoSuchKey.
        delete_object.side_effect = self._delete_object_error(
            "NoSuchKey", "Key not found"
        )
        second_outcome = await cleanup_service.cleanup_job_files("temp/file.pdf")
        assert second_outcome is True

    @pytest.mark.asyncio
    async def test_cleanup_handles_generic_exception(self, cleanup_service, mock_s3_client):
        """A non-ClientError exception is reported as failure instead of propagating."""
        mock_s3_client.delete_object.side_effect = RuntimeError("Network timeout")

        outcome = await cleanup_service.cleanup_job_files("temp/timeout.pdf")

        # The error must be swallowed and surfaced as False, not crash the caller.
        assert outcome is False

    @pytest.mark.asyncio
    async def test_cleanup_success_case(self, cleanup_service, mock_s3_client):
        """A successful delete returns True and targets the configured temp bucket."""
        mock_s3_client.delete_object.return_value = {}

        outcome = await cleanup_service.cleanup_job_files("temp/success.pdf")

        assert outcome is True
        expected_call = {
            "Bucket": settings.s3_temp_bucket,
            "Key": "temp/success.pdf",
        }
        mock_s3_client.delete_object.assert_called_once_with(**expected_call)


class TestProductionConfigurationScenarios:
    """Integration tests for production configuration scenarios."""

    def test_production_config_has_no_endpoint_url(self):
        """Document that production configuration doesn't set aws_endpoint_url.

        Placeholder: the environment is patched but no settings object is
        re-read inside the block, so nothing is validated yet.
        """
        # Intended to verify that .env.prod leaves AWS_ENDPOINT_URL unset:
        # in production, settings.aws_endpoint_url should be None or empty string.
        # TODO: replace the placeholder with an assertion on freshly loaded settings.
        with patch.dict('os.environ', {'AWS_ENDPOINT_URL': ''}, clear=False):
            # Simulate production environment
            assert True  # Placeholder - actual config validation

    def test_dev_config_has_floci_endpoint(self):
        """Document that development configuration sets the Floci endpoint.

        Placeholder: the environment is patched but no settings object is
        re-read inside the block, so nothing is validated yet.
        """
        # In development, settings.aws_endpoint_url should be set.
        # TODO: replace the placeholder with an assertion on freshly loaded settings.
        with patch.dict('os.environ', {'AWS_ENDPOINT_URL': 'http://floci:4566'}, clear=False):
            # Simulate dev environment
            assert True  # Placeholder - actual config validation

    def test_storage_service_works_in_both_environments(self):
        """Test S3 URL service URL generation works in dev and prod.

        Synchronous on purpose: ``get_result_url`` is called without ``await``,
        so no event loop (and no asyncio marker) is needed.
        """
        mock_s3 = MagicMock()
        s3_url_service = S3URLService(mock_s3, "temp-bucket", "results-bucket")

        # Test production
        with patch('src.services.s3_url_service.settings') as mock_settings:
            mock_settings.s3_public_url = None
            mock_settings.aws_region = "us-east-1"
            prod_url = s3_url_service.get_result_url("test", "html")
            assert prod_url.startswith("https://")
            assert "s3.us-east-1.amazonaws.com" in prod_url

        # Test development
        with patch('src.services.s3_url_service.settings') as mock_settings:
            mock_settings.s3_public_url = "http://floci:4566"
            mock_settings.aws_region = "us-east-1"
            dev_url = s3_url_service.get_result_url("test", "html")
            assert dev_url.startswith("http://floci")


class TestS3UrlFormats:
    """Tests for different S3 URL format styles."""

    # Dotted path of the settings object that every test here swaps for a mock.
    _SETTINGS_TARGET = "src.services.s3_url_service.settings"

    @pytest.fixture
    def s3_url_service(self):
        """Provide an S3URLService backed by a MagicMock client and test buckets."""
        return S3URLService(
            s3_client=MagicMock(),
            temp_bucket="test-temp",
            results_bucket="test-results",
        )

    def test_virtual_hosted_style_url_format(self, s3_url_service):
        """With no public URL configured, the result URL is virtual-hosted-style."""
        with patch(self._SETTINGS_TARGET) as fake_settings:
            fake_settings.s3_public_url = None
            fake_settings.aws_region = "us-west-2"

            result_url = s3_url_service.get_result_url("doc123", "html")

        # Expected shape: https://<bucket>.s3.<region>.amazonaws.com/<key>
        assert result_url == "https://test-results.s3.us-west-2.amazonaws.com/doc123.html"
        # Two slashes from the scheme plus exactly one before the key.
        assert result_url.count("/") == 3

    def test_path_style_url_for_dev(self, s3_url_service):
        """With a Floci public URL configured, the result URL is path-style."""
        with patch(self._SETTINGS_TARGET) as fake_settings:
            fake_settings.s3_public_url = "http://floci:4566"

            result_url = s3_url_service.get_result_url("doc456", "mdx")

        # Expected shape: http://<endpoint>/<bucket>/<key>
        assert result_url == "http://floci:4566/test-results/doc456.mdx"

    def test_url_accessibility(self, s3_url_service):
        """The generated production URL is well-formed (string checks only, no request is made)."""
        with patch(self._SETTINGS_TARGET) as fake_settings:
            fake_settings.s3_public_url = None
            fake_settings.aws_region = "us-east-1"

            result_url = s3_url_service.get_result_url("accessibility-test", "html")

        # HTTPS scheme, no whitespace, no stringified None, AWS host, right extension.
        assert result_url.startswith("https://")
        assert " " not in result_url
        assert "None" not in result_url
        assert ".amazonaws.com/" in result_url
        assert result_url.endswith(".html")