# EqualifyEverything / equalify-reflow · test_s3_cleanup.py
"""Tests for S3CleanupService - S3 temporary file cleanup operations."""

from datetime import UTC, datetime, timedelta

import pytest
from src.services.s3_cleanup_service import S3CleanupService


@pytest.fixture
def mock_s3_client(mocker):
    """Build a MagicMock S3 client whose get_paginator() is pre-wired.

    Tests reach the paginator mock through
    ``client.get_paginator.return_value`` and configure ``paginate`` on it.
    """
    s3 = mocker.MagicMock()
    # Any get_paginator(...) call returns this single paginator mock.
    s3.get_paginator.return_value = mocker.MagicMock()
    return s3


@pytest.fixture
def s3_cleanup_service(mock_s3_client):
    """Instantiate the service under test on top of the mocked S3 client.

    The service is pointed at the ``test-temp-bucket`` bucket name.
    """
    service = S3CleanupService(
        temp_bucket="test-temp-bucket",
        s3_client=mock_s3_client,
    )
    return service


@pytest.fixture
def old_temp_files():
    """Sample list of old temp files from S3."""
    return [
        {
            "Key": "temp/job-1/input.pdf",
            "Size": 1024000,
            "LastModified": datetime.now(UTC) - timedelta(hours=48)
        },
        {
            "Key": "temp/job-2/input.pdf",
            "Size": 2048000,
            "LastModified": datetime.now(UTC) - timedelta(hours=36)
        },
        {
            "Key": "temp/job-3/input.pdf",
            "Size": 512000,
            "LastModified": datetime.now(UTC) - timedelta(hours=25)
        }
    ]


class TestCleanupExpiredTempFiles:
    """Exercise S3CleanupService.cleanup_expired_temp_files with a mocked S3 client."""

    @pytest.mark.asyncio
    async def test_no_expired_files(self, s3_cleanup_service, mock_s3_client):
        """An empty listing reports zero deletions, bytes freed and errors."""
        # The paginator yields no pages at all.
        mock_s3_client.get_paginator.return_value.paginate.return_value = []

        summary = await s3_cleanup_service.cleanup_expired_temp_files()

        assert summary["files_deleted"] == 0
        assert summary["bytes_freed"] == 0
        assert summary["errors"] == 0

    @pytest.mark.asyncio
    async def test_cleanup_multiple_files(
        self, s3_cleanup_service, mock_s3_client, old_temp_files
    ):
        """All three listed files are deleted and their sizes are summed."""
        # A single listing page carrying every old temp file.
        listing_page = {'Contents': old_temp_files}
        mock_s3_client.get_paginator.return_value.paginate.return_value = [listing_page]

        # Every delete call succeeds.
        mock_s3_client.delete_object.return_value = {}

        summary = await s3_cleanup_service.cleanup_expired_temp_files()

        assert summary["files_deleted"] == 3
        assert summary["bytes_freed"] == 3584000  # 1024000 + 2048000 + 512000
        assert summary["errors"] == 0

        # One delete_object call per listed file.
        assert mock_s3_client.delete_object.call_count == 3

    @pytest.mark.asyncio
    async def test_partial_cleanup_failure(
        self, s3_cleanup_service, mock_s3_client, old_temp_files
    ):
        """A ClientError on one file is counted while the others are deleted."""
        from botocore.exceptions import ClientError

        listing_page = {'Contents': old_temp_files}
        mock_s3_client.get_paginator.return_value.paginate.return_value = [listing_page]

        def deny_job_2(*args, **kwargs):
            # Only the job-2 object is rejected; every other key deletes fine.
            if 'job-2' not in kwargs.get('Key', ''):
                return {}
            raise ClientError(
                {'Error': {'Code': 'AccessDenied', 'Message': 'Access denied'}},
                'delete_object'
            )

        mock_s3_client.delete_object.side_effect = deny_job_2

        summary = await s3_cleanup_service.cleanup_expired_temp_files()

        assert summary["files_deleted"] == 2
        assert summary["bytes_freed"] == 1536000  # job-1 (1024000) + job-3 (512000)
        assert summary["errors"] == 1

    @pytest.mark.asyncio
    async def test_cleanup_with_delete_exception(
        self, s3_cleanup_service, mock_s3_client, old_temp_files
    ):
        """A generic exception on one delete is counted as a single error."""
        listing_page = {'Contents': old_temp_files}
        mock_s3_client.get_paginator.return_value.paginate.return_value = [listing_page]

        def explode_on_job_1(*args, **kwargs):
            # The job-1 object blows up; the remaining keys delete normally.
            if "job-1" not in kwargs.get('Key', ''):
                return {}
            raise Exception("S3 error")

        mock_s3_client.delete_object.side_effect = explode_on_job_1

        summary = await s3_cleanup_service.cleanup_expired_temp_files()

        assert summary["files_deleted"] == 2
        assert summary["errors"] == 1

    @pytest.mark.asyncio
    async def test_list_files_exception_handled(
        self, s3_cleanup_service, mock_s3_client
    ):
        """A failure while listing is reported as one error instead of raising."""
        # paginate() itself raises before any page is produced.
        listing = mock_s3_client.get_paginator.return_value
        listing.paginate.side_effect = Exception("S3 list error")

        # The call must return normally; the failure shows up in the counters.
        summary = await s3_cleanup_service.cleanup_expired_temp_files()

        assert summary["files_deleted"] == 0
        assert summary["bytes_freed"] == 0
        assert summary["errors"] == 1


class TestCleanupJobTempFiles:
    """Exercise S3CleanupService.cleanup_temp_files_for_job with a mocked S3 client."""

    @pytest.mark.asyncio
    async def test_cleanup_job_with_files(self, s3_cleanup_service, mock_s3_client):
        """Three listed objects are removed through a single delete_objects call."""
        job_keys = [
            'temp/test-job-123/input.pdf',
            'temp/test-job-123/page1.png',
            'temp/test-job-123/page2.png',
        ]

        # The listing returns one page containing the job's three objects.
        mock_s3_client.get_paginator.return_value.paginate.return_value = [
            {'Contents': [{'Key': key} for key in job_keys]}
        ]

        # The batch delete reports every key as deleted.
        mock_s3_client.delete_objects.return_value = {
            'Deleted': [{'Key': key} for key in job_keys]
        }

        deleted_count = await s3_cleanup_service.cleanup_temp_files_for_job("test-job-123")

        assert deleted_count == 3
        mock_s3_client.delete_objects.assert_called_once()

    @pytest.mark.asyncio
    async def test_cleanup_job_no_files(self, s3_cleanup_service, mock_s3_client):
        """With nothing listed, the result is 0 and no batch delete is issued."""
        # The paginator yields no pages.
        mock_s3_client.get_paginator.return_value.paginate.return_value = []

        deleted_count = await s3_cleanup_service.cleanup_temp_files_for_job("test-job-456")

        assert deleted_count == 0
        # Nothing to delete, so delete_objects must stay untouched.
        mock_s3_client.delete_objects.assert_not_called()

    @pytest.mark.asyncio
    async def test_cleanup_job_exception_raises(
        self, s3_cleanup_service, mock_s3_client
    ):
        """A listing failure surfaces as an HTTPException with the expected detail."""
        from fastapi import HTTPException

        # paginate() raises as soon as it is called.
        listing = mock_s3_client.get_paginator.return_value
        listing.paginate.side_effect = Exception("S3 error")

        with pytest.raises(HTTPException) as raised:
            await s3_cleanup_service.cleanup_temp_files_for_job("test-job-789")

        assert "Unexpected error during cleanup" in str(raised.value.detail)

    @pytest.mark.asyncio
    async def test_cleanup_multiple_jobs(self, s3_cleanup_service, mock_s3_client):
        """Three sequential job cleanups each report 2 files and one batch delete."""
        listed_keys = ['temp/job-x/file1.pdf', 'temp/job-x/file2.pdf']

        # The same two-object page is returned for every job.
        mock_s3_client.get_paginator.return_value.paginate.return_value = [
            {'Contents': [{'Key': key} for key in listed_keys]}
        ]

        # The batch delete reports both keys as deleted.
        mock_s3_client.delete_objects.return_value = {
            'Deleted': [{'Key': key} for key in listed_keys]
        }

        # Awaited one at a time, in order.
        counts = [
            await s3_cleanup_service.cleanup_temp_files_for_job(job_id)
            for job_id in ("job-1", "job-2", "job-3")
        ]

        assert all(count == 2 for count in counts)
        assert mock_s3_client.delete_objects.call_count == 3