📦 EqualifyEverything / equalify-reflow

📄 test_error_handling.py · 369 lines
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369"""Tests for error handling edge cases (Group G bugs).

Tests for:
- Bug #11: File seek failure handling
- Bug #12: Best-effort cleanup in delete_temp_file
- Bug #13: Rate limit key collision prevention
"""

from io import BytesIO
from unittest.mock import AsyncMock, MagicMock, Mock

import pytest
from fastapi import HTTPException, UploadFile
from src.config import settings
from src.services.rate_limit_service import RateLimitService
from src.services.s3_cleanup_service import S3CleanupService
from src.services.storage_service import StorageService


class TestFileSeekErrorHandling:
    """Tests for Bug #11: File seek failure handling."""

    @pytest.fixture
    def storage_service(self, mock_s3_client):
        """Create storage service with mocked S3 client."""
        return StorageService(
            s3_client=mock_s3_client,
            temp_bucket=settings.s3_temp_bucket,
            results_bucket=settings.s3_results_bucket
        )

    @pytest.mark.parametrize("error_method,error_class,error_msg,expected_detail", [
        ("seek", OSError, "Device not ready", "Device not ready"),
        ("seek", IOError, "File handle closed", "Unable to read file"),
        ("tell", OSError, "Position unavailable", "Unable to read file"),
    ])
    @pytest.mark.asyncio
    async def test_file_operation_errors(
        self, storage_service, error_method, error_class, error_msg, expected_detail
    ):
        """Test handling of various file operation errors (Bug #11).

        Parameterized test covering:
        - OSError during seek (Device not ready)
        - IOError during seek (File handle closed)
        - OSError during tell (Position unavailable)
        """
        mock_file = Mock()

        if error_method == "seek":
            mock_file.seek.side_effect = error_class(error_msg)
        elif error_method == "tell":
            mock_file.seek.return_value = None
            mock_file.tell.side_effect = error_class(error_msg)

        upload_file = Mock(spec=UploadFile)
        upload_file.filename = "test.pdf"
        upload_file.file = mock_file
        upload_file.content_type = "application/pdf"

        with pytest.raises(HTTPException) as exc_info:
            await storage_service.store_document(upload_file)

        assert exc_info.value.status_code == 400
        assert expected_detail in exc_info.value.detail

    @pytest.mark.asyncio
    async def test_file_seek_attribute_error(self, storage_service):
        """Test handling of AttributeError for objects without seek method."""
        # Create an object without seek method
        mock_file = Mock(spec=[])  # Empty spec means no methods
        delattr(mock_file, 'seek')  # Ensure no seek attribute

        upload_file = Mock(spec=UploadFile)
        upload_file.filename = "test.pdf"
        upload_file.file = mock_file
        upload_file.content_type = "application/pdf"

        with pytest.raises(HTTPException) as exc_info:
            await storage_service.store_document(upload_file)

        assert exc_info.value.status_code == 400
        assert "Unable to read file" in exc_info.value.detail

    @pytest.mark.asyncio
    async def test_successful_file_operations(self, storage_service):
        """Test that successful file operations work as expected."""
        # Create a valid file-like object
        file_content = b"%PDF-1.4\n" + b"%Padding content\n" * 10 + b"%%EOF"
        mock_file = BytesIO(file_content)

        storage_service.s3_client.upload_fileobj = Mock()

        upload_file = Mock(spec=UploadFile)
        upload_file.filename = "test.pdf"
        upload_file.file = mock_file
        upload_file.content_type = "application/pdf"

        job_id, s3_key = await storage_service.store_document(upload_file)

        assert job_id is not None
        assert s3_key.startswith("temp/")
        assert s3_key.endswith(".pdf")
        storage_service.s3_client.upload_fileobj.assert_called_once()


class TestBestEffortCleanup:
    """Tests for Bug #12: Best-effort cleanup in delete_temp_file."""

    @pytest.fixture
    def cleanup_service(self, mock_s3_client):
        """Create cleanup service with mocked S3 client."""
        return S3CleanupService(
            s3_client=mock_s3_client,
            temp_bucket=settings.s3_temp_bucket
        )

    @pytest.mark.asyncio
    async def test_delete_success_returns_true(self, cleanup_service):
        """Test that successful deletion returns True."""
        cleanup_service.s3_client.delete_object = Mock(return_value={})

        result = await cleanup_service.delete_temp_file("temp/test.pdf")

        assert result is True
        cleanup_service.s3_client.delete_object.assert_called_once()

    @pytest.mark.parametrize("error_type,error_msg", [
        (Exception, "Unexpected S3 error"),
        (ConnectionError, "Network unreachable"),
        (TimeoutError, "Request timed out"),
    ])
    @pytest.mark.asyncio
    async def test_delete_errors_return_false(self, cleanup_service, error_type, error_msg):
        """Test that all error types return False instead of raising (Bug #12).

        Parameterized test covering:
        - Generic exceptions
        - Network errors
        - Timeout errors
        """
        cleanup_service.s3_client.delete_object = Mock(
            side_effect=error_type(error_msg)
        )

        # Should not raise exception
        result = await cleanup_service.delete_temp_file("temp/test.pdf")

        assert result is False

    @pytest.mark.asyncio
    async def test_delete_client_error_returns_false(self, cleanup_service):
        """Test that ClientError returns False instead of raising."""
        from botocore.exceptions import ClientError

        error_response = {'Error': {'Code': 'AccessDenied', 'Message': 'Access denied'}}
        cleanup_service.s3_client.delete_object = Mock(
            side_effect=ClientError(error_response, 'DeleteObject')
        )

        # Should not raise exception
        result = await cleanup_service.delete_temp_file("temp/test.pdf")

        assert result is False  # Returns False, doesn't raise

    @pytest.mark.asyncio
    async def test_delete_logs_warnings_on_error(self, cleanup_service, caplog):
        """Test that errors are logged as warnings, not errors."""
        import logging

        cleanup_service.s3_client.delete_object = Mock(
            side_effect=Exception("S3 failure")
        )

        with caplog.at_level(logging.WARNING):
            result = await cleanup_service.delete_temp_file("temp/test.pdf")

        assert result is False
        assert any("Failed to delete temp file" in record.message or
                   "Unexpected error deleting temp file" in record.message
                   for record in caplog.records)


class TestRateLimitKeyCollision:
    """Tests for Bug #13: Rate limit key collision prevention."""

    @pytest.fixture
    def redis_client(self):
        """Create mock Redis client."""
        mock_redis = MagicMock()
        # Default pipeline behavior - pipeline is synchronous, only execute() is async
        mock_pipeline = MagicMock()
        mock_pipeline.execute = AsyncMock(return_value=[None, 0])  # [zremrangebyscore result, zcard result]
        mock_redis.pipeline = MagicMock(return_value=mock_pipeline)
        mock_redis.zadd = AsyncMock()
        mock_redis.expire = AsyncMock()
        mock_redis.zremrangebyscore = AsyncMock()
        return mock_redis

    @pytest.fixture
    def rate_limit_service(self, redis_client):
        """Create rate limit service with mocked Redis."""
        return RateLimitService(redis_client)

    @pytest.mark.asyncio
    async def test_unique_members_for_concurrent_requests(self, redis_client, rate_limit_service):
        """Test that concurrent requests at same microsecond have unique members."""
        captured_members = []

        async def capture_zadd(key, mapping):
            """Capture the member keys used in zadd calls."""
            for member in mapping.keys():
                captured_members.append(member)
            return 1

        redis_client.zadd.side_effect = capture_zadd

        # Simulate multiple concurrent requests
        for _ in range(5):
            allowed, _ = await rate_limit_service.check_submit_rate_limit("192.168.1.1")
            assert allowed is True

        # All members should be unique
        assert len(captured_members) == len(set(captured_members))

        # Members should contain UUID suffix
        for member in captured_members:
            assert "-" in member  # Format: "timestamp-uuid"
            parts = member.split("-")
            assert len(parts) >= 2  # At minimum: timestamp and first uuid part

    @pytest.mark.asyncio
    async def test_member_format_includes_uuid(self, redis_client, rate_limit_service):
        """Test that members have UUID suffix to prevent collisions."""
        captured_member = None

        async def capture_zadd(key, mapping):
            nonlocal captured_member
            captured_member = list(mapping.keys())[0]
            return 1

        redis_client.zadd.side_effect = capture_zadd

        allowed, _ = await rate_limit_service.check_submit_rate_limit("192.168.1.1")
        assert allowed is True
        assert captured_member is not None

        # Verify format: timestamp-uuid (uuid is 8 hex chars)
        parts = captured_member.split("-", 1)
        assert len(parts) == 2
        timestamp_part, uuid_part = parts

        # Timestamp part should be a float-like string
        assert float(timestamp_part) > 0

        # UUID part should be hexadecimal
        assert all(c in "0123456789abcdef" for c in uuid_part[:8])

    @pytest.mark.asyncio
    async def test_score_remains_timestamp(self, redis_client, rate_limit_service):
        """Test that score value is still the timestamp for proper ordering."""
        import time

        captured_score = None

        async def capture_zadd(key, mapping):
            nonlocal captured_score
            # mapping is {member: score}
            captured_score = list(mapping.values())[0]
            return 1

        redis_client.zadd.side_effect = capture_zadd

        before_time = time.time()
        allowed, _ = await rate_limit_service.check_submit_rate_limit("192.168.1.1")
        after_time = time.time()

        assert allowed is True
        assert captured_score is not None
        assert before_time <= captured_score <= after_time

    @pytest.mark.asyncio
    async def test_cleanup_still_works_with_uuid_members(self, redis_client, rate_limit_service):
        """Test that window cleanup still functions with UUID-based members."""
        # Mock pipeline to simulate existing entries
        # Pipeline methods (zremrangebyscore, zcard) are sync, only execute() is async
        mock_pipeline = MagicMock()
        mock_pipeline.execute = AsyncMock(return_value=[5, 3])  # Removed 5 old, 3 remaining
        redis_client.pipeline.return_value = mock_pipeline

        allowed, _ = await rate_limit_service.check_submit_rate_limit("192.168.1.1")

        # Should have called zremrangebyscore to clean old entries
        assert mock_pipeline.zremrangebyscore.called


class TestIntegrationErrorHandling:
    """Integration tests for error handling across multiple bugs."""

    @pytest.mark.asyncio
    async def test_storage_service_resilience(self):
        """Test that storage service handles multiple error types gracefully."""
        mock_s3 = Mock()
        storage_service = StorageService(
            s3_client=mock_s3,
            temp_bucket="test-bucket",
            results_bucket="results-bucket"
        )

        # Test 1: File seek error
        bad_file = Mock()
        bad_file.seek.side_effect = OSError("Seek failed")
        upload = Mock(spec=UploadFile)
        upload.filename = "test.pdf"
        upload.file = bad_file
        upload.content_type = "application/pdf"

        with pytest.raises(HTTPException) as exc:
            await storage_service.store_document(upload)
        assert exc.value.status_code == 400

        # Test 2: Best-effort deletion (should not raise) - now on S3CleanupService
        cleanup_service = S3CleanupService(
            s3_client=mock_s3,
            temp_bucket="test-bucket"
        )
        mock_s3.delete_object = Mock(side_effect=Exception("S3 error"))
        result = await cleanup_service.delete_temp_file("temp/file.pdf")
        assert result is False  # Returns False, doesn't crash

    @pytest.mark.asyncio
    async def test_rate_limit_collision_resistance(self):
        """Test that rate limiting handles high concurrency without collisions."""
        # Track all zadd calls to verify uniqueness
        members_used = []

        async def track_zadd(key, mapping):
            """Async function to track zadd calls."""
            members_used.extend(mapping.keys())
            return len(mapping)

        # Create properly configured mocks
        mock_redis = MagicMock()

        # Create a single pipeline instance to reuse (avoids AsyncMock proliferation)
        mock_pipeline = MagicMock()
        mock_pipeline.zremrangebyscore = MagicMock(return_value=None)
        mock_pipeline.zcard = MagicMock(return_value=None)
        mock_pipeline.execute = AsyncMock(return_value=[None, 0])  # Always under limit

        # Return same pipeline instance every time
        mock_redis.pipeline = MagicMock(return_value=mock_pipeline)

        # Configure async methods with proper awaitable returns
        mock_redis.zadd = AsyncMock(side_effect=track_zadd)
        mock_redis.expire = AsyncMock(return_value=True)
        mock_redis.zrange = AsyncMock(return_value=[])

        service = RateLimitService(mock_redis)

        # Simulate 100 concurrent requests
        for _ in range(100):
            await service.check_submit_rate_limit("192.168.1.1")

        # All members should be unique despite high concurrency
        # Note: check_submit_rate_limit makes 2 calls (per-IP and global)
        assert len(members_used) == len(set(members_used)), \
            f"Found {len(members_used)} total members but {len(set(members_used))} unique"