"""Queue payload models for Redis queue communication."""
from datetime import datetime
from pydantic import BaseModel, ConfigDict, Field
from .pii import PIIFinding
class PIIQueuePayload(BaseModel):
    """Message placed on the PII scanning queue.

    Pushed to ``eq-pdf:queue:pii`` when a new job is submitted; the PII
    worker consumes it to scan the referenced PDF for sensitive data.

    Attributes:
        job_id: Job identifier; must match the lowercase, hyphenated
            8-4-4-4-12 hexadecimal UUID layout.
        s3_key: Non-empty S3 object key of the PDF to scan.
        created_at: Job creation timestamp, expected to be UTC.
            NOTE: the annotation is plain ``datetime``, so the model itself
            does not enforce timezone-awareness.

    Example:
        >>> from datetime import datetime, timezone
        >>> payload = PIIQueuePayload(
        ...     job_id="550e8400-e29b-41d4-a716-446655440000",
        ...     s3_key="temp/550e8400-e29b-41d4-a716-446655440000/input.pdf",
        ...     created_at=datetime.now(timezone.utc),
        ... )
    """

    # Sample document surfaced in the generated JSON schema.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "job_id": "550e8400-e29b-41d4-a716-446655440000",
                "s3_key": "temp/550e8400-e29b-41d4-a716-446655440000/input.pdf",
                "created_at": "2024-01-15T10:30:00Z",
            },
        },
    )

    # All three fields are required (default is Ellipsis).
    job_id: str = Field(
        default=...,
        description="UUID format job identifier",
        pattern=r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$",
    )
    s3_key: str = Field(
        default=...,
        description="S3 object key for PDF",
        min_length=1,
    )
    created_at: datetime = Field(
        default=...,
        description="UTC creation timestamp",
    )
class ApprovalQueuePayload(BaseModel):
    """Message placed on the approval workflow queue.

    Pushed to ``eq-pdf:queue:approval`` when PII is detected; the approval
    worker sends a notification and waits for a decision.

    Attributes:
        job_id: Job identifier; must match the lowercase, hyphenated
            8-4-4-4-12 hexadecimal UUID layout.
        s3_key: Non-empty S3 object key of the flagged PDF.
        pii_findings: Detected PII entities; at least one is required.
        approval_token: Secure token for the approval URL, 32 to 64
            characters long.
        expires_at: Approval expiration timestamp, expected to be UTC.
            NOTE: the annotation is plain ``datetime``, so the model itself
            does not enforce timezone-awareness.

    Example:
        >>> from datetime import datetime, timedelta, timezone
        >>> payload = ApprovalQueuePayload(
        ...     job_id="550e8400-e29b-41d4-a716-446655440000",
        ...     s3_key="temp/550e8400-e29b-41d4-a716-446655440000/input.pdf",
        ...     pii_findings=findings,  # non-empty list of PIIFinding
        ...     approval_token="a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6",
        ...     expires_at=datetime.now(timezone.utc) + timedelta(days=1),
        ... )
    """

    # Sample document surfaced in the generated JSON schema.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "job_id": "550e8400-e29b-41d4-a716-446655440000",
                "s3_key": "temp/550e8400-e29b-41d4-a716-446655440000/input.pdf",
                "pii_findings": [
                    {
                        "entity_type": "PERSON",
                        "start": 45,
                        "end": 57,
                        "score": 0.85,
                        "text": "John Student",
                    },
                ],
                "approval_token": "a1b2c3d4e5f6g7h8i9j0k1l2m3n4o5p6",
                "expires_at": "2024-01-16T10:30:00Z",
            },
        },
    )

    # Every field is required (default is Ellipsis).
    job_id: str = Field(
        default=...,
        description="UUID format job identifier",
        pattern=r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$",
    )
    s3_key: str = Field(
        default=...,
        description="S3 object key for PDF",
        min_length=1,
    )
    pii_findings: list[PIIFinding] = Field(
        default=...,
        description="Detected PII entities",
        min_length=1,
    )
    approval_token: str = Field(
        default=...,
        description="Secure approval token",
        min_length=32,
        max_length=64,
    )
    expires_at: datetime = Field(
        default=...,
        description="UTC expiration timestamp",
    )
class ProcessingQueuePayload(BaseModel):
    """Message placed on the document processing queue.

    Pushed to ``eq-pdf:queue:processing`` after the PII scan passes or
    approval is granted; the processing worker performs Docling conversion
    and AI enhancement.

    Attributes:
        job_id: Job identifier; must match the lowercase, hyphenated
            8-4-4-4-12 hexadecimal UUID layout.
        s3_key: Non-empty S3 object key of the PDF to process.
        approved_at: Approval timestamp (expected to be UTC) when approval
            was required; defaults to ``None``.
            NOTE: the annotation is plain ``datetime``, so the model itself
            does not enforce timezone-awareness.

    Example:
        >>> payload = ProcessingQueuePayload(
        ...     job_id="550e8400-e29b-41d4-a716-446655440000",
        ...     s3_key="temp/550e8400-e29b-41d4-a716-446655440000/input.pdf",
        ...     approved_at=None,  # No PII detected
        ... )
    """

    # Sample document surfaced in the generated JSON schema.
    model_config = ConfigDict(
        json_schema_extra={
            "example": {
                "job_id": "550e8400-e29b-41d4-a716-446655440000",
                "s3_key": "temp/550e8400-e29b-41d4-a716-446655440000/input.pdf",
                "approved_at": "2024-01-15T11:00:00Z",
            },
        },
    )

    # job_id and s3_key are required; approved_at is optional.
    job_id: str = Field(
        default=...,
        description="UUID format job identifier",
        pattern=r"^[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}$",
    )
    s3_key: str = Field(
        default=...,
        description="S3 object key for PDF",
        min_length=1,
    )
    approved_at: datetime | None = Field(
        default=None,
        description="UTC approval timestamp if required",
    )