📦 EqualifyEverything / equalify-reflow

📄 ci.yml · 290 lines
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
name: CI

# Triggers: pushes to main, pull requests targeting main, and manually
# dispatched runs.
on:
  push:
    branches:
      - main
  pull_request:
    branches:
      - main
  workflow_dispatch:

# Least-privilege GITHUB_TOKEN. The jobs in this workflow check out code,
# restore/save caches, build and test, and upload artifacts; none of them
# pushes commits, comments, or publishes packages, so read-only repository
# access is enough.
permissions:
  contents: read

# Cancel in-progress runs for the same branch/PR. Pull request runs are grouped
# by PR number; every other event falls back to the ref.
concurrency:
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

# Workflow-level environment, visible to every job below.
env:
  # Passed to actions/setup-python as `python-version`. Kept quoted so YAML
  # treats it as a string rather than a float.
  PYTHON_VERSION: '3.11'

jobs:
  # ============================================================================
  # Stage 1: Fast feedback (< 2 min)
  # ============================================================================
  # Runs the pytest suite in tests/unit (marker "unit") with coverage over src
  # and uploads coverage.xml as the "unit-coverage" artifact.
  unit-tests:
    name: Unit Tests
    runs-on: ubuntu-latest
    timeout-minutes: 10

    steps:
      - name: Free disk space
        # The delete is backgrounded (trailing `&`), so this step returns
        # without waiting for the removal to finish.
        run: |
          sudo rm -rf /opt/hostedtoolcache/CodeQL /opt/hostedtoolcache/go \
            /opt/hostedtoolcache/Ruby /usr/share/dotnet /usr/local/lib/android \
            /usr/local/share/boost /usr/local/.ghcup /opt/ghc /usr/share/swift &

      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Cache uv
        uses: actions/cache@v4
        with:
          path: ~/.cache/uv
          # Hash the lockfile together with pyproject.toml so that a dependency
          # bump touching only uv.lock yields a new cache entry. The Docker
          # layer cache key in the e2e job hashes uv.lock the same way.
          key: uv-${{ runner.os }}-${{ hashFiles('pyproject.toml', 'uv.lock') }}
          # Fall back to the most recent cache for this OS on a key miss.
          restore-keys: uv-${{ runner.os }}-

      - name: Install uv and dependencies
        run: |
          pip install uv
          uv sync --all-extras

      - name: Run unit tests
        # Placeholder credentials only; these are literal dummy values, not
        # repository secrets.
        env:
          ANTHROPIC_API_KEY: test-key-for-ci
          AWS_ACCESS_KEY_ID: test
          AWS_SECRET_ACCESS_KEY: test
          AWS_DEFAULT_REGION: us-east-1
        run: |
          uv run pytest tests/unit \
            -v \
            --tb=short \
            -m "unit" \
            --cov=src \
            --cov-report=term-missing \
            --cov-report=xml \
            --maxfail=5 \
            --durations=10

      - name: Upload coverage
        uses: actions/upload-artifact@v4
        # Upload even when the test step failed so a partial report is kept.
        if: always()
        with:
          name: unit-coverage
          path: coverage.xml
          retention-days: 7

  # ============================================================================
  # Stage 2: Integration tests with real services (< 5 min)
  # ============================================================================
  # Runs the pytest suite in tests/integration (marker "integration") against
  # Redis and Floci service containers, after unit-tests has succeeded, and
  # uploads coverage.xml as the "integration-coverage" artifact.
  integration-tests:
    name: Integration Tests
    runs-on: ubuntu-latest
    timeout-minutes: 10
    needs: unit-tests

    services:
      redis:
        image: redis:7-alpine
        options: >-
          --health-cmd "redis-cli ping"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
        ports:
          # Quoted so the host:container mapping is always read as a string.
          - "6379:6379"

      floci:
        image: hectorvent/floci:1.5.3
        env:
          FLOCI_DEFAULT_REGION: us-east-1
        options: >-
          --health-cmd "curl -sf http://localhost:4566/ || exit 1"
          --health-interval 10s
          --health-timeout 5s
          --health-retries 10
        ports:
          - "4566:4566"

    steps:
      - name: Free disk space
        # The delete is backgrounded (trailing `&`), so this step returns
        # without waiting for the removal to finish.
        run: |
          sudo rm -rf /opt/hostedtoolcache/CodeQL /opt/hostedtoolcache/go \
            /opt/hostedtoolcache/Ruby /usr/share/dotnet /usr/local/lib/android \
            /usr/local/share/boost /usr/local/.ghcup /opt/ghc /usr/share/swift &

      - name: Checkout
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Cache uv
        uses: actions/cache@v4
        with:
          path: ~/.cache/uv
          # Hash the lockfile together with pyproject.toml so that a dependency
          # bump touching only uv.lock yields a new cache entry. The Docker
          # layer cache key in the e2e job hashes uv.lock the same way.
          key: uv-${{ runner.os }}-${{ hashFiles('pyproject.toml', 'uv.lock') }}
          # Fall back to the most recent cache for this OS on a key miss.
          restore-keys: uv-${{ runner.os }}-

      - name: Install uv and dependencies
        run: |
          pip install uv
          uv sync --all-extras

      - name: Initialize S3 buckets on Floci
        # GitHub Actions service containers can't run init sidecars, so the
        # same init-aws.sh that docker-compose.dev.yml uses is invoked here
        # against the floci service container. AWS_ENDPOINT_URL routes every
        # `aws` command to Floci; see infrastructure/floci/init-aws.sh.
        env:
          AWS_ACCESS_KEY_ID: test
          AWS_SECRET_ACCESS_KEY: test
          AWS_DEFAULT_REGION: us-east-1
          AWS_ENDPOINT_URL: http://localhost:4566
        run: |
          pip install awscli
          bash infrastructure/floci/init-aws.sh

      - name: Run integration tests
        # Endpoints point at the service containers published on localhost;
        # credentials are literal dummy values, not repository secrets.
        env:
          ANTHROPIC_API_KEY: test-key-for-ci
          AWS_ACCESS_KEY_ID: test
          AWS_SECRET_ACCESS_KEY: test
          AWS_DEFAULT_REGION: us-east-1
          AWS_ENDPOINT_URL: http://localhost:4566
          REDIS_URL: redis://localhost:6379
          SKIP_BEDROCK_TESTS: "1"
        run: |
          uv run pytest tests/integration \
            -v \
            --tb=short \
            -m "integration" \
            --cov=src \
            --cov-report=term-missing \
            --cov-report=xml \
            --maxfail=5 \
            --durations=10

      - name: Upload coverage
        uses: actions/upload-artifact@v4
        # Upload even when the test step failed so a partial report is kept.
        if: always()
        with:
          name: integration-coverage
          path: coverage.xml
          retention-days: 7

  # ============================================================================
  # Stage 3: E2E tests - full Docker stack (< 10 min)
  # Runs after integration tests on every trigger of this workflow: pushes and
  # PRs targeting main, and manual workflow_dispatch runs
  # ============================================================================
  # Builds the "development" image target, starts the compose stack with the CI
  # override file, and runs the "slow"-marked tests in tests/e2e inside the
  # api-gateway container. Starts only after integration-tests has succeeded.
  e2e-tests:
    name: E2E Tests (Docker)
    needs: integration-tests
    runs-on: ubuntu-latest
    timeout-minutes: 25

    steps:
      - name: Checkout
        uses: actions/checkout@v4

      # Foreground cleanup: removes preinstalled toolchains, clears the apt
      # cache, prunes Docker, then prints the space left on /.
      - name: Free disk space
        run: |
          sudo rm -rf /opt/hostedtoolcache /usr/share/dotnet /usr/local/lib/android \
            /usr/local/share/boost /usr/local/.ghcup /opt/ghc /usr/share/swift \
            /usr/local/share/chromium /usr/local/share/powershell /usr/local/graalvm \
            /usr/local/julia* /usr/local/sqlpackage /opt/pipx
          sudo apt-get clean || true
          docker system prune -af --volumes || true
          df -h /

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      # Layer cache directory read by the build step below via --cache-from.
      - name: Cache Docker layers
        uses: actions/cache@v4
        with:
          key: docker-${{ runner.os }}-${{ hashFiles('Dockerfile', 'pyproject.toml', 'uv.lock') }}
          restore-keys: docker-${{ runner.os }}-
          path: /tmp/.buildx-cache

      - name: Create .env file
        run: cp .env.example .env

      # Reads layers from the restored cache, writes the refreshed cache to a
      # separate "-new" directory, and loads the result into the local Docker
      # image store as equalify-reflow:ci.
      - name: Build Docker image with cache
        run: |
          docker buildx build \
            --file Dockerfile --target development \
            --cache-from type=local,src=/tmp/.buildx-cache \
            --cache-to type=local,dest=/tmp/.buildx-cache-new,mode=max \
            --load -t equalify-reflow:ci .

      # Replace the restored cache directory with the one the build just wrote.
      - name: Rotate buildx cache
        run: |
          rm -rf /tmp/.buildx-cache
          mv /tmp/.buildx-cache-new /tmp/.buildx-cache

      # --no-build: compose must not build images itself.
      - name: Start services
        run: docker compose -f docker-compose.yml -f docker-compose.ci.yml up -d --no-build

      # Poll every 3 s, for at most 90 s, until a trivial command can be
      # exec'd inside the api-gateway container.
      - name: Wait for services
        run: |
          echo "Waiting for API Gateway..."
          timeout 90 bash -c 'until docker compose -f docker-compose.yml -f docker-compose.ci.yml exec -T api-gateway echo "ready" 2>/dev/null; do sleep 3; done'

      - name: Run E2E tests
        run: |
          docker compose -f docker-compose.yml -f docker-compose.ci.yml exec -T \
            -e SKIP_BEDROCK_TESTS=1 api-gateway \
            uv run pytest tests/e2e -v --tb=short -m "slow" --maxfail=3 --durations=20

      # The next two steps run only after an earlier step has failed.
      - name: Collect logs on failure
        if: failure()
        run: docker compose -f docker-compose.yml -f docker-compose.ci.yml logs > docker-logs.txt 2>&1

      - name: Upload logs on failure
        if: failure()
        uses: actions/upload-artifact@v4
        with:
          name: docker-logs
          retention-days: 7
          path: docker-logs.txt

      # Always tear the stack down, removing its volumes (-v).
      - name: Stop services
        if: always()
        run: |
          docker compose -f docker-compose.yml -f docker-compose.ci.yml down -v

  # ============================================================================
  # Final status check for branch protection
  # ============================================================================
  # Aggregate gate: runs regardless of upstream outcome (`if: always()`) and
  # fails unless all three upstream jobs report "success".
  ci-success:
    name: CI Success
    if: always()
    needs: [unit-tests, integration-tests, e2e-tests]
    runs-on: ubuntu-latest

    steps:
      - name: Check all jobs passed
        # Upstream results are handed to the script as environment variables
        # and checked one by one; the first non-"success" value fails the job.
        env:
          UNIT_RESULT: ${{ needs.unit-tests.result }}
          INTEGRATION_RESULT: ${{ needs.integration-tests.result }}
          E2E_RESULT: ${{ needs.e2e-tests.result }}
        run: |
          for outcome in "$UNIT_RESULT" "$INTEGRATION_RESULT" "$E2E_RESULT"; do
            if [[ "$outcome" != "success" ]]; then
              echo "::error::One or more CI jobs failed"
              exit 1
            fi
          done
          echo "All CI jobs passed successfully"