📦 EqualifyEverything / equalify

📄 runEveryMinute.ts · 138 lines
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138import { db, graphqlQuery } from "#src/utils";
import { LambdaClient, InvokeCommand } from "@aws-sdk/client-lambda";
import { syncAuditUrlsFromRemoteCsv } from "../internal";
const lambda = new LambdaClient();

export const runEveryMinute = async () => {
  // Perform health check
  const response = await graphqlQuery({ query: `{users(limit:1){id}}` });
  if (!response?.users?.[0]?.id) {
    await fetch(process.env.SLACK_WEBHOOK, {
      method: "POST",
      body: JSON.stringify({
        text: `*Equalify UIC* - Database connection failure detected`,
      }),
    });
  }

  // Determine whether we should run scheduled audits
  await db.connect();
  const scheduledAuditIds = (
    await db.query({
      text: `SELECT "id" FROM "audits" 
               WHERE 
                 EXTRACT(HOUR FROM "scheduled_at") = EXTRACT(HOUR FROM NOW())
                 AND EXTRACT(MINUTE FROM "scheduled_at") = EXTRACT(MINUTE FROM NOW())
                 AND (
                   ("interval" = 'Daily')
                   OR ("interval" = 'Weekly' AND EXTRACT(DOW FROM "scheduled_at") = EXTRACT(DOW FROM NOW()))
                   OR (
                     "interval" = 'Monthly' 
                     AND (
                       EXTRACT(DAY FROM "scheduled_at") = EXTRACT(DAY FROM NOW())
                       OR (
                         EXTRACT(DAY FROM "scheduled_at") >= 29
                         AND EXTRACT(DAY FROM NOW()) = EXTRACT(DAY FROM (DATE_TRUNC('MONTH', NOW()) + INTERVAL '1 MONTH - 1 DAY'))
                       )
                     )
                   )
                 )`,
    })
  ).rows.map((obj) => obj.id);
  for (const scheduledAuditId of scheduledAuditIds) {
    
    // hook to check for remote CSV
    await syncAuditUrlsFromRemoteCsv(scheduledAuditId);

    const urls = (
      await db.query({
        text: `SELECT * FROM "urls" WHERE "audit_id"=$1`,
        values: [scheduledAuditId],
      })
    ).rows;

    // Skip scheduled audits with no URLs to prevent hung scans
    if (!urls || urls.length === 0) {
      console.log("Skipping scheduled audit with no URLs:", scheduledAuditId);
      continue;
    }

    const scanId = (
      await db.query({
        text: `INSERT INTO "scans" ("audit_id", "status", "pages") VALUES ($1, $2, $3) RETURNING "id"`,
        values: [
          scheduledAuditId,
          "processing",
          JSON.stringify(urls.map((obj) => ({ url: obj.url, type: obj.type }))),
        ],
      })
    ).rows[0].id;
    await lambda.send(
      new InvokeCommand({
        FunctionName: "aws-lambda-scan-sqs-router",
        InvocationType: "Event",
        Payload: JSON.stringify({
          urls: urls?.map((url) => ({
            auditId: scheduledAuditId,
            scanId: scanId,
            urlId: url.id,
            url: url.url,
            type: url.type,
          })),
        }),
      }),
    );
    console.log("Scan jobs queued for audit:", scheduledAuditId);
  }

  // See if there are any "stuck" scans that we should error out!
  const stuckScans = (
    await db.query({
      text: `SELECT s."id", s."errors", s."audit_id" FROM "scans" s
               WHERE s."status" = 'processing' 
               AND (NOW() - s."updated_at") > INTERVAL '15 minutes'`,
    })
  ).rows;

  for (const scan of stuckScans) {
    const timeoutError = {
      type: "scan_timeout",
      message: "Scan timed out after 15 minutes of inactivity",
      timestamp: new Date().toISOString(),
    };
    const updatedErrors = [...(scan.errors || []), timeoutError];
    await db.query({
      text: `UPDATE "scans" 
                   SET "status" = $1, "errors" = $2 
                   WHERE "id" = $3`,
      values: ["complete", updatedErrors, scan.id],
    });

    // Also update the parent audit so the frontend stops showing the spinner
    if (scan.audit_id) {
      const hasSuccessfulPages =
        (
          await db.query({
            text: `SELECT COUNT(*) FROM "blockers" WHERE "scan_id"=$1`,
            values: [scan.id],
          })
        ).rows[0].count > 0;

      await db.query({
        text: `UPDATE "audits" SET "status" = $1 WHERE "id" = $2 AND "status" NOT IN ('complete', 'failed')`,
        values: [hasSuccessfulPages ? "complete" : "failed", scan.audit_id],
      });
    }

    console.log(
      "Marked stuck scan as complete:",
      scan.id,
      "audit:",
      scan.audit_id,
    );
  }

  await db.clean();
  return;
};