📦 EqualifyEverything / crawler

📄 update.py · 108 lines
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
import psycopg2
from database.access import connection
from logger.config import logger
from database.access import connection

# Log Emoji: ๐Ÿ—„๏ธ๐Ÿ”ง

def execute_update(query, params=None, fetchone=True):
    """Execute a data-modifying SQL statement, commit it, and return its rows.

    A new connection is obtained from ``connection()`` and opened for every
    call; the cursor and the connection are always closed before returning,
    including when an error propagates.

    Args:
        query: SQL text, using ``%s`` placeholders for the values in
            ``params``.
        params: Sequence of values bound to the placeholders, or None when
            the statement takes no parameters.
        fetchone: When True, return a single row; otherwise return all rows.

    Returns:
        With ``fetchone=True``: the first returned row, or ``()`` when the
        statement produced no row. With ``fetchone=False``: the list of
        returned rows, or ``[]`` when there are none. ``None`` when
        executing, committing, or fetching failed; the error is logged
        instead of being raised.

    Raises:
        Whatever ``connection()``, ``conn.open()`` or ``cursor()`` raise;
        only errors from execute/commit/fetch are swallowed.
    """
    # Connect to the database
    conn = connection()
    conn.open()
    logger.debug(f'๐Ÿ—„๏ธ๐Ÿ”ง Database connection opened')

    result = None
    try:
        # Create a cursor
        cur = conn.conn.cursor()
        try:
            # Execute the query
            cur.execute(query, params)
            conn.conn.commit()
            logger.info(f'๐Ÿ—„๏ธ๐Ÿ”ง Query executed and committed')

            if cur.description is None:
                # Assumes a DB-API cursor (psycopg2): description is None
                # when the statement returned no result set, e.g. an UPDATE
                # without RETURNING. Fetching would raise, which used to
                # turn a committed update into a reported failure.
                result = () if fetchone else []
            elif fetchone:
                # Empty tuple rather than None, so None only means failure
                result = cur.fetchone() or ()
            else:
                result = cur.fetchall() or []
                logger.debug(f'๐Ÿ—„๏ธ๐Ÿ”ง Fetched results: {result}')
        except Exception as e:
            logger.error(f'๐Ÿ—„๏ธ๐Ÿ”ง Error executing update query: {e}')
            result = None
        finally:
            cur.close()
    finally:
        # Runs even when cursor() raises, so the connection never leaks
        conn.close()
        logger.debug(f'๐Ÿ—„๏ธ๐Ÿ”ง Cursor and connection closed')

    return result



def update_crawl_status(status, crawl_uuid):
    """Set the status of a crawl and return the status the database stored.

    Args:
        status: New value for ``events.crawls.status``.
        crawl_uuid: Value matched against ``events.crawls.crawl_uuid``.

    Returns:
        The status reported by the ``RETURNING`` clause, or None when no row
        came back (no crawl matched, or ``execute_update`` reported a
        failure).
    """
    logger.info(f'๐Ÿ—„๏ธ๐Ÿ”ง Updating crawl status for {crawl_uuid} to {status}')
    query = """
        UPDATE events.crawls
        SET status = %s
        WHERE crawl_uuid = %s
        RETURNING status;
    """
    updated_status = execute_update(query, (status, crawl_uuid))

    # execute_update yields None on error and () when no row matched; check
    # before indexing, otherwise the debug line itself raises.
    if not updated_status:
        logger.warning(f'๐Ÿ—„๏ธ๐Ÿ”ง Crawl status for {crawl_uuid} was not updated')
        return None

    logger.debug(f'๐Ÿ—„๏ธ๐Ÿ”ง Crawl status updated to {updated_status[0]}')
    return updated_status[0]

def update_crawl_user_agent(user_agent_id, crawl_uuid):
    """Record the user agent of a crawl and mark the crawl as processing.

    Args:
        user_agent_id: New value for ``events.crawls.user_agent_id``.
        crawl_uuid: Value matched against ``events.crawls.crawl_uuid``.

    Returns:
        True when a row was updated; False when no crawl matched, the query
        failed, or the database call raised.
    """
    logger.info(f'๐Ÿ—„๏ธ๐Ÿ”ง Updating crawl {crawl_uuid} user agent to {user_agent_id}')
    # RETURNING gives a row to inspect: execute_update logs query errors and
    # returns None instead of raising, so an exception handler alone can
    # never see a failed update.
    query = """
        UPDATE events.crawls
        SET user_agent_id = %s, status = 'processing'
        WHERE crawl_uuid = %s
        RETURNING crawl_uuid;
    """
    try:
        updated = execute_update(query, (user_agent_id, crawl_uuid))
    except Exception:
        # Connection-level errors are not swallowed by execute_update
        updated = None

    if not updated:
        logger.error(f'๐Ÿ—„๏ธ๐Ÿ”ง Failed to update crawl {crawl_uuid} user agent')
        return False

    logger.debug(f'๐Ÿ—„๏ธ๐Ÿ”ง Crawl user agent updated to {user_agent_id}')
    return True


def update_crawl_complete(crawl_uuid):
    """Mark a crawl as completed and stamp its end time with ``NOW()``.

    Args:
        crawl_uuid: Value matched against ``events.crawls.crawl_uuid``.

    Returns:
        True when a row was updated; False when no crawl matched, the query
        failed, or the database call raised.
    """
    logger.info(f'๐Ÿ—„๏ธ๐Ÿ”ง Updating crawl {crawl_uuid} as completed')
    # RETURNING gives a row to inspect: execute_update logs query errors and
    # returns None instead of raising.
    query = """
        UPDATE events.crawls
        SET status = 'completed',
            ended_at = NOW()
        WHERE crawl_uuid = %s
        RETURNING crawl_uuid;
    """
    try:
        # Trailing comma matters: (crawl_uuid) is just the bare value, not a
        # one-element parameter tuple.
        updated = execute_update(query, (crawl_uuid,))
    except Exception:
        # Connection-level errors are not swallowed by execute_update
        updated = None

    if not updated:
        logger.error(f'๐Ÿ—„๏ธ๐Ÿ”ง Failed to complete crawl {crawl_uuid}')
        return False

    logger.debug(f'๐Ÿ—„๏ธ๐Ÿ”ง Crawl {crawl_uuid} completed successfully')
    return True

def update_sitemap_status(sitemap_id, crawl_id):
    """Mark a sitemap as completed and record the crawl that processed it.

    Args:
        sitemap_id: Value matched against ``targets.sitemaps.id``.
        crawl_id: New value for ``targets.sitemaps.recent_crawl_id``.

    Returns:
        True when a row was updated; False when no sitemap matched, the
        query failed, or the database call raised.
    """
    logger.debug(f'Updating Sitemap Status: {sitemap_id}...')
    # The two SET assignments need a separating comma; without it the
    # statement is a syntax error.
    query = """
        UPDATE targets.sitemaps
        SET status = 'completed',
            recent_crawl_id = %s
        WHERE id = %s
        RETURNING id;
    """
    try:
        # Parameter order follows placeholder order: recent_crawl_id, then id
        updated = execute_update(query, (crawl_id, sitemap_id))
    except Exception:
        # Connection-level errors are not swallowed by execute_update
        updated = None

    # execute_update reports query errors as None and "no row" as (), so the
    # RETURNING row is the only reliable success signal.
    if not updated:
        logger.error(f'๐Ÿ—„๏ธ๐Ÿ”ง Failed to update Sitemap {sitemap_id}')
        return False

    logger.debug(f'Update: {sitemap_id} Updated')
    return True