1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166import psycopg2
import json
import traceback
from database.access import connection
from utils.watch import logger
from psycopg2.pool import SimpleConnectionPool
# Feature flag: set to True to serve connections from a shared psycopg2 pool.
use_pooling = True

# Module-wide pool handle; it stays None whenever pooling is switched off.
pool = None

if use_pooling:
    # The pool is created once, at import time, from the connection
    # parameters exposed by the project's connection helper.
    conn_params = connection().get_connection_params()
    pool = SimpleConnectionPool(1, 10, **conn_params)  # (minconn, maxconn)
def connection_pooling():
    """Borrow a connection from the module-level pool and return it.

    NOTE(review): ``pool`` is None when ``use_pooling`` is False, in which
    case this call raises AttributeError.
    """
    borrowed = pool.getconn()
    return borrowed
def release_pooling(conn):
    """Hand ``conn`` back to the module-level pool.

    Args:
        conn: A connection previously obtained from ``connection_pooling()``.
    """
    give_back = pool.putconn
    give_back(conn)
def execute_update(query, params=None, fetchone=True):
    """Run a single write statement, commit it, and return any RETURNING rows.

    Args:
        query: SQL text, using ``%s`` placeholders for parameters.
        params: Sequence (or mapping) of values bound to the placeholders.
        fetchone: When True return a single row, otherwise all rows.

    Returns:
        * ``fetchone=True``: the first returned row, or ``()`` when the
          statement produced no row.
        * ``fetchone=False``: the list of returned rows, or ``[]``.
        * ``None`` when executing, committing or fetching raised; the error
          is logged, not re-raised.
    """
    # Connect to the database
    conn = connection()
    conn.open()
    logger.debug('๐๏ธ๐ง Database connection opened')

    result = None
    try:
        cur = conn.conn.cursor()
        try:
            cur.execute(query, params)
            conn.conn.commit()
            logger.info('๐๏ธ๐ง Query executed and committed')

            if cur.description is None:
                # The statement has no result set (e.g. an UPDATE without
                # RETURNING). Calling fetch*() here would raise and make a
                # successful, committed update look like a failure.
                result = () if fetchone else []
            elif fetchone:
                result = cur.fetchone() or ()  # empty tuple if no row
            else:
                result = cur.fetchall() or []  # empty list if no rows
            logger.debug(f'๐๏ธ๐ง Fetched results: {result}')
        except Exception as e:
            logger.error(f'๐๏ธ๐ง Error executing update query: {e}')
            result = None
        finally:
            cur.close()
    finally:
        # Always give the connection back, even if cursor creation failed.
        conn.close()

    logger.debug('๐๏ธ๐ง Cursor and connection closed')
    return result
# # # # # # # # # #
# Bulk Updates
def execute_bulk_update(query, params_list):
    """Execute ``query`` once per parameter set inside a single transaction.

    Uses a pooled connection when ``use_pooling`` is True, otherwise opens a
    dedicated connection through the project's ``connection`` helper.

    Args:
        query: SQL text, using ``%s`` placeholders.
        params_list: Iterable of parameter sequences, one per execution.

    Returns:
        None. Errors raised while executing are logged (with traceback) and
        swallowed.
    """
    wrapper = None
    if use_pooling:
        conn = connection_pooling()
    else:
        wrapper = connection()
        wrapper.open()
        # The helper keeps the raw psycopg2 connection on ``.conn`` (that is
        # how execute_update reaches cursor()/commit()), so use it here too.
        conn = wrapper.conn

    try:
        cur = conn.cursor()
        try:
            # psycopg2 connection context manager: commit on success,
            # rollback if the body raises.
            with conn:
                cur.executemany(query, params_list)
            # Logged after the transaction block so "committed" is accurate.
            logger.info("๐๏ธโ๏ธ๐ข Query executed and committed")
        except Exception as e:
            logger.error(f"๐๏ธโ๏ธ Error executing bulk insert query: {e}\n{traceback.format_exc()}")
        finally:
            cur.close()
    finally:
        # Return or close the connection on every path so it is not leaked.
        if wrapper is None:
            release_pooling(conn)
        else:
            wrapper.close()
#########################################################
## Queries
def update_crawl_status(status, crawl_uuid):
    """Set the status of a crawl and return the status stored by the database.

    Args:
        status: New value for ``events.crawls.status``.
        crawl_uuid: Value matched against ``events.crawls.crawl_uuid``.

    Returns:
        The status reported by ``RETURNING``, or None when the update failed
        or no row matched ``crawl_uuid``.
    """
    logger.info(f'๐๏ธ๐ง Updating crawl status for {crawl_uuid} to {status}')
    query = """
        UPDATE events.crawls
        SET status = %s
        WHERE crawl_uuid = %s
        RETURNING status;
    """
    updated_status = execute_update(query, (status, crawl_uuid))

    # execute_update yields None on error and an empty tuple when no row was
    # returned; indexing either one would raise, so check before logging.
    if not updated_status:
        logger.error(f'Crawl status for {crawl_uuid} was not updated')
        return None

    new_status = updated_status[0]
    logger.debug(f'๐๏ธ๐ง Crawl status updated to {new_status}')
    return new_status
def update_crawl_user_agent(user_agent_id, crawl_uuid):
    """Attach a user agent to a crawl and flag the crawl as 'processing'.

    Args:
        user_agent_id: Value written to ``events.crawls.user_agent_id``.
        crawl_uuid: Value matched against ``events.crawls.crawl_uuid``.

    Returns:
        True when a crawl row was updated; False when the statement failed
        or no row matched ``crawl_uuid``.
    """
    logger.info(f'๐๏ธ๐ง Updating crawl {crawl_uuid} user agent to {user_agent_id}')
    # RETURNING gives back the touched row. execute_update logs and swallows
    # database errors (returning None), so the returned row is the only
    # reliable success signal.
    query = """
        UPDATE events.crawls
        SET user_agent_id = %s, status = 'processing'
        WHERE crawl_uuid = %s
        RETURNING crawl_uuid;
    """
    try:
        updated = execute_update(query, (user_agent_id, crawl_uuid))
    except Exception:
        # e.g. the connection could not be opened; keep the bool contract.
        updated = None

    if not updated:
        logger.error(f'๐๏ธ๐ง Failed to update crawl {crawl_uuid} user agent')
        return False

    logger.debug(f'๐๏ธ๐ง Crawl user agent updated to {user_agent_id}')
    return True
def update_crawl_complete(crawl_uuid):
    """Mark a crawl as 'completed' and stamp ``ended_at`` with NOW().

    Args:
        crawl_uuid: Value matched against ``events.crawls.crawl_uuid``.

    Returns:
        True when a crawl row was updated; False when the statement failed
        or no row matched ``crawl_uuid``.
    """
    logger.info(f'๐๏ธ๐ง Updating crawl {crawl_uuid} as completed')
    # RETURNING gives back the touched row. execute_update logs and swallows
    # database errors (returning None), so the returned row is the only
    # reliable success signal.
    query = """
        UPDATE events.crawls
        SET status = 'completed',
            ended_at = NOW()
        WHERE crawl_uuid = %s
        RETURNING crawl_uuid;
    """
    try:
        # Trailing comma matters: ``(crawl_uuid)`` is just the bare value,
        # not a one-element parameter tuple.
        updated = execute_update(query, (crawl_uuid,))
    except Exception:
        # e.g. the connection could not be opened; keep the bool contract.
        updated = None

    if not updated:
        logger.error(f'๐๏ธ๐ง Failed to complete crawl {crawl_uuid}')
        return False

    logger.debug(f'๐๏ธ๐ง Crawl {crawl_uuid} completed successfully')
    return True
def update_sitemap_status(sitemap_id, crawl_id):
    """Mark a sitemap as 'completed' and record the crawl that processed it.

    Args:
        sitemap_id: Value matched against ``targets.sitemaps.id``.
        crawl_id: Value written to ``targets.sitemaps.recent_crawl_id``.

    Returns:
        True when a sitemap row was updated; False when the statement failed
        or no row matched ``sitemap_id``.
    """
    logger.debug(f'Updating Sitemap Status: {sitemap_id}...')
    # The SET assignments need a separating comma, and the two placeholders
    # are bound in order: recent_crawl_id first, then id.
    query = """
        UPDATE targets.sitemaps
        SET status = 'completed',
            recent_crawl_id = %s
        WHERE id = %s
        RETURNING id;
    """
    try:
        result = execute_update(query, (crawl_id, sitemap_id))
    except Exception:
        # e.g. the connection could not be opened; keep the bool contract.
        result = None

    # execute_update returns None on a database error and an empty tuple
    # when no row matched, so any falsy result means nothing was updated.
    if not result:
        logger.error(f'๐๏ธ๐ง Failed to update Sitemap {sitemap_id}')
        return False

    logger.debug(f'Update: {sitemap_id} Updated')
    return True