import psycopg2
from database.access import connection
from logger.config import logger
def execute_insert(query, params=None, fetchone=True):
    """Execute a parameterized SQL statement and commit it, returning any results.

    Opens a fresh connection per call, executes *query*, commits, and fetches
    the result set when one exists (e.g. from a RETURNING clause).

    Args:
        query: SQL statement with %s placeholders.
        params: Optional sequence/tuple of bind parameters.
        fetchone: When True (default) return a single row tuple; otherwise
            return all rows as a list.

    Returns:
        A row tuple, a list of rows, or None (on error, or when the statement
        produced no result set / no rows).
    """
    logger.debug(f"๐๏ธโ๏ธ Query parameters: {params}")
    # Connect to the database (connection() wraps the raw psycopg2 connection
    # in .conn — presumably a project-level pool/config helper; verify).
    conn = connection()
    conn.open()
    logger.debug("๐๏ธโ๏ธ Database connection opened")
    cur = conn.conn.cursor()
    result = None
    try:
        cur.execute(query, params)
        conn.conn.commit()
        logger.info("๐๏ธโ๏ธ Query executed and committed")
        # cur.description is None when the statement returned no result set
        # (plain INSERT/UPDATE without RETURNING). The original guarded only
        # on rowcount, so fetchone() raised "no results to fetch" on such
        # statements and the swallowed error was logged spuriously.
        if cur.description is None:
            result = None
        elif fetchone:
            result = cur.fetchone() if cur.rowcount > 0 else None
        else:
            result = cur.fetchall()
        logger.debug(f'๐๏ธโ๏ธ Fetched results: {result}')
    except Exception as e:
        # Best-effort by design: callers treat None as failure.
        logger.error(f"๐๏ธโ๏ธ Error executing insert query: {e}")
        result = None
    finally:
        # Always release resources, even if logging/fetching raised.
        cur.close()
        conn.close()
        logger.debug("๐๏ธโ๏ธ Cursor and connection closed")
    return result
def record_new_crawl(actor_id, started_by, crawl_uuid, crawl_type, domain_id, start_url):
    """Insert a new row into events.crawls with status 'new'.

    Args:
        actor_id: Identifier of the actor performing the crawl.
        started_by: Who/what initiated the crawl.
        crawl_uuid: Unique identifier for this crawl run.
        crawl_type: Kind of crawl being recorded.
        domain_id: Identifier of the target domain.
        start_url: URL the crawl starts from.

    Returns:
        True if the row was inserted (RETURNING echoed the uuid), else False.
    """
    logger.info(f"๐๏ธโ๏ธ Recording new crawl: {crawl_uuid}")
    query = """
    INSERT INTO events.crawls (
        actor_id,
        status,
        started_by,
        crawl_uuid,
        crawl_type,
        domain_id,
        start_url
    )
    VALUES (
        %s, 'new', %s,
        %s, %s, %s, %s
    )
    RETURNING crawl_uuid
    """
    result = execute_insert(query, (actor_id, started_by, crawl_uuid, crawl_type, domain_id, start_url))
    logger.debug(f'๐๏ธโ๏ธ Record New Crawl Output: {result} ')
    # execute_insert returns None on failure; the original indexed result[0]
    # unconditionally and raised TypeError instead of reaching the False branch.
    if result is not None and result[0] == crawl_uuid:
        logger.debug(f'๐๏ธโ๏ธ Crawl {crawl_uuid} created... ')
        return True
    else:
        logger.error(f'๐๏ธโ๏ธ Record New Crawl had an issue with: {crawl_uuid} ')
        return False
def record_crawled_url(url, crawl_uuid, source_url):
    """Upsert a crawled URL into results.urls.

    Inserts the row with last_crawled_at = NOW(); if the URL already exists,
    only its last_crawled_at timestamp is refreshed.

    Args:
        url: The URL that was crawled.
        crawl_uuid: Identifier of the crawl run that visited it.
        source_url: URL from which this one was discovered.
    """
    logger.info(f"๐๏ธโ๏ธ Recording crawled URL: {url}")
    upsert_sql = """
    INSERT INTO results.urls (url,
                   crawl_uuid,
                   source_url,
                   last_crawled_at)
    VALUES (%s, %s, %s, NOW())
    ON CONFLICT (url) DO UPDATE SET last_crawled_at = NOW();
    """
    logger.debug(f'๐๏ธ โ๏ธ URL To Record: {url} ')
    # Fire-and-forget: no RETURNING clause, so the result is discarded.
    execute_insert(upsert_sql, (url, crawl_uuid, source_url))