📦 EqualifyEverything / scan

📄 intake.py · 99 lines
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
import requests
import sys
import json
import time
from utils.watch import logger
from src.data.insert import record_tech_apps


# Emoji Key:    🧰 🚚 Use as a prefix to logs

# Wappalyzer Import Functions

# Import Wappalyzer Techs from: https://github.com/wappalyzer/wappalyzer/blob/master/src/technologies

# Names of the JSON files fetched from the Wappalyzer technologies directory.
_TECH_FILES = tuple('_abcdefghijklmnopqrstuvwxyz')

# URL template for one raw technology file; ``{file}`` is an entry of
# ``_TECH_FILES``.
_TECH_URL_TEMPLATE = (
    'https://raw.githubusercontent.com/wappalyzer/wappalyzer/master/'
    'src/technologies/{file}.json'
)


def _build_tech_app_row(app_name, app_data):
    """Return the positional arguments for ``record_tech_apps`` for one app.

    Args:
        app_name: Key of the entry in the downloaded JSON document; used as
            the app's name.
        app_data: Value of that entry. Assumed to be a dict keyed by the
            Wappalyzer field names read below -- TODO confirm against the
            upstream schema.

    Returns:
        A tuple ordered exactly as ``record_tech_apps`` is called:
        ``(name, description, icon, saas, website, pricing, scriptsrc,
        headers, cookies, dom, implies, cat_implies, js, requires,
        requires_cat, meta, cats)``.
    """
    # scriptSrc is always sent as a list: a lone pattern is wrapped and a
    # missing value becomes an empty list.
    scriptsrc = app_data.get('scriptSrc')
    if isinstance(scriptsrc, str):
        scriptsrc = [scriptsrc]
    elif scriptsrc is None:
        scriptsrc = []

    # dom: a lone pattern is wrapped in a list, an empty list collapses to
    # None, and any other value (including None) is passed through unchanged.
    dom = app_data.get('dom')
    if isinstance(dom, str):
        dom = [dom]
    elif dom == []:
        dom = None

    cats = app_data.get('cats')

    return (
        app_name,
        app_data.get('description'),
        app_data.get('icon'),
        # Read the app's own ``saas`` flag. This value used to be computed
        # from the unrelated ``implies`` key, which marked every app that
        # implies another technology as SaaS.
        bool(app_data.get('saas')),
        app_data.get('website'),
        app_data.get('pricing'),
        scriptsrc,
        # json.dumps(None) is the string 'null', so missing values are
        # still sent as 'null'.
        json.dumps(app_data.get('headers')),
        json.dumps(app_data.get('cookies')),
        dom,
        app_data.get('implies'),
        # NOTE(review): cat_implies is filled from "cats", same as the last
        # argument -- confirm that this is intended.
        cats,
        json.dumps(app_data.get('js')),
        app_data.get('requires'),
        app_data.get('requiresCategory'),
        json.dumps(app_data.get('meta')),
        cats,
    )


def import_tech_apps(timeout=10):
    """Download the Wappalyzer technology files and record every app.

    Each file named in ``_TECH_FILES`` is requested in turn. Every entry of a
    successfully downloaded file is converted with ``_build_tech_app_row`` and
    handed to ``record_tech_apps``. A file that cannot be downloaded or
    parsed is logged and skipped so the remaining files are still processed.

    Args:
        timeout: Timeout in seconds passed to ``requests.get`` for each
            download, so a stalled connection cannot block the import
            indefinitely.
    """
    logger.info('🧰 🚚 Importing Tech Apps')

    for file in _TECH_FILES:
        logger.debug(f'🧰 🚚 Sending request to {file}.json now')
        try:
            response = requests.get(
                _TECH_URL_TEMPLATE.format(file=file), timeout=timeout)
        except requests.RequestException as error:
            logger.error(f'Error: request for {file}.json failed: {error}')
            continue

        logger.debug(f'🧰 🚚 Response Status Code: {response.status_code}')
        if response.status_code != 200:
            logger.error(f'Error: {response.status_code}. Please check the URL and try again.')
            continue

        try:
            data = response.json()
        except ValueError as error:
            # requests reports a body that is not valid JSON with a
            # ValueError subclass.
            logger.error(f'Error: {file}.json is not valid JSON: {error}')
            continue

        for app_name, app_data in data.items():
            # record_tech_apps reports success with a truthy return value.
            if record_tech_apps(*_build_tech_app_row(app_name, app_data)):
                logger.info(f'App Added: {app_name}')
            else:
                logger.error(f'App Add Error: {app_name}')
                # Brief pause after a failed insert (presumably to back off
                # the database -- confirm).
                time.sleep(.25)

        # Only reached when the file was downloaded and parsed.
        logger.info(f'All {file}.json Apps Inserted, moving on...')

    # If all files have been processed, run this log and complete
    logger.info('All Apps & Files Processed... calling it a day... ')



# Script entry point: run the import only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    import_tech_apps()