📦 EqualifyEverything / crawler

📄 manager.py · 35 lines
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
import os
from logger.config import logger
from utils.check import should_sitemap_continue

# Crawl Types
from crawls.kraken import setup_kraken_cartocrawler, setup_kraken_spinnocracy


# Spider Router
# Decides which spider to call for a given payload.
def spider_finder(payload):
    """Route a crawl request to the spider named in ``payload``.

    Looks up ``'spider'`` in ``payload`` and starts the matching kraken
    spider.

    Args:
        payload: Object exposing ``.get`` (e.g. a dict); its ``'spider'``
            entry selects the spider: ``'cartocrawler'`` or ``'spinocracy'``.

    Returns:
        The value returned by the selected ``setup_kraken_*`` function, or,
        when the spider value is missing or unrecognised, the object built
        by ``jsonify`` from an error dict with ``status_code`` set to 400.
    """
    # TODO break this out into crawl_type kraken|harpoon and then spider
    spider_type = payload.get('spider')

    # CartoCrawler
    if spider_type == 'cartocrawler':
        logger.info(' ๐Ÿ•ท๏ธ ๐Ÿš€ CartoCrawler')
        return setup_kraken_cartocrawler()

    # Spinocracy
    # INFO Search for all URLs on a domain, not just the sitemap.
    if spider_type == 'spinocracy':
        logger.info(' ๐Ÿ•ท๏ธ ๐Ÿš€ Spinnocracy')
        # Hand the setup result back to the caller, as the CartoCrawler
        # branch does; it used to be discarded, so this path returned None.
        return setup_kraken_spinnocracy()

    # No defined spider or other error
    error_msg = {'error': 'Bad Spider. Check spider variable. '}
    logger.error(' ๐Ÿ•ท๏ธ ๐Ÿ’€ Spider selection failed. Check spider_type ')
    # NOTE(review): `jsonify` is not imported in the visible part of this
    # file (presumably flask.jsonify) - confirm it is in scope, otherwise
    # this branch raises NameError instead of returning the 400 response.
    response = jsonify(error_msg)
    response.status_code = 400
    return response