"""Probe fediverse servers for their nodeinfo and record live ones in PostgreSQL.

Run with ``--multi`` (process pool) or ``--mono`` (sequential). Servers to
probe are read from the ``world`` table (rows with ``checked='f'``); servers
that answer with usable nodeinfo are written to the ``fediverse`` table.
"""

from multiprocessing import set_start_method
from multiprocessing import get_context
from itertools import product
import time
from datetime import datetime
import os
import json
import sys
import os.path
import psycopg2
import aiohttp
import asyncio
import socket
import pdb

# Network-level failures that simply mean "this server cannot be probed".
client_exceptions = (
    aiohttp.ClientResponseError,
    aiohttp.ClientConnectionError,
    aiohttp.ClientConnectorError,
    aiohttp.ClientError,
    asyncio.TimeoutError,
    socket.gaierror,
)

# Errors expected when a response body is not the JSON shape we look for.
_PARSE_ERRORS = (
    aiohttp.ContentTypeError,
    ValueError,
    KeyError,
    IndexError,
    TypeError,
    AttributeError,
)

# Nodeinfo paths that need special handling in getsoft().
_MASTODON_INSTANCE_PATH = '/api/v1/instance?'
_X_NODEINFO2_PATH = '/.well-known/x-nodeinfo2?'

# Servers reporting more users than this are skipped without being recorded.
_MAX_USERS = 1000000

_BANNER = '*********************************************************************'


def _connect_db():
    """Open a psycopg2 connection using the settings from the db config file."""
    fediverse_db, fediverse_db_user = get_db_config()
    return psycopg2.connect(
        database=fediverse_db,
        user=fediverse_db_user,
        password="",
        host="/var/run/postgresql",
        port="5432",
    )


def write_api(server, software, users, alive, api, soft_version):
    """Insert or refresh a server's row in ``fediverse`` and mark it checked.

    Args:
        server: Hostname of the server.
        software: Lower-cased software name reported by the server.
        users: Total user count reported by the server.
        alive: Whether the server answered.
        api: Path of the endpoint the data was read from.
        soft_version: Software version string reported by the server.

    Database errors are printed, not raised, so one failing write does not
    abort a crawl.
    """
    now = datetime.now()
    insert_sql = (
        "INSERT INTO fediverse(server, updated_at, software, users, alive, users_api, version) "
        "VALUES(%s,%s,%s,%s,%s,%s,%s) ON CONFLICT DO NOTHING"
    )
    update_sql = (
        "UPDATE fediverse SET updated_at=(%s), software=(%s), users=(%s), alive=(%s), "
        "users_api=(%s), version=(%s) where server=(%s)"
    )
    conn = None
    try:
        conn = _connect_db()
        with conn.cursor() as cur:
            print(f'Writing {server} nodeinfo data...')
            # Insert first (no-op when the row exists), then update so an
            # existing row is refreshed as well.
            cur.execute(insert_sql, (server, now, software, users, alive, api, soft_version))
            cur.execute(update_sql, (now, software, users, alive, api, soft_version, server))
            cur.execute("UPDATE world SET checked='t' where server=(%s)", (server,))
            conn.commit()
    except Exception as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()


async def _fetch_nodeinfo(server):
    """Fetch a server's nodeinfo document.

    Returns:
        ``(nodeinfo_path, nodeinfo_json)``. Either element is ``None`` when
        the corresponding request failed or its body could not be parsed.
    """
    url = 'https://' + server
    user_agent = {'User-agent': 'Mozilla/5.0'}
    timeout = aiohttp.ClientTimeout(total=3)
    nodeinfo = None
    nodeinfo_json = None

    async with aiohttp.ClientSession(timeout=timeout, headers=user_agent) as session:
        try:
            async with session.get(url + '/.well-known/nodeinfo') as response:
                if response.status == 200:
                    try:
                        response_json = await response.json()
                        # Keep only the path part; it is re-joined with `url` below.
                        nodeinfo = response_json['links'][0]['href'].replace(f'https://{server}', '')
                    except _PARSE_ERRORS:
                        pass
                else:
                    print(f'Server {server} not responding: {response.status}')

            if nodeinfo is None:
                return None, None

            async with session.get(url + nodeinfo) as nodeinfo_response:
                if nodeinfo_response.status == 200:
                    try:
                        nodeinfo_json = await nodeinfo_response.json()
                    except _PARSE_ERRORS:
                        pass
                else:
                    print(f"Server {server}'s nodeinfo not responding: {nodeinfo_response.status}")
        except client_exceptions:
            # Connection refused, TLS failure, timeout, invalid URL, ...
            return nodeinfo, None

    return nodeinfo, nodeinfo_json


def _report_alive(server, soft, users, api, soft_version, show_version=True):
    """Persist a live server via write_api() and print the 'alive' banner."""
    write_api(server, soft, users, True, api, soft_version)
    label = f'{soft} {soft_version}' if show_version else f'{soft}'
    print(_BANNER)
    print(f'Server {server} ({label}) is alive!')
    print(_BANNER)


async def getsoft(server):
    """Probe ``server`` and record it when it exposes usable nodeinfo.

    The hostname is resolved first; unresolvable names are reported and
    skipped. The nodeinfo document is then fetched and interpreted according
    to the path it was served from (standard nodeinfo, x-nodeinfo2, or the
    Mastodon instance endpoint). Servers reporting more than ``_MAX_USERS``
    users are skipped silently. Anything that cannot be interpreted is
    reported as dead and nothing is written.

    Args:
        server: Hostname to probe.
    """
    try:
        socket.gethostbyname(server)
    except socket.gaierror as g_error:
        print(f'Server {server} error: {g_error}')
        return

    nodeinfo, nodeinfo_json = await _fetch_nodeinfo(server)
    if nodeinfo is None or nodeinfo_json is None:
        print(f'Server {server} is dead')
        return

    if nodeinfo == _MASTODON_INSTANCE_PATH:
        try:
            users = nodeinfo_json['stats']['user_count']
            soft_version = nodeinfo_json['version']
            oversized = users > _MAX_USERS
        except _PARSE_ERRORS:
            print(f'Server {server} is dead')
            return
        if oversized:
            return
        _report_alive(server, 'mastodon', users, nodeinfo, soft_version, show_version=False)
        return

    is_x_nodeinfo2 = nodeinfo == _X_NODEINFO2_PATH
    try:
        if is_x_nodeinfo2:
            soft = nodeinfo_json['server']['software'].lower()
            soft_version = nodeinfo_json['server']['version']
        else:
            soft = nodeinfo_json['software']['name'].lower()
            soft_version = nodeinfo_json['software']['version']
        users = nodeinfo_json['usage']['users']['total']
        oversized = users > _MAX_USERS
    except _PARSE_ERRORS:
        print(f'Server {server} is dead')
        return

    if oversized:
        return

    # Via x-nodeinfo2 only socialhome servers are recorded.
    if is_x_nodeinfo2 and soft != 'socialhome':
        print(f'Server {server} is dead')
        return

    _report_alive(server, soft, users, nodeinfo, soft_version)


def getserver(server, *args):
    """Validate a hostname and probe it with getsoft().

    Args:
        server: Hostname to probe. When extra positional arguments are
            given, ``server`` is indexed with ``[0]`` and the result is
            stripped of trailing dots and lower-cased
            (NOTE(review): presumably meant for row/tuple input - confirm).
        *args: Only their presence matters (see above).

    Names without a dot, or containing ``@``, ``/`` or ``:``, are ignored.
    Probe errors are printed rather than raised.
    """
    if args:
        server = server[0].rstrip('.').lower()

    if '.' not in server:
        return
    if any(ch in server for ch in '@/:'):
        return

    try:
        # asyncio.run creates and closes a fresh event loop per call.
        asyncio.run(getsoft(server))
    except Exception as error:
        print(f'Server {server} error: {error}')


def get_world_servers():
    """Return the servers in ``world`` that have not been checked yet.

    Database errors are printed and result in an empty (or partial) list.
    """
    world_servers = []
    conn = None
    try:
        conn = _connect_db()
        with conn.cursor() as cur:
            # get world servers list
            cur.execute("select server from world where checked='f'")
            world_servers = [row[0] for row in cur.fetchall()]
        print("Remaining servers: " + str(len(world_servers)))
    except Exception as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()
    return world_servers


def get_parameter(parameter, file_path):
    """Return the value of ``parameter`` from a ``key: value`` config file.

    Exits the process (status 0) when the file or the parameter is missing.
    """
    # Check if secrets file exists
    if not os.path.isfile(file_path):
        print("File %s not found, exiting." % file_path)
        sys.exit(0)

    # Find parameter in file. Match on "name:" so that a parameter which is a
    # prefix of another (fediverse_db / fediverse_db_user) is not confused.
    prefix = parameter + ":"
    with open(file_path) as f:
        for line in f:
            if line.startswith(prefix):
                return line[len(prefix):].strip()

    # Cannot find parameter, exit
    print(file_path + " Missing parameter %s " % parameter)
    sys.exit(0)


def usage():
    """Print the supported command-line invocations."""
    print(f'usage: python {sys.argv[0]} --multi (multiprocessing, fast)')
    print(f'usage: python {sys.argv[0]} --mono (one process, slow)')


def get_config():
    """Return ``mastodon_hostname`` from ``config/config.txt``."""
    # Load configuration from config file
    config_filepath = "config/config.txt"
    mastodon_hostname = get_parameter("mastodon_hostname", config_filepath)
    return mastodon_hostname


def get_db_config():
    """Return ``(fediverse_db, fediverse_db_user)`` from ``config/db_config.txt``."""
    # Load database config from db_config file
    db_config_filepath = "config/db_config.txt"
    fediverse_db = get_parameter("fediverse_db", db_config_filepath)
    fediverse_db_user = get_parameter("fediverse_db_user", db_config_filepath)
    return (fediverse_db, fediverse_db_user)


def main():
    """Parse the command line and crawl all unchecked servers."""
    # usage modes
    if len(sys.argv) != 2 or sys.argv[1] not in ('--multi', '--mono'):
        usage()
        return

    # Read both config files up front so a missing file or parameter stops
    # the run before any worker is started.
    get_config()
    get_db_config()

    world_servers = get_world_servers()

    if sys.argv[1] == '--multi':
        with get_context("spawn").Pool(processes=32) as pool:
            pool.map(getserver, world_servers)
            pool.close()
            pool.join()
    else:
        for server in world_servers:
            getserver(server)

    print('Done.')


###############################################################################
# main

if __name__ == '__main__':
    main()