"""Probe the servers listed in the ``world`` table and record their software.

For every server whose ``checked`` flag is ``'f'``, a ray task requests the
server's nodeinfo document (or one of the fallback endpoints in ``apis``)
and, when one is found, stores software name, version and user count in the
``fediverse`` table and marks the server as checked.
"""

import json
import os
import os.path
import socket
import sys
import time
from datetime import datetime

import psycopg2
import ray
import requests
import urllib3
from ray.exceptions import (
    ObjectStoreFullError,
    RayError,
    RaySystemError,
    RayTaskError,
)

ray.init(num_cpus = 32)  # Set this to the number of CPUs of this system.

# Fallback endpoints, tried in order when /.well-known/nodeinfo is not a 200.
apis = ['/api/v1/instance?',
        '/api/v1/nodeinfo?',
        '/nodeinfo/2.0?',
        '/nodeinfo/2.0.json?',
        '/nodeinfo/2.1.json?',
        '/main/nodeinfo/2.0?',
        '/api/statusnet/config?',
        '/api/nodeinfo/2.0.json?',
        '/api/nodeinfo?',
        '/wp-json/nodeinfo/2.0?',
        '/api/v1/instance/nodeinfo/2.0?',
        '/.well-known/x-nodeinfo2?'
        ]

_SEPARATOR = '*********************************************************************'
_USER_AGENT = {'User-agent': "fediverse's stats (fediverse@mastodont.cat)"}
_REQUEST_TIMEOUT = 3
# Servers reporting more users than this are skipped without being recorded.
_MAX_USERS = 1000000
# Errors that indexing into / comparing values of a remote JSON document can raise.
_MALFORMED = (KeyError, IndexError, TypeError, AttributeError)


def is_json(myjson):
    """Return True if *myjson* can be parsed by ``json.loads``, else False."""
    try:
        json.loads(myjson)
    except (ValueError, TypeError):
        return False
    return True


def _connect():
    """Open a connection to the database named in ``config/db_config.txt``."""
    fediverse_db, fediverse_db_user = get_db_config()
    return psycopg2.connect(database=fediverse_db, user=fediverse_db_user, password="", host="/var/run/postgresql", port="5432")


def write_api(server, software, users, alive, api, soft_version):
    """Insert or refresh *server*'s row in ``fediverse`` and mark it checked.

    The row's ``updated_at`` is taken from the module-level ``now``, which is
    assigned by the ``__main__`` section of this script. Any error is printed,
    not raised.
    """
    insert_sql = "INSERT INTO fediverse(server, updated_at, software, users, alive, users_api, version) VALUES(%s,%s,%s,%s,%s,%s,%s) ON CONFLICT DO NOTHING"

    conn = None

    try:

        conn = _connect()

        cur = conn.cursor()

        print(f'Writing {server} nodeinfo data...')

        # Insert first (a no-op when the row exists), then update, so the row
        # ends up current in both cases.
        cur.execute(insert_sql, (server, now, software, users, alive, api, soft_version))

        cur.execute(
            "UPDATE fediverse SET updated_at=(%s), software=(%s), users=(%s), alive=(%s), users_api=(%s), version=(%s) where server=(%s)",
            (now, software, users, alive, api, soft_version, server)
        )

        cur.execute("UPDATE world SET checked='t' where server=(%s)", (server,))

        conn.commit()

        cur.close()

    except (Exception, psycopg2.DatabaseError) as error:

        print(error)

    finally:

        if conn is not None:

            conn.close()


def _find_nodeinfo(server, url):
    """Locate a nodeinfo-like JSON document for *server*.

    Returns ``(nodeinfo, nodeinfo_json, soft)`` where *nodeinfo* is the path
    the document was fetched from and *soft* is ``'zap'`` when the document
    identified itself as Zap, otherwise ``''``. Returns ``None`` when nothing
    usable was found. Errors raised by the initial ``/.well-known/nodeinfo``
    request propagate to the caller.
    """
    response = requests.get(url + '/.well-known/nodeinfo', headers=_USER_AGENT, timeout=_REQUEST_TIMEOUT)

    if response.status_code == 200:

        try:
            nodeinfo = response.json()['links'][0]['href'].replace(f'https://{server}', '')
        except (ValueError,) + _MALFORMED:
            print(f'Server {server} not responding: error code {response.status_code}')
            print(_SEPARATOR)
            return None

        try:
            nodeinfo_data = requests.get(url + nodeinfo, headers=_USER_AGENT, timeout=_REQUEST_TIMEOUT)
            if nodeinfo_data.status_code == 200:
                return nodeinfo, nodeinfo_data.json(), ''
            print(f"Server {server}'s nodeinfo not responding: error code {nodeinfo_data.status_code}")
        except Exception:
            # Best-effort probe: any failure just means no nodeinfo document.
            pass

        return None

    for api in apis:

        try:

            response = requests.get(url + api, headers=_USER_AGENT, timeout=_REQUEST_TIMEOUT)

            if not is_json(response.text):
                continue

            nodeinfo_json = response.json()

            # NOTE(review): the original indentation of this chain was lost.
            # 'version' is treated as a sibling of the 'title' test, so a
            # document with a non-Zap 'title' is skipped even if it also has
            # 'version' -- confirm this is the intended reading.
            if 'software' in nodeinfo_json:
                return api, nodeinfo_json, ''
            elif 'title' in nodeinfo_json:
                if nodeinfo_json['title'] == 'Zap':
                    return api, nodeinfo_json, 'zap'
            elif 'version' in nodeinfo_json:
                return api, nodeinfo_json, ''

        except Exception:
            # Best-effort probe: try the next endpoint.
            pass

    return None


@ray.remote
def getsoft(server):
    """Detect which software *server* runs and record it through ``write_api``.

    Names without a dot, or containing ``@``, ``/`` or ``:``, are ignored, as
    is ``z.fedipen.xyz``. Request failures on the first probe end the task
    quietly; a server for which no document is found is reported as dead.
    Always returns ``None``.
    """
    if '.' not in server or any(char in server for char in '@/:'):
        return

    if server == 'z.fedipen.xyz':
        return

    url = 'https://' + server

    try:
        found = _find_nodeinfo(server, url)
    except requests.exceptions.ChunkedEncodingError:
        print(f'ChunkedEncodingError! {server}')
        return
    except (requests.exceptions.RequestException, urllib3.exceptions.LocationParseError):
        # Covers SSL, connection, timeout, redirect and invalid-URL errors as
        # well as any other requests failure, so that a single odd host cannot
        # make the task raise.
        return
    except ray.exceptions.RaySystemError as ray_sys_error:
        print(ray_sys_error)
        return

    # NOTE(review): the original indentation was lost; "is dead" is taken to
    # pair with "no document found" -- confirm.
    if found is None:
        print(f'Server {server} is dead')
        print(_SEPARATOR)
        return

    nodeinfo, nodeinfo_json, soft = found
    alive = True

    if nodeinfo == '/api/v1/instance?':

        if soft == '':

            soft = 'mastodon'

            try:
                users = nodeinfo_json['stats']['user_count']
                if users > _MAX_USERS:
                    return
            except _MALFORMED:
                users = 0

            try:
                soft_version = nodeinfo_json['version']
            except _MALFORMED:
                soft_version = 'unknown'

            write_api(server, soft, users, alive, nodeinfo, soft_version)

            print(_SEPARATOR)
            print(f"Server {server} ({soft}) is alive!")

        elif soft == 'zap':

            try:
                users = nodeinfo_json['stats']['user_count']
                soft_version = nodeinfo_json['version']
            except _MALFORMED:
                return

            # NOTE(review): unlike the other branches this one only prints and
            # never calls write_api -- confirm that is intended.
            print(server, soft, users, alive, nodeinfo)
            print(_SEPARATOR)
            print(f"Server {server} ({soft}) is alive!")

        return

    # x-nodeinfo2 keeps the software name under server/software; every other
    # endpoint is read as software/name.
    is_x_nodeinfo2 = nodeinfo == '/.well-known/x-nodeinfo2?'
    section, name_key = ('server', 'software') if is_x_nodeinfo2 else ('software', 'name')

    try:
        soft = nodeinfo_json[section][name_key].lower()
        soft_version = nodeinfo_json[section]['version']
        users = nodeinfo_json['usage']['users']['total']
        if users > _MAX_USERS:
            return
    except _MALFORMED:
        return

    if not is_x_nodeinfo2:

        write_api(server, soft, users, alive, nodeinfo, soft_version)

        print(f"Server {server} ({soft} {soft_version}) is alive!")
        print(_SEPARATOR)

    elif soft == 'socialhome':

        write_api(server, soft, users, alive, nodeinfo, soft_version)

        print(_SEPARATOR)
        print(f"Server {server} ({soft} {soft_version}) is alive!")
        print(_SEPARATOR)


def get_world_servers():
    """Return the servers in ``world`` whose ``checked`` flag is ``'f'``.

    Database errors are printed and result in an empty (or partial) list.
    """
    world_servers = []

    conn = None

    try:

        conn = _connect()

        cur = conn.cursor()

        # get world servers list

        cur.execute("select server from world where checked='f'")

        world_servers = [row[0] for row in cur.fetchall()]

        cur.close()

        print("Remaining servers: " + str(len(world_servers)))

    except (Exception, psycopg2.DatabaseError) as error:

        print(error)

    finally:

        if conn is not None:

            conn.close()

    return world_servers


def save_time(program, start, finish):
    """Store *program*'s start and finish times in ``execution_time``.

    Database errors are printed, not raised.
    """
    insert_sql = "INSERT INTO execution_time(program, start, finish) VALUES(%s,%s,%s) ON CONFLICT DO NOTHING"

    conn = None

    try:

        conn = _connect()

        cur = conn.cursor()

        # Insert first (a no-op when the row exists), then update.
        cur.execute(insert_sql, (program, start, finish,))

        cur.execute("UPDATE execution_time SET start=(%s), finish=(%s) where program=(%s)", (start, finish, program))

        conn.commit()

        cur.close()

    except (Exception, psycopg2.DatabaseError) as error:

        print(error)

    finally:

        if conn is not None:

            conn.close()


def get_parameter(parameter, file_path):
    """Return the value of *parameter* from the ``key: value`` file *file_path*.

    The key must match *parameter* exactly, so ``fediverse_db`` does not pick
    up a ``fediverse_db_user`` line. Prints a message and exits the process
    (status 0) when the file or the parameter is missing.
    """
    # Check if secrets file exists
    if not os.path.isfile(file_path):
        print("File %s not found, exiting." % file_path)
        sys.exit(0)

    # Find parameter in file
    with open(file_path) as f:
        for line in f:
            key, separator, value = line.partition(":")
            if separator and key.strip() == parameter:
                return value.strip()

    # Cannot find parameter, exit
    print(file_path + " Missing parameter %s " % parameter)
    sys.exit(0)


def get_config():
    """Return ``mastodon_hostname`` from ``config/config.txt``."""
    # Load configuration from config file
    config_filepath = "config/config.txt"
    mastodon_hostname = get_parameter("mastodon_hostname", config_filepath)
    return mastodon_hostname


def get_db_config():
    """Return ``(fediverse_db, fediverse_db_user)`` from ``config/db_config.txt``."""
    # Load database config from db_config file
    db_config_filepath = "config/db_config.txt"
    fediverse_db = get_parameter("fediverse_db", db_config_filepath)
    fediverse_db_user = get_parameter("fediverse_db_user", db_config_filepath)
    return (fediverse_db, fediverse_db_user)


###############################################################################
# main

if __name__ == '__main__':

    ## name: fetchservers.py

    fediverse_db, fediverse_db_user = get_db_config()

    start = datetime.now()

    program = 'fetchservers'

    finish = start

    save_time(program, start, finish)

    # write_api() stamps every row with this value.
    now = start

    mastodon_hostname = get_config()

    world_servers = get_world_servers()

    ray_start = time.time()

    pending = [getsoft.remote(server) for server in world_servers]

    # Collect results one task at a time so that a single failed task is
    # reported without discarding the rest of the batch.
    results = []

    for task in pending:

        try:

            results.append(ray.get(task))

        except RayError as error:

            print(error)

    print(f"duration = {time.time() - ray_start}.\nprocessed servers: {len(results)}")

    finish = datetime.now()

    save_time(program, start, finish)