"""Crawl the peers of a Mastodon server and store them in PostgreSQL.

The script asks ``mastodon_hostname`` for its peer list, then asks each of
those peers for its own peer list, and finally inserts every distinct
hostname it collected into the ``world`` table.
"""

import json
import os
import pdb
import sys
import threading
import time
from datetime import datetime

import psycopg2
import requests

# Hostnames discovered so far, in discovery order. Worker threads add to this
# list concurrently, so every addition goes through _remember_peers(), which
# holds _world_peers_lock and uses _seen_peers for constant-time duplicate
# checks instead of scanning the list.
world_peers = []
_seen_peers = set()
_world_peers_lock = threading.Lock()


def _remember_peers(candidates):
    """Append every not-yet-seen hostname in *candidates* to ``world_peers``.

    Entries that are not strings are skipped: they cannot be hostnames and
    may not even be hashable.
    """
    with _world_peers_lock:
        for candidate in candidates:
            if not isinstance(candidate, str) or candidate in _seen_peers:
                continue
            _seen_peers.add(candidate)
            world_peers.append(candidate)


def write_servers(world_servers):
    """Insert every hostname in *world_servers* into the ``world`` table.

    Reads the module-level names ``fediverse_db``, ``fediverse_db_user``,
    ``peer`` and ``updated_at``, which the ``__main__`` section sets before
    calling this function. Rows that already exist are left untouched
    (``ON CONFLICT DO NOTHING``). Database errors are printed, not raised:
    a failing row is rolled back and the remaining rows are still attempted.

    Args:
        world_servers: sequence of server hostnames to store.
    """
    total = len(world_servers)
    if not total:
        # Nothing to write, so do not even open a connection.
        return

    insert_sql = (
        "INSERT INTO world(server, federated_with, updated_at, saved_at) "
        "VALUES(%s,%s,%s,%s) ON CONFLICT DO NOTHING"
    )

    conn = None
    try:
        # One connection for the whole batch instead of one per row.
        conn = psycopg2.connect(
            database=fediverse_db,
            user=fediverse_db_user,
            password="",
            host="/var/run/postgresql",
            port="5432",
        )
        with conn.cursor() as cur:
            for i, server in enumerate(world_servers):
                saved_at = datetime.now()
                try:
                    # NOTE(review): `peer` is whatever the last loop over
                    # hostname_peers in the main section left behind, so every
                    # row gets the same federated_with value. This looks
                    # unintended; behaviour is preserved until confirmed.
                    cur.execute(insert_sql, (server, peer, updated_at, saved_at))
                    print(f'writing to database {i} of {total}: {server}')
                    conn.commit()
                except psycopg2.Error as error:
                    print(error)
                    # A failed statement aborts the open transaction; roll it
                    # back so the following rows can still be inserted.
                    conn.rollback()
    except psycopg2.Error as error:
        # Connection failure, or a connection that died mid-batch.
        print(error)
    finally:
        if conn is not None:
            conn.close()


def get_peers(peer):
    """Fetch the peer list of *peer* and record any hostnames not seen before.

    Reads the module-level name ``peers_api`` set by the ``__main__`` section.
    This is a best-effort crawl: a server that is unreachable, times out,
    answers with a non-200 status or does not return a JSON list is skipped
    silently.

    Args:
        peer: hostname of the server to query.
    """
    user_agent = {'User-agent': 'Mozilla/5.0'}
    try:
        response = requests.get(
            'https://' + peer + peers_api, headers=user_agent, timeout=3
        )
        if response.status_code != 200:
            return
        response_json = response.json()
    except (requests.RequestException, ValueError, TypeError):
        # RequestException: network, TLS, timeout or invalid-URL problems.
        # ValueError: body is not JSON, or the hostname cannot be encoded.
        # TypeError: *peer* is not a string.
        return

    if not isinstance(response_json, list):
        # For example a JSON error object returned with status 200.
        return

    print(f"Server: {peer}, federated with {len(response_json)} servers")
    _remember_peers(response_json)


class Peerthreads(threading.Thread):
    """Worker thread that crawls one peer while holding a semaphore slot."""

    def __init__(self, peer, peers_semaphore):
        """Store the hostname to crawl and the semaphore limiting concurrency."""
        super().__init__()

        self.sql_conn = None
        self.peer = peer
        self.peers_semaphore = peers_semaphore

    def run(self):
        """Crawl ``self.peer`` once a semaphore slot is available."""
        # `with` guarantees the slot is released even if get_peers() raises.
        with self.peers_semaphore:
            get_peers(self.peer)


# Returns the parameter from the specified file
def get_parameter(parameter, file_path):
    """Return the value of *parameter* from the ``name: value`` file *file_path*.

    Each line is split at its first colon and the name part must equal
    *parameter* exactly, so looking up ``fediverse_db`` does not match a
    ``fediverse_db_user: ...`` line. Prints a message and exits the process
    if the file does not exist or the parameter is not present.

    Args:
        parameter: name of the setting to look up.
        file_path: path of the configuration file.

    Returns:
        The text after the colon, with surrounding whitespace removed.
    """
    # Check if secrets file exists
    # NOTE(review): both error paths exit with status 0; kept as-is so that
    # wrappers relying on the exit code are not affected.
    if not os.path.isfile(file_path):
        print("File %s not found, exiting." % file_path)
        sys.exit(0)

    # Find parameter in file
    with open(file_path) as f:
        for line in f:
            name, separator, value = line.partition(":")
            if separator and name.strip() == parameter:
                return value.strip()

    # Cannot find parameter, exit
    print(file_path + " Missing parameter %s " % parameter)
    sys.exit(0)


###############################################################################
# main

if __name__ == '__main__':

    start_time = time.time()

    updated_at = datetime.now()

    peers_api = '/api/v1/instance/peers?'
    lemmy_api = '/api/v2/site?'

    # Load configuration from config file
    config_filepath = "config/config.txt"
    mastodon_hostname = get_parameter("mastodon_hostname", config_filepath)

    # Load database config from db_config file
    db_config_filepath = "config/db_config.txt"
    fediverse_db = get_parameter("fediverse_db", db_config_filepath)
    fediverse_db_user = get_parameter("fediverse_db_user", db_config_filepath)

    user_agent = {'User-agent': 'Mozilla/5.0'}
    res = requests.get(
        'https://' + mastodon_hostname + peers_api, headers=user_agent, timeout=3
    )
    # Fail loudly on an HTTP error instead of parsing an error page as peers.
    res.raise_for_status()
    hostname_peers = res.json()

    # Seed the result list with the direct peers of our own server.
    _remember_peers(hostname_peers)

    # One thread per direct peer; the semaphore caps concurrent requests.
    peers_semaphore = threading.Semaphore(100)
    peer_threads = []
    # write_servers() later reads the module-level name `peer`, so this loop
    # variable must keep that name.
    for peer in hostname_peers:
        p_thread = Peerthreads(peer, peers_semaphore)
        peer_threads.append(p_thread)
        p_thread.start()

    for p_thread in peer_threads:
        p_thread.join()

    write_servers(world_peers)

    exec_time = str(round((time.time() - start_time), 2))
    print(exec_time)