From c9142101ebf6b8710cac8319bc58d261cdb1e3d9 Mon Sep 17 00:00:00 2001 From: spla Date: Sat, 29 May 2021 12:44:12 +0200 Subject: [PATCH] Deleted getworld.py, new getpeers.py --- README.md | 2 +- fetchservers.py | 13 ++- getpeers.py | 153 ++++++++++++++++++++++++++ getworld.py | 282 ------------------------------------------------ 4 files changed, 162 insertions(+), 288 deletions(-) create mode 100644 getpeers.py delete mode 100644 getworld.py diff --git a/README.md b/README.md index 22a3305..29416a0 100644 --- a/README.md +++ b/README.md @@ -19,7 +19,7 @@ Within Python Virtual Environment: 3. Run `python setup.py` to get your bot's access token of your Mastodon or Pleroma server existing account. It will be saved to 'secrets/secrets.txt' for further use. -4. Run `python getworld.py` to get all peers from your host and the whole world of fediverse's servers (or almost the whole world). +4. Run `python getpeers.py` to get all peers from your host and the whole world of fediverse's servers (or almost the whole world). 5. Run `python fetchservers.py` to add servers to alive servers database. diff --git a/fetchservers.py b/fetchservers.py index 850da48..1a89af6 100644 --- a/fetchservers.py +++ b/fetchservers.py @@ -65,6 +65,7 @@ def write_api(server, software, users, alive, api, soft_version): async def getsoft(server): + try: socket.gethostbyname(server) @@ -119,7 +120,7 @@ async def getsoft(server): def getserver(server, x): server = server[0].rstrip('.').lower() - + if server.find(".") == -1: return if server.find("@") != -1: @@ -171,7 +172,7 @@ fediverse_db_user = get_parameter("fediverse_db_user", db_config_filepath) # main if __name__ == '__main__': - + now = datetime.now() start_time = time.time() @@ -181,8 +182,7 @@ if __name__ == '__main__': conn = None - conn = psycopg2.connect(database=fediverse_db, user=fediverse_db_user, password="", host="/var/run/postgresql", - port="5432") + conn = psycopg2.connect(database=fediverse_db, user=fediverse_db_user, password="", host="/var/run/postgresql", port="5432") cur = conn.cursor() @@ -190,7 +190,10 @@ if __name__ == '__main__': cur.execute("select server from world where checked='f'") - for row in cur: + rows = cur.fetchall() + + for row in rows: + world_servers.append(row[0]) cur.close() diff --git a/getpeers.py b/getpeers.py new file mode 100644 index 0000000..cd60fe8 --- /dev/null +++ b/getpeers.py @@ -0,0 +1,153 @@ +import os +import sys +import time +from datetime import datetime +import requests +import threading +import json +import psycopg2 +import pdb + +def write_servers(world_servers): + + i = 0 + + while i < len(world_servers): + + saved_at = datetime.now() + + insert_sql = "INSERT INTO world(server, federated_with, updated_at, saved_at) VALUES(%s,%s,%s,%s) ON CONFLICT DO NOTHING" + + conn = None + + try: + + conn = psycopg2.connect(database = fediverse_db, user = fediverse_db_user, password = "", host = "/var/run/postgresql", port = "5432") + + cur = conn.cursor() + + cur.execute(insert_sql, (world_servers[i], peer, updated_at, saved_at,)) + + print('writing to database ' + str(i) + ' of ' + str(len(world_servers)) + ': ' + world_servers[i]) + + conn.commit() + + cur.close() + + except (Exception, psycopg2.DatabaseError) as error: + + print(error) + + finally: + + if conn is not None: + + conn.close() + + i += 1 + +def get_peers(peer): + + try: + + response = requests.get('https://' + peer + peers_api, timeout=2) + + response_json = response.json() + + if response.status_code == 200: + + try: + + print("Server: " + peer + ", " + "federated with " + str(len(response_json)) + " servers") + + for peer_peer in response_json: + + exist_count = world_peers.count(peer_peer) + + if exist_count == 0: + + world_peers.append(peer_peer) + except: + + pass + except: + + pass + +class Peerthreads(threading.Thread): + def __init__(self, peer, peers_semaphore): + threading.Thread.__init__(self) + self.sql_conn = None + self.peer = peer + self.peers_semaphore = peers_semaphore + + def run(self): + self.peers_semaphore.acquire() + get_peers(self.peer) + self.peers_semaphore.release() + +# Returns the parameter from the specified file +def get_parameter( parameter, file_path ): + # Check if secrets file exists + if not os.path.isfile(file_path): + print("File %s not found, exiting."%file_path) + sys.exit(0) + + # Find parameter in file + with open( file_path ) as f: + for line in f: + if line.startswith( parameter ): + return line.replace(parameter + ":", "").strip() + + # Cannot find parameter, exit + print(file_path + " Missing parameter %s "%parameter) + sys.exit(0) + +############################################################################### +# main + +if __name__ == '__main__': + + start_time = time.time() + + updated_at = datetime.now() + + peers_api = '/api/v1/instance/peers?' + lemmy_api = '/api/v2/site?' + + # Load configuration from config file + config_filepath = "config/config.txt" + mastodon_hostname = get_parameter("mastodon_hostname", config_filepath) + + # Load database config from db_config file + db_config_filepath = "config/db_config.txt" + fediverse_db = get_parameter("fediverse_db", db_config_filepath) + fediverse_db_user = get_parameter("fediverse_db_user", db_config_filepath) + + world_peers = [] + + res = requests.get('https://' + mastodon_hostname + peers_api) + + hostname_peers = res.json() + + for peer in hostname_peers: + + exist_count = world_peers.count(peer) + + if exist_count == 0: + + world_peers.append(peer) + + peers_semaphore = threading.Semaphore(100) + peer_threads = [] + for peer in hostname_peers: + p_thread = Peerthreads(peer, peers_semaphore) + peer_threads.append(p_thread) + p_thread.start() + for p_thread in peer_threads: + p_thread.join() + + write_servers(world_peers) + + exec_time = str(round((time.time() - start_time), 2)) + print(exec_time) diff --git a/getworld.py b/getworld.py deleted file mode 100644 index 506e797..0000000 --- a/getworld.py +++ /dev/null @@ -1,282 +0,0 @@ -import time -start_time = time.time() -from six.moves import urllib -from datetime import datetime -from subprocess import call -from mastodon import Mastodon -import threading -import os -import json -import signal -import sys -import os.path -import requests -import operator -import calendar -import psycopg2 -from itertools import product - -from multiprocessing import Pool, Lock, Process, Queue, current_process -import queue -import multiprocessing - -import aiohttp -import aiodns -import asyncio -from aiohttp import ClientError, ClientSession, ClientConnectionError, ClientConnectorError, ClientSSLError, ClientConnectorSSLError, ServerTimeoutError -from asyncio import TimeoutError -import socket -from socket import gaierror, gethostbyname - -updated_at = datetime.now() -peers_api = '/api/v1/instance/peers?' -lemmy_api = '/api/v2/site?' - -def is_json(myjson): - try: - json_object = json.loads(myjson) - except ValueError as e: - return False - return True - -def get_lemmy_server(server): - - if server.find(".") == -1: - return - if server.find("@") != -1: - return - if server.find("/") != -1: - return - if server.find(":") != -1: - return - - try: - - loop = asyncio.get_event_loop() - coroutines = [get_lemmy_peers(server)] - loop.run_until_complete(asyncio.gather(*coroutines, return_exceptions=True)) - - except: - - pass - -def getserver(server): - - if server.find(".") == -1: - return - if server.find("@") != -1: - return - if server.find("/") != -1: - return - if server.find(":") != -1: - return - - try: - - loop = asyncio.get_event_loop() - coroutines = [getpeers(server)] - loop.run_until_complete(asyncio.gather(*coroutines, return_exceptions=True)) - - except: - - pass - -async def get_lemmy_peers(server): - - try: - - socket.gethostbyname(server) - - except socket.gaierror: - - return - - url = 'https://' + server - - timeout = aiohttp.ClientTimeout(total=3) - async with aiohttp.ClientSession(timeout=timeout) as session: - - try: - async with session.get(url+lemmy_api) as resp: - response = await resp.json() - if resp.status == 200: - try: - data = response['federated_instances']['linked'] - print("Server: " + server + ", " + "federated with " + str(len(data)) + " servers") - i = 0 - while i < len(data): - - saved_at = datetime.now() - insert_sql = "INSERT INTO world(server, federated_with, updated_at, saved_at) VALUES(%s,%s,%s,%s) ON CONFLICT DO NOTHING" - conn = None - try: - - conn = psycopg2.connect(database = fediverse_db, user = fediverse_db_user, password = "", host = "/var/run/postgresql", port = "5432") - - cur = conn.cursor() - - cur.execute(insert_sql, (data[i], server, updated_at, saved_at,)) - - conn.commit() - - cur.close() - - except (Exception, psycopg2.DatabaseError) as error: - - print(error) - - finally: - - if conn is not None: - - conn.close() - - i += 1 - - except: - - pass - - except aiohttp.ClientConnectorError as err: - - pass - - -async def getpeers(server): - - try: - - socket.gethostbyname(server) - - except socket.gaierror: - - return - - url = 'https://' + server - - timeout = aiohttp.ClientTimeout(total=3) - - async with aiohttp.ClientSession(timeout=timeout) as session: - - try: - - async with session.get(url+peers_api) as resp: - - response = await resp.json() - - if resp.status == 200: - - try: - - response_json = response - - print("Server: " + server + ", " + "federated with " + str(len(response_json)) + " servers") - - i = 0 - - while i < len(response_json): - - saved_at = datetime.now() - - insert_sql = "INSERT INTO world(server, federated_with, updated_at, saved_at) VALUES(%s,%s,%s,%s) ON CONFLICT DO NOTHING" - - conn = None - - try: - - conn = psycopg2.connect(database = fediverse_db, user = fediverse_db_user, password = "", host = "/var/run/postgresql", port = "5432") - - cur = conn.cursor() - - cur.execute(insert_sql, (response_json[i], server, updated_at, saved_at,)) - - conn.commit() - - cur.close() - - except (Exception, psycopg2.DatabaseError) as error: - - print(error) - - finally: - - if conn is not None: - - conn.close() - - i += 1 - - except: - - pass - - except aiohttp.ClientConnectorError as err: - - pass - -############################################################################### -# INITIALISATION -############################################################################### - -# Returns the parameter from the specified file -def get_parameter( parameter, file_path ): - # Check if secrets file exists - if not os.path.isfile(file_path): - print("File %s not found, exiting."%file_path) - sys.exit(0) - - # Find parameter in file - with open( file_path ) as f: - for line in f: - if line.startswith( parameter ): - return line.replace(parameter + ":", "").strip() - - # Cannot find parameter, exit - print(file_path + " Missing parameter %s "%parameter) - sys.exit(0) - -# Load secrets from secrets file -secrets_filepath = "secrets/secrets.txt" -uc_client_id = get_parameter("uc_client_id", secrets_filepath) -uc_client_secret = get_parameter("uc_client_secret", secrets_filepath) -uc_access_token = get_parameter("uc_access_token", secrets_filepath) - -# Load configuration from config file -config_filepath = "config/config.txt" -mastodon_hostname = get_parameter("mastodon_hostname", config_filepath) - -# Load database config from db_config file -db_config_filepath = "config/db_config.txt" -fediverse_db = get_parameter("fediverse_db", db_config_filepath) -fediverse_db_user = get_parameter("fediverse_db_user", db_config_filepath) - -# Initialise Mastodon API -mastodon = Mastodon( - client_id = uc_client_id, - client_secret = uc_client_secret, - access_token = uc_access_token, - api_base_url = 'https://' + mastodon_hostname, -) - -# Initialise access headers -headers={ 'Authorization': 'Bearer %s'%uc_access_token } - -############################################################################### -# main - -if __name__ == '__main__': - - lemmy_server = 'lemmy.ml' - get_lemmy_server(lemmy_server) - - getserver(mastodon_hostname) - self_peers = mastodon.instance_peers() - - ########################################################################### - - nprocs = multiprocessing.cpu_count() - with multiprocessing.Pool(processes=nprocs) as pool: - results = pool.starmap(getserver, product(self_peers)) - - exec_time = str(round((time.time() - start_time), 2)) - print(exec_time)