2020-05-17 21:28:03 +02:00
|
|
|
import time
|
|
|
|
start_time = time.time()  # wall-clock start; total runtime is printed at the end of __main__
|
|
|
|
from six.moves import urllib
|
|
|
|
from datetime import datetime
|
|
|
|
from subprocess import call
|
|
|
|
from mastodon import Mastodon
|
|
|
|
import threading
|
|
|
|
import os
|
|
|
|
import json
|
|
|
|
import signal
|
|
|
|
import sys
|
|
|
|
import os.path
|
|
|
|
import requests
|
|
|
|
import operator
|
|
|
|
import calendar
|
|
|
|
import psycopg2
|
|
|
|
from itertools import product
|
|
|
|
|
|
|
|
from multiprocessing import Pool, Lock, Process, Queue, current_process
|
2020-06-05 13:24:58 +02:00
|
|
|
import queue
|
2020-05-17 21:28:03 +02:00
|
|
|
import multiprocessing
|
|
|
|
|
2020-06-05 13:24:58 +02:00
|
|
|
import aiohttp
|
|
|
|
import aiodns
|
|
|
|
import asyncio
|
|
|
|
from aiohttp import ClientError, ClientSession, ClientConnectionError, ClientConnectorError, ClientSSLError, ClientConnectorSSLError, ServerTimeoutError
|
|
|
|
from asyncio import TimeoutError
|
|
|
|
import socket
|
|
|
|
from socket import gaierror, gethostbyname
|
2020-05-17 21:28:03 +02:00
|
|
|
|
2020-06-05 13:24:58 +02:00
|
|
|
# Single timestamp for this crawl run; every row written during the run
# shares the same updated_at value (saved_at is set per row instead).
updated_at = datetime.now()

# Mastodon-compatible endpoint listing the instances a server federates with.
peers_api = '/api/v1/instance/peers?'

# Lemmy endpoint whose JSON response carries 'federated_instances'.
lemmy_api = '/api/v2/site?'
|
2020-05-17 21:28:03 +02:00
|
|
|
|
|
|
|
def is_json(myjson):
    """Return True if *myjson* is a valid JSON document, False otherwise.

    Only ValueError (and its subclass json.JSONDecodeError) is treated as
    "not JSON"; a non-string argument still raises TypeError, as before.
    """
    try:
        # Original bound the result to an unused variable; the parse
        # side effect is all we need here.
        json.loads(myjson)
    except ValueError:
        return False
    return True
|
|
|
|
|
2021-05-14 10:39:26 +02:00
|
|
|
def get_lemmy_server(server):
    """Validate *server* as a bare hostname and crawl its Lemmy peer list.

    Strings without a dot, or containing '@', '/' or ':' (emails, URLs with
    paths, host:port forms) are rejected with an early return. The crawl
    itself is best-effort: any failure is swallowed.
    """
    # Accept only bare hostnames such as 'lemmy.ml'.
    if server.find(".") == -1:
        return
    if server.find("@") != -1:
        return
    if server.find("/") != -1:
        return
    if server.find(":") != -1:
        return

    try:
        loop = asyncio.get_event_loop()
        coroutines = [get_lemmy_peers(server)]
        # return_exceptions=True keeps a failing coroutine from raising here.
        loop.run_until_complete(asyncio.gather(*coroutines, return_exceptions=True))
    except Exception:
        # Was a bare 'except:'; narrowed so KeyboardInterrupt/SystemExit
        # are no longer swallowed. Crawl failures remain best-effort.
        pass
|
|
|
|
|
2020-06-05 13:24:58 +02:00
|
|
|
def getserver(server):
    """Validate *server* as a bare hostname and crawl its Mastodon peer list.

    Strings without a dot, or containing '@', '/' or ':' are rejected with
    an early return. The crawl itself is best-effort: failures are swallowed.
    """
    # Accept only bare hostnames such as 'mastodon.social'.
    if server.find(".") == -1:
        return
    if server.find("@") != -1:
        return
    if server.find("/") != -1:
        return
    if server.find(":") != -1:
        return

    try:
        loop = asyncio.get_event_loop()
        coroutines = [getpeers(server)]
        # return_exceptions=True keeps a failing coroutine from raising here.
        loop.run_until_complete(asyncio.gather(*coroutines, return_exceptions=True))
    except Exception:
        # Was a bare 'except:'; narrowed so KeyboardInterrupt/SystemExit
        # are no longer swallowed. Crawl failures remain best-effort.
        pass
|
2020-05-17 21:28:03 +02:00
|
|
|
|
2021-05-14 10:39:26 +02:00
|
|
|
async def get_lemmy_peers(server):
    """Fetch the federation list of a Lemmy *server* and store each
    (peer, server) pair in the Postgres 'world' table.

    Returns silently when the host does not resolve or the HTTPS connection
    fails; per-row database errors are printed but do not abort the loop.
    Relies on module globals: lemmy_api, updated_at, fediverse_db,
    fediverse_db_user.
    """
    # Cheap DNS pre-check before spending an HTTP round trip.
    try:
        socket.gethostbyname(server)
    except socket.gaierror:
        return

    url = 'https://' + server

    # Hard 3-second cap on the whole request so one slow host
    # cannot stall the crawl.
    timeout = aiohttp.ClientTimeout(total=3)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        try:
            async with session.get(url+lemmy_api) as resp:
                response = await resp.json()
                if resp.status == 200:
                    try:
                        # Hostnames this Lemmy instance federates with.
                        data = response['federated_instances']['linked']
                        print("Server: " + server + ", " + "federated with " + str(len(data)) + " servers")
                        i = 0
                        while i < len(data):
                            saved_at = datetime.now()
                            # One row per peer; duplicates are ignored by the
                            # table's conflict clause.
                            insert_sql = "INSERT INTO world(server, federated_with, updated_at, saved_at) VALUES(%s,%s,%s,%s) ON CONFLICT DO NOTHING"
                            conn = None
                            try:
                                # NOTE(review): opens a fresh connection per
                                # row — hoisting it out of the loop would cut
                                # overhead considerably; verify before changing.
                                conn = psycopg2.connect(database = fediverse_db, user = fediverse_db_user, password = "", host = "/var/run/postgresql", port = "5432")
                                cur = conn.cursor()
                                cur.execute(insert_sql, (data[i], server, updated_at, saved_at,))
                                conn.commit()
                                cur.close()
                            except (Exception, psycopg2.DatabaseError) as error:
                                print(error)
                            finally:
                                if conn is not None:
                                    conn.close()
                            i += 1
                    except:
                        # Missing/malformed 'federated_instances' keys or a
                        # non-JSON body: skip this server.
                        pass
        except aiohttp.ClientConnectorError as err:
            # Server unreachable over HTTPS: skip.
            pass
|
|
|
|
|
|
|
|
|
2020-06-05 13:24:58 +02:00
|
|
|
async def getpeers(server):
    """Fetch the peer list of a Mastodon-compatible *server* and store each
    (peer, server) pair in the Postgres 'world' table.

    Returns silently when the host does not resolve or the HTTPS connection
    fails; per-row database errors are printed but do not abort the loop.
    Relies on module globals: peers_api, updated_at, fediverse_db,
    fediverse_db_user.
    """
    # Cheap DNS pre-check before spending an HTTP round trip.
    try:
        socket.gethostbyname(server)
    except socket.gaierror:
        return

    url = 'https://' + server

    # Hard 3-second cap on the whole request so one slow host
    # cannot stall the crawl.
    timeout = aiohttp.ClientTimeout(total=3)
    async with aiohttp.ClientSession(timeout=timeout) as session:
        try:
            async with session.get(url+peers_api) as resp:
                response = await resp.json()
                if resp.status == 200:
                    try:
                        # /api/v1/instance/peers returns a flat JSON list of
                        # hostnames.
                        response_json = response
                        print("Server: " + server + ", " + "federated with " + str(len(response_json)) + " servers")
                        i = 0
                        while i < len(response_json):
                            saved_at = datetime.now()
                            # One row per peer; duplicates are ignored by the
                            # table's conflict clause.
                            insert_sql = "INSERT INTO world(server, federated_with, updated_at, saved_at) VALUES(%s,%s,%s,%s) ON CONFLICT DO NOTHING"
                            conn = None
                            try:
                                # NOTE(review): opens a fresh connection per
                                # row — hoisting it out of the loop would cut
                                # overhead considerably; verify before changing.
                                conn = psycopg2.connect(database = fediverse_db, user = fediverse_db_user, password = "", host = "/var/run/postgresql", port = "5432")
                                cur = conn.cursor()
                                cur.execute(insert_sql, (response_json[i], server, updated_at, saved_at,))
                                conn.commit()
                                cur.close()
                            except (Exception, psycopg2.DatabaseError) as error:
                                print(error)
                            finally:
                                if conn is not None:
                                    conn.close()
                            i += 1
                    except:
                        # Unexpected body shape (e.g. not a list): skip server.
                        pass
        except aiohttp.ClientConnectorError as err:
            # Server unreachable over HTTPS: skip.
            pass
|
2020-05-17 21:28:03 +02:00
|
|
|
|
2020-06-05 13:24:58 +02:00
|
|
|
###############################################################################
|
|
|
|
# INITIALISATION
|
|
|
|
###############################################################################
|
2020-05-17 21:28:03 +02:00
|
|
|
|
|
|
|
# Returns the parameter from the specified file
|
|
|
|
def get_parameter( parameter, file_path ):
    """Return the value of *parameter* from a "key: value" text file.

    The first line starting with *parameter* wins; the "key:" prefix is
    stripped and the remainder returned with surrounding whitespace removed.
    Exits the process when the file or the parameter is missing.
    """
    # Check if secrets file exists
    if not os.path.isfile(file_path):
        print("File %s not found, exiting."%file_path)
        # Was sys.exit(0): a missing config file is an error, so signal
        # failure to the shell with a nonzero status.
        sys.exit(1)

    # Find parameter in file
    with open( file_path ) as f:
        for line in f:
            if line.startswith( parameter ):
                return line.replace(parameter + ":", "").strip()

    # Cannot find parameter, exit
    print(file_path + " Missing parameter %s "%parameter)
    sys.exit(1)  # was sys.exit(0): see above
|
|
|
|
|
|
|
|
# Load secrets from secrets file.
# get_parameter() exits the process if the file or any key is missing,
# so everything below can assume valid values.
secrets_filepath = "secrets/secrets.txt"
uc_client_id = get_parameter("uc_client_id", secrets_filepath)
uc_client_secret = get_parameter("uc_client_secret", secrets_filepath)
uc_access_token = get_parameter("uc_access_token", secrets_filepath)

# Load configuration from config file
config_filepath = "config/config.txt"
mastodon_hostname = get_parameter("mastodon_hostname", config_filepath)

# Load database config from db_config file
db_config_filepath = "config/db_config.txt"
fediverse_db = get_parameter("fediverse_db", db_config_filepath)
fediverse_db_user = get_parameter("fediverse_db_user", db_config_filepath)

# Initialise Mastodon API client for the home instance
mastodon = Mastodon(
    client_id = uc_client_id,
    client_secret = uc_client_secret,
    access_token = uc_access_token,
    api_base_url = 'https://' + mastodon_hostname,
)

# Initialise access headers (for raw requests outside the Mastodon client)
headers={ 'Authorization': 'Bearer %s'%uc_access_token }
|
|
|
|
|
|
|
|
###############################################################################
|
2020-06-05 13:24:58 +02:00
|
|
|
# main
|
2020-05-17 21:28:03 +02:00
|
|
|
|
2020-06-05 13:24:58 +02:00
|
|
|
if __name__ == '__main__':

    # Seed the crawl with a well-known Lemmy instance.
    lemmy_server = 'lemmy.ml'
    get_lemmy_server(lemmy_server)

    # Crawl the home Mastodon instance, then ask it who it federates with.
    getserver(mastodon_hostname)
    self_peers = mastodon.instance_peers()

    ###########################################################################

    # Fan the per-peer crawl out across all CPU cores.
    nprocs = multiprocessing.cpu_count()
    with multiprocessing.Pool(processes=nprocs) as pool:
        # The original used starmap(getserver, product(self_peers)), which
        # merely wrapped each peer in a 1-tuple; pool.map is the direct,
        # equivalent call.
        results = pool.map(getserver, self_peers)

    # Report total wall-clock runtime in seconds.
    exec_time = str(round((time.time() - start_time), 2))
    print(exec_time)
|