fediverse/fetchservers.py

226 líneas
6.1 KiB
Python

import time
from datetime import datetime
import os
import json
import sys
import os.path
import psycopg2
from multiprocessing import Pool, Manager
import aiohttp
import asyncio
import socket
apis = ['/nodeinfo/2.0?', '/nodeinfo/2.0.json?', '/main/nodeinfo/2.0?', '/api/statusnet/config?',
'/api/nodeinfo/2.0.json?', '/api/nodeinfo?', '/api/v1/instance?', '/wp-json/nodeinfo/2.0?']
client_exceptions = (
aiohttp.ClientResponseError,
aiohttp.ClientConnectionError,
aiohttp.ClientConnectorError,
aiohttp.ClientError,
asyncio.TimeoutError,
socket.gaierror,
)
def is_json(myjson):
try:
json_object = json.loads(myjson)
except ValueError as e:
return False
return True
def write_api(server, software, users, alive, api, soft_version):
insert_sql = "INSERT INTO fediverse(server, updated_at, software, users, alive, users_api, version) VALUES(%s,%s,%s,%s,%s,%s,%s) ON CONFLICT DO NOTHING"
conn = None
try:
conn = psycopg2.connect(database=fediverse_db, user=fediverse_db_user, password="", host="/var/run/postgresql",
port="5432")
cur = conn.cursor()
cur.execute(insert_sql, (server, now, software, users, alive, api, soft_version))
cur.execute(
"UPDATE fediverse SET updated_at=(%s), software=(%s), users=(%s), alive=(%s), users_api=(%s), version=(%s) where server=(%s)",
(now, software, users, alive, api, soft_version, server))
cur.execute("UPDATE world SET checked='t' where server=(%s)", (server,))
conn.commit()
cur.close()
except (Exception, psycopg2.DatabaseError) as error:
print(error)
finally:
if conn is not None:
conn.close()
async def getsoft(server):
try:
socket.gethostbyname(server)
except socket.gaierror:
pass
return
soft = ''
url = 'https://' + server
timeout = aiohttp.ClientTimeout(total=3)
async with aiohttp.ClientSession(timeout=timeout) as session:
for api in apis:
try:
async with session.get(url + api) as response:
if response.status == 200:
try:
response_json = await response.json()
except:
pass
except aiohttp.ClientConnectorError as err:
pass
else:
if response.status == 200 and api != '/api/v1/instance?':
try:
soft = response_json['software']['name']
soft = soft.lower()
soft_version = response_json['software']['version']
users = response_json['usage']['users']['total']
if users > 1000000:
return
alive = True
write_api(server, soft, users, alive, api, soft_version)
print("Server " + server + " (" + soft + " " + soft_version + ") is alive!")
return
except:
pass
if response.status == 200 and soft == '' and api == "/api/v1/instance?":
soft = 'mastodon'
users = response_json['stats']['user_count']
soft_version = response_json['version']
if users > 1000000:
return
alive = True
write_api(server, soft, users, alive, api)
print("Server " + server + " (" + soft + ") is alive!")
def getserver(server, x):
server = server[0].rstrip('.').lower()
if server.find(".") == -1:
return
if server.find("@") != -1:
return
if server.find("/") != -1:
return
if server.find(":") != -1:
return
try:
loop = asyncio.get_event_loop()
coroutines = [getsoft(server)]
soft = loop.run_until_complete(asyncio.gather(*coroutines, return_exceptions=True))
except:
pass
# Returns the parameter from the specified file
def get_parameter(parameter, file_path):
# Check if secrets file exists
if not os.path.isfile(file_path):
print("File %s not found, exiting." % file_path)
sys.exit(0)
# Find parameter in file
with open(file_path) as f:
for line in f:
if line.startswith(parameter):
return line.replace(parameter + ":", "").strip()
# Cannot find parameter, exit
print(file_path + " Missing parameter %s " % parameter)
sys.exit(0)
# Load configuration from config file
config_filepath = "config/config.txt"
mastodon_hostname = get_parameter("mastodon_hostname", config_filepath)
# Load database config from db_config file
db_config_filepath = "config/db_config.txt"
fediverse_db = get_parameter("fediverse_db", db_config_filepath)
fediverse_db_user = get_parameter("fediverse_db_user", db_config_filepath)
###############################################################################
# main
if __name__ == '__main__':
now = datetime.now()
start_time = time.time()
world_servers = []
try:
conn = None
conn = psycopg2.connect(database=fediverse_db, user=fediverse_db_user, password="", host="/var/run/postgresql", port="5432")
cur = conn.cursor()
# get world servers list
cur.execute("select server from world where checked='f'")
rows = cur.fetchall()
for row in rows:
world_servers.append(row[0])
cur.close()
print("Remaining servers: " + str(len(world_servers)))
except (Exception, psycopg2.DatabaseError) as error:
print(error)
finally:
if conn is not None:
conn.close()
###########################################################################
# multiprocessing!
m = Manager()
q = m.Queue()
z = zip(world_servers)
serv_number = len(world_servers)
pool_tuple = [(x, q) for x in z]
with Pool(processes=64) as pool:
pool.starmap(getserver, pool_tuple)
print('Done.')