fediverse/fetchservers.py
2021-05-14 10:39:26 +02:00

242 líneas
6,8 KiB
Python

import time
start_time = time.time()
import urllib3
from urllib3 import exceptions
from datetime import datetime
from subprocess import call
from mastodon import Mastodon
import threading
import os
import json
import signal
import sys
import os.path
import requests
from requests import exceptions
import operator
import calendar
import psycopg2
from itertools import product
from multiprocessing import Pool, Lock, Process, Queue, current_process, Manager
import multiprocessing
import aiohttp
import aiodns
import asyncio
from aiohttp import ClientError, ClientSession, ClientConnectionError, ClientConnectorError, ClientSSLError, ClientConnectorSSLError, ServerTimeoutError
from asyncio import TimeoutError
import socket
from socket import gaierror, gethostbyname
from decimal import *
getcontext().prec = 2
apis = ['/nodeinfo/2.0?', '/nodeinfo/2.0.json?', '/main/nodeinfo/2.0?', '/api/statusnet/config?', '/api/nodeinfo/2.0.json?', '/api/nodeinfo?', '/api/v1/instance?','/wp-json/nodeinfo/2.0?']
client_exceptions = (
aiohttp.ClientResponseError,
aiohttp.ClientConnectionError,
aiohttp.ClientConnectorError,
aiohttp.ClientError,
asyncio.TimeoutError,
socket.gaierror,
)
now = datetime.now()
###############################################################################
# INITIALISATION
###############################################################################
def is_json(myjson):
try:
json_object = json.loads(myjson)
except ValueError as e:
return False
return True
def write_api(server, software, users, alive, api, soft_version):
insert_sql = "INSERT INTO fediverse(server, updated_at, software, users, alive, users_api, version) VALUES(%s,%s,%s,%s,%s,%s,%s) ON CONFLICT DO NOTHING"
conn = None
try:
conn = psycopg2.connect(database = fediverse_db, user = fediverse_db_user, password = "", host = "/var/run/postgresql", port = "5432")
cur = conn.cursor()
cur.execute(insert_sql, (server, now, software, users, alive, api, soft_version))
cur.execute("UPDATE fediverse SET updated_at=(%s), software=(%s), users=(%s), alive=(%s), users_api=(%s), version=(%s) where server=(%s)", (now, software, users, alive, api, soft_version, server))
cur.execute("UPDATE world SET checked='t' where server=(%s)", (server,))
conn.commit()
cur.close()
except (Exception, psycopg2.DatabaseError) as error:
print(error)
finally:
if conn is not None:
conn.close()
async def getsoft(server):
try:
socket.gethostbyname(server)
except socket.gaierror:
pass
return
soft = ''
url = 'https://' + server
timeout = aiohttp.ClientTimeout(total=3)
async with aiohttp.ClientSession(timeout=timeout) as session:
for api in apis:
try:
async with session.get(url+api) as response:
if response.status == 200:
try:
response_json = await response.json()
except:
pass
except aiohttp.ClientConnectorError as err:
pass
else:
if response.status == 200 and api != '/api/v1/instance?':
try:
soft = response_json['software']['name']
soft = soft.lower()
soft_version = response_json['software']['version']
users = response_json['usage']['users']['total']
if users > 1000000:
return
alive = True
write_api(server, soft, users, alive, api, soft_version)
print("Server " + server + " (" + soft + " " + soft_version + ") is alive!")
return
except:
pass
if response.status == 200 and soft == '' and api == "/api/v1/instance?":
soft = 'mastodon'
users = response_json['stats']['user_count']
soft_version = response_json['version']
if users > 1000000:
return
alive = True
write_api(server, soft, users, alive, api)
print("Server " + server + " (" + soft + ") is alive!")
def getserver(server, x):
server = server[0].rstrip('.').lower()
if server.find(".") == -1:
return
if server.find("@") != -1:
return
if server.find("/") != -1:
return
if server.find(":") != -1:
return
try:
loop = asyncio.get_event_loop()
coroutines = [getsoft(server)]
soft = loop.run_until_complete(asyncio.gather(*coroutines, return_exceptions=True))
except:
pass
# Returns the parameter from the specified file
def get_parameter( parameter, file_path ):
# Check if secrets file exists
if not os.path.isfile(file_path):
print("File %s not found, exiting."%file_path)
sys.exit(0)
# Find parameter in file
with open( file_path ) as f:
for line in f:
if line.startswith( parameter ):
return line.replace(parameter + ":", "").strip()
# Cannot find parameter, exit
print(file_path + " Missing parameter %s "%parameter)
sys.exit(0)
# Load configuration from config file
config_filepath = "config/config.txt"
mastodon_hostname = get_parameter("mastodon_hostname", config_filepath)
# Load database config from db_config file
db_config_filepath = "config/db_config.txt"
fediverse_db = get_parameter("fediverse_db", db_config_filepath)
fediverse_db_user = get_parameter("fediverse_db_user", db_config_filepath)
###############################################################################
# main
if __name__ == '__main__':
world_servers = []
try:
conn = None
conn = psycopg2.connect(database = fediverse_db, user = fediverse_db_user, password = "", host = "/var/run/postgresql", port = "5432")
cur = conn.cursor()
### get world servers list
cur.execute("select server from world where checked='f'")
for row in cur:
world_servers.append(row[0])
cur.close()
print("Remaining servers: " + str(len(world_servers)))
except (Exception, psycopg2.DatabaseError) as error:
print(error)
finally:
if conn is not None:
conn.close()
###########################################################################
# multiprocessing!
m = Manager()
q = m.Queue()
z = zip(world_servers)
serv_number = len(world_servers)
pool_tuple = [(x, q) for x in z]
with Pool(processes=64) as pool:
pool.starmap(getserver, pool_tuple)
print('Done.')