fediverse/fetchservers.py
2023-01-10 01:11:34 +01:00

336 líneas
8,3 KiB
Python

import time
from datetime import datetime
import os
import json
import sys
import os.path
from setup import Setup
from database import Database
import requests
import urllib3
import socket
import ray
import pdb
ray.init(num_cpus = 25) # Specify this system CPUs.
from ray.exceptions import (
RaySystemError,
RayError,
RayTaskError,
ObjectStoreFullError,
)
# Fallback endpoints probed in this order by getsoft() when a server's
# /.well-known/nodeinfo request does not answer with HTTP 200.
apis = [
    "/api/v1/instance",
    "/api/v1/nodeinfo",
    "/nodeinfo/2.0",
    "/nodeinfo/2.0.json",
    "/nodeinfo/2.1.json",
    "/main/nodeinfo/2.0",
    "/api/statusnet/config",
    "/api/nodeinfo/2.0.json",
    "/api/nodeinfo",
    "/wp-json/nodeinfo/2.0",
    "/api/v1/instance/nodeinfo/2.0",
    "/.well-known/x-nodeinfo2",
]
def is_json(myjson):
    """Return True when *myjson* parses as a JSON document, False otherwise.

    Args:
        myjson: Text (``str``, ``bytes`` or ``bytearray``) to check.  Any
            other type, including ``None``, is reported as not-JSON instead
            of raising.

    Returns:
        ``True`` if ``json.loads`` accepts the value, ``False`` otherwise.
    """
    try:
        json.loads(myjson)
    except (ValueError, TypeError):
        # ValueError covers malformed JSON (JSONDecodeError) and undecodable
        # bytes; TypeError is what json.loads raises for non-text input.
        return False
    return True
@ray.remote
def getsoft(server):
    """Probe one peer host name and record its software if it is alive.

    Discovery first asks ``/.well-known/nodeinfo``.  When that answers with
    HTTP 200 the advertised nodeinfo document is fetched; otherwise every
    path in the module-level ``apis`` list is tried in order.  A recognised
    server is stored through ``db.write_api``; a server for which no usable
    endpoint was found is reported as dead.

    Relies on the module-level ``setup`` (supplies the request headers) and
    ``db`` objects created in the ``__main__`` section of this file.

    Args:
        server: Peer host name.  Values without a dot, or containing ``@``,
            ``/`` or ``:``, are ignored.

    Returns:
        Always ``None``; results are reported through ``db.write_api`` and
        ``print``.
    """
    # Only bare host names are probed: skip entries without a dot and entries
    # containing "@", "/" or ":".
    if "." not in server or any(sep in server for sep in "@/:"):
        return

    max_users = 1000000  # servers reporting more users than this are skipped
    separator = '*********************************************************************'

    soft = ''
    nodeinfo = None        # path of the endpoint that identified the server
    nodeinfo_json = None   # decoded document served by that endpoint
    is_nodeinfo = False
    url = 'https://' + server

    try:
        response = requests.get(url + '/.well-known/nodeinfo', headers=setup.user_agent, timeout=3)
    except requests.exceptions.ChunkedEncodingError:
        print(f'ChunkedEncodingError! {server}')
        return
    except (requests.exceptions.RequestException, urllib3.exceptions.LocationParseError):
        # TLS, connection, timeout, redirect or URL problems: the host is
        # simply skipped, without any report.
        return
    except ray.exceptions.RaySystemError as ray_sys_error:
        print(ray_sys_error)
        return

    if response.status_code == 200:
        try:
            links = response.json()['links']
            if links:
                # Use the last advertised link; with a single entry this is
                # that entry, with two entries it is the second one.
                nodeinfo = links[-1]['href'].replace(f'https://{server}', '')
        except Exception:
            # Body was not JSON or did not have the expected "links" layout.
            print(f'{server} is not responding: error code {response.status_code}')
            print(separator)

        if nodeinfo is not None:
            try:
                nodeinfo_data = requests.get(url + nodeinfo, headers=setup.user_agent, timeout=3)
                if nodeinfo_data.status_code == 200:
                    nodeinfo_json = nodeinfo_data.json()
                    is_nodeinfo = True
                else:
                    print(f"{nodeinfo} not responding: error code {nodeinfo_data.status_code}")
            except Exception:
                # Best effort: a failing or non-JSON nodeinfo document leaves
                # the server classified as dead below.
                pass
    else:
        for api in apis:
            try:
                response = requests.get(url + api, headers=setup.user_agent, timeout=3)
                if response.status_code != 200 or not is_json(response.text):
                    continue
                nodeinfo_json = response.json()
                if 'software' in nodeinfo_json:
                    nodeinfo = api
                    is_nodeinfo = True
                    break
                elif 'title' in nodeinfo_json:
                    if nodeinfo_json['title'] == 'Zap':
                        nodeinfo = api
                        is_nodeinfo = True
                        soft = 'zap'
                        break
                elif 'version' in nodeinfo_json:
                    nodeinfo = api
                    is_nodeinfo = True
                    break
            except Exception:
                # Best effort: any failure on one endpoint just moves on to
                # the next candidate path.
                continue

    if not is_nodeinfo:
        print(f'Server {server} is dead')
        print(separator)
        return

    # The entries of ``apis`` carry no trailing "?", so compare on the path
    # with any trailing "?" removed; the raw ``nodeinfo`` is what gets stored.
    endpoint = nodeinfo.rstrip('?')

    if endpoint == '/api/v1/instance':
        if soft == '':
            # Instance API without a nodeinfo "software" section: recorded
            # as Mastodon with whatever statistics are available.
            soft = 'mastodon'
            try:
                users = nodeinfo_json['stats']['user_count']
                if int(users) > max_users:
                    return
            except Exception:
                users = 0
            try:
                soft_version = nodeinfo_json['version']
            except Exception:
                soft_version = 'unknown'
            mau = 0
            alive = True
            db.write_api(server, soft, users, mau, alive, nodeinfo, soft_version)
            print(separator)
            print(f"Server {server} ({soft}) is alive!")
        elif soft == 'zap':
            # NOTE(review): Zap servers are only printed, never written to
            # the database - confirm this is intended.
            try:
                users = nodeinfo_json['stats']['user_count']
            except Exception:
                users = 0
            alive = True
            print(server, soft, users, alive, nodeinfo)
            print(separator)
            print(f"Server {server} ({soft}) is alive!")
        return

    try:
        if endpoint == '/.well-known/x-nodeinfo2':
            soft = nodeinfo_json['server']['software'].lower()
            soft_version = nodeinfo_json['server']['version']
            user_stats = nodeinfo_json['usage']['users']
            users = user_stats['total']
            if int(users) > max_users:
                return
            mau = user_stats.get('activeMonth') or 0
            alive = True
            # On this endpoint only socialhome servers are stored.
            if soft == 'socialhome':
                db.write_api(server, soft, users, mau, alive, nodeinfo, soft_version)
                print(separator)
                print(f"Server {server} ({soft} {soft_version}) is alive!")
                print(separator)
        else:
            software = nodeinfo_json['software']
            soft = software['name'].lower()
            soft_version = software['version']
            # Usage statistics are optional: fall back to '0' users / 0 MAU
            # when the document does not provide them.
            user_stats = (nodeinfo_json.get('usage') or {}).get('users') or {}
            users = user_stats.get('total') or '0'
            if int(users) > max_users:
                return
            mau = user_stats.get('activeMonth') or 0
            alive = True
            db.write_api(server, soft, users, mau, alive, nodeinfo, soft_version)
            print(f"Server {server} ({soft} {soft_version}) is alive!")
            print(separator)
    except Exception as err:
        # Document did not have the expected layout (or the write failed);
        # report it instead of failing the whole Ray task.
        print(f'{server}: could not record {nodeinfo}: {err!r}')
###############################################################################
# main
if __name__ == '__main__':
    ## name: fetchservers.py
    # ``setup`` and ``db`` are deliberately module-level names: getsoft()
    # reads them as globals.
    setup = Setup()
    db = Database()

    # Fetch the list of host names to probe from mastodon.social, using the
    # path given by ``setup.peers_api``.
    res = requests.get('https://' + 'mastodon.social' + setup.peers_api, headers=setup.user_agent, timeout=3)
    # Fail loudly on an HTTP error instead of iterating over an error body.
    res.raise_for_status()
    hostname_peers = res.json()

    start = datetime.now()
    program = 'fetchservers'

    # Record the start of the run; the finish time is not known yet, so the
    # start time is stored in its place and overwritten at the end.
    finish = start
    db.save_time(program, start, finish)

    ray_start = time.time()
    try:
        # One Ray task per peer; ray.get blocks until every task finished.
        results = ray.get([getsoft.remote(server) for server in hostname_peers])
        print(f"duration = {time.time() - ray_start}.\nprocessed servers: {len(results)}")
    except Exception as err:
        # A failing task must not prevent the finish time from being saved,
        # but the failure is reported instead of being silently dropped.
        print(f"{program}: error while collecting results: {err!r}")

    finish = datetime.now()
    db.save_time(program, start, finish)