# fediverse/scripts/fetchservers.py
#
# Repository web-view header: 336 lines, 8.3 KiB, Python
# (view links: Original / Normal view / History)

2021-05-14 10:39:26 +02:00
import time
from datetime import datetime
import os
import json
import sys
import os.path
2023-01-05 00:01:36 +01:00
from setup import Setup
from database import Database
import requests
import urllib3
2021-05-14 10:39:26 +02:00
import socket
import ray
2023-01-05 00:01:36 +01:00
import pdb
2021-05-14 10:39:26 +02:00
2023-01-10 01:11:34 +01:00
# Start Ray as soon as the module is loaded, declaring 25 CPUs to it.
# Adjust num_cpus to the CPU count of the system running this script.
ray.init(num_cpus = 25)
2021-05-14 10:39:26 +02:00
2022-05-06 13:53:53 +02:00
from ray.exceptions import (
RaySystemError,
RayError,
RayTaskError,
ObjectStoreFullError,
)
2023-01-05 00:01:36 +01:00
# Candidate metadata endpoints. getsoft() requests them in this order when
# '/.well-known/nodeinfo' does not answer with HTTP 200, and stops at the
# first one whose JSON body it recognises.
apis = [
    "/api/v1/instance",
    "/api/v1/nodeinfo",
    "/nodeinfo/2.0",
    "/nodeinfo/2.0.json",
    "/nodeinfo/2.1.json",
    "/main/nodeinfo/2.0",
    "/api/statusnet/config",
    "/api/nodeinfo/2.0.json",
    "/api/nodeinfo",
    "/wp-json/nodeinfo/2.0",
    "/api/v1/instance/nodeinfo/2.0",
    "/.well-known/x-nodeinfo2",
]
2021-05-14 10:39:26 +02:00
def is_json(myjson):
    """Return True when *myjson* can be parsed as a JSON document.

    Args:
        myjson: The candidate text (``str``, ``bytes`` or ``bytearray``).

    Returns:
        ``True`` if ``json.loads`` accepts the value, ``False`` otherwise.
        Values of an unsupported type (for example ``None``) also yield
        ``False`` instead of raising.
    """
    try:
        json.loads(myjson)
    except (ValueError, TypeError):
        # ValueError covers JSONDecodeError and bad encodings; TypeError is
        # what json.loads raises for input that is not str/bytes/bytearray.
        return False
    return True
# Servers reporting more users than this are skipped without being stored.
_MAX_USERS = 1000000

# Visual separator printed between per-server log entries.
_RULE = '*********************************************************************'


def _is_plain_hostname(server):
    """Return True when *server* contains a dot and none of '@', '/' or ':'."""
    return '.' in server and not any(char in server for char in '@/:')


def _nodeinfo_from_well_known(server, url, response):
    """Follow a '/.well-known/nodeinfo' answer to the nodeinfo document.

    Returns ``(path, document, '')`` on success and ``None`` otherwise.
    """
    try:
        links = response.json()['links']
        # The last link is used: that is index 0 for one link and index 1
        # for two (the only cases handled before); longer lists no longer
        # leave the path undefined.
        nodeinfo = links[-1]['href'].replace(f'https://{server}', '')
    except (ValueError, KeyError, IndexError, TypeError, AttributeError):
        print(f'{server} is not responding: error code {response.status_code}')
        print(_RULE)
        return None

    try:
        nodeinfo_data = requests.get(url + nodeinfo, headers=setup.user_agent, timeout=3)
        if nodeinfo_data.status_code != 200:
            print(f"{nodeinfo} not responding: error code {nodeinfo_data.status_code}")
            return None
        return nodeinfo, nodeinfo_data.json(), ''
    except Exception:
        # Best-effort probe: any network or decoding failure simply means
        # this server has no usable nodeinfo document.
        return None


def _probe_known_endpoints(url):
    """Try every path in ``apis`` and return the first recognised answer.

    Returns ``(path, document, soft_hint)`` or ``None``. ``soft_hint`` is
    ``'zap'`` when the document's title is 'Zap', otherwise ``''``.
    """
    for api in apis:
        try:
            response = requests.get(url + api, headers=setup.user_agent, timeout=3)
            if response.status_code != 200 or not is_json(response.text):
                continue
            document = response.json()
            if 'software' in document:
                return api, document, ''
            if 'title' in document:
                # A titled document is only accepted when it is a Zap one;
                # its 'version' key is deliberately not consulted.
                if document['title'] == 'Zap':
                    return api, document, 'zap'
            elif 'version' in document:
                return api, document, ''
        except Exception:
            # Best-effort probe: move on to the next candidate endpoint.
            continue
    return None


def _user_counts(document):
    """Return ``(total_users, monthly_active_users)`` from a usage section.

    Missing levels fall back to ``'0'`` and ``0`` respectively.
    """
    usage = document.get('usage') or {}
    users = usage.get('users') or {}
    return users.get('total') or '0', users.get('activeMonth') or 0


@ray.remote
def getsoft(server):
    """Detect the software a fediverse server runs and store it.

    The server is asked for '/.well-known/nodeinfo' first; when that does not
    answer with HTTP 200 the paths in ``apis`` are probed instead. When a
    usable document is found, the result is written with ``db.write_api``
    and a status line is printed.

    Relies on the module-level ``setup`` (request headers), ``db``, ``apis``
    and ``is_json``.

    Args:
        server: Host name of the server. Values without a dot, or containing
            '@', '/' or ':', are ignored.

    Returns:
        Always ``None``; the outcome is the database write and the prints.
    """
    if not _is_plain_hostname(server):
        return

    url = 'https://' + server

    try:
        response = requests.get(url + '/.well-known/nodeinfo', headers=setup.user_agent, timeout=3)
    except requests.exceptions.ChunkedEncodingError:
        print(f'ChunkedEncodingError! {server}')
        return
    except ray.exceptions.RaySystemError as ray_sys_error:
        print(ray_sys_error)
        return
    except (requests.exceptions.RequestException, urllib3.exceptions.LocationParseError):
        # Unreachable or misbehaving host: skip it silently. Catching the
        # RequestException base keeps an unlisted requests error from
        # aborting the whole Ray task.
        return

    if response.status_code == 200:
        found = _nodeinfo_from_well_known(server, url, response)
    else:
        found = _probe_known_endpoints(url)

    if found is None:
        print(f'Server {server} is dead')
        print(_RULE)
        return

    nodeinfo, nodeinfo_json, soft = found
    # NOTE(review): the endpoint checks used to compare against
    # '?'-suffixed paths that no entry of ``apis`` carries any more, which
    # made the NodeInfo2, Mastodon and Zap branches unreachable. Comparing
    # without the trailing '?' accepts both spellings - confirm.
    endpoint = nodeinfo.rstrip('?')
    alive = True

    if endpoint != '/api/v1/instance':
        try:
            if endpoint != '/.well-known/x-nodeinfo2':
                soft = nodeinfo_json['software']['name'].lower()
                soft_version = nodeinfo_json['software']['version']
                users, mau = _user_counts(nodeinfo_json)
                if int(users) > _MAX_USERS:
                    return
                db.write_api(server, soft, users, mau, alive, nodeinfo, soft_version)
                print(f"Server {server} ({soft} {soft_version}) is alive!")
                print(_RULE)
            else:
                soft = nodeinfo_json['server']['software'].lower()
                soft_version = nodeinfo_json['server']['version']
                users, mau = _user_counts(nodeinfo_json)
                if int(users) > _MAX_USERS:
                    return
                # Only Socialhome servers are stored from this endpoint.
                if soft == 'socialhome':
                    db.write_api(server, soft, users, mau, alive, nodeinfo, soft_version)
                    print(_RULE)
                    print(f"Server {server} ({soft} {soft_version}) is alive!")
                    print(_RULE)
        except Exception as err:
            # Report instead of silently dropping the server, so schema
            # surprises and database errors stay visible.
            print(f'Server {server}: could not register {nodeinfo}: {err!r}')
        return

    if soft == '':
        # An instance API answer without 'software' is treated as Mastodon.
        soft = 'mastodon'
        try:
            users = nodeinfo_json['stats']['user_count']
            if int(users) > _MAX_USERS:
                return
        except (KeyError, TypeError, ValueError):
            users = 0
        try:
            soft_version = nodeinfo_json['version']
        except (KeyError, TypeError):
            soft_version = 'unknown'
        mau = 0
        db.write_api(server, soft, users, mau, alive, nodeinfo, soft_version)
        print(_RULE)
        print(f"Server {server} ({soft}) is alive!")
    elif soft == 'zap':
        # Zap servers are only reported, not written to the database.
        try:
            users = nodeinfo_json['stats']['user_count']
        except (KeyError, TypeError):
            users = 0
        print(server, soft, users, alive, nodeinfo)
        print(_RULE)
        print(f"Server {server} ({soft}) is alive!")
2021-05-14 10:39:26 +02:00
###############################################################################
# main

if __name__ == '__main__':

    # name: fetchservers.py
    #
    # Fetches the peer list of mastodon.social, runs getsoft() on every peer
    # as a Ray task and records the start/finish time of the run.

    # getsoft() reads these two module-level names, so they must keep them.
    setup = Setup()
    db = Database()

    res = requests.get('https://' + 'mastodon.social' + setup.peers_api, headers=setup.user_agent, timeout=3)
    hostname_peers = res.json()

    start = datetime.now()
    program = 'fetchservers'

    # Record the run as started; the finish time is updated once it ends.
    finish = start
    db.save_time(program, start, finish)

    ray_start = time.time()

    try:
        results = ray.get([getsoft.remote(server) for server in hostname_peers])
        print(f"duration = {time.time() - ray_start}.\nprocessed servers: {len(results)}")
    except Exception as err:
        # Report the failure instead of hiding it; the run still counts as
        # finished below.
        print(f'fetchservers: Ray run failed: {err!r}')
    finally:
        # Saved even when the run is interrupted.
        finish = datetime.now()
        db.save_time(program, start, finish)