2021-05-14 10:39:26 +02:00
|
|
|
import time
|
|
|
|
from datetime import datetime
|
|
|
|
import os
|
|
|
|
import json
|
|
|
|
import sys
|
|
|
|
import os.path
|
2023-01-05 00:01:36 +01:00
|
|
|
from setup import Setup
|
|
|
|
from database import Database
|
2022-03-13 18:06:07 +01:00
|
|
|
import requests
|
|
|
|
import urllib3
|
2021-05-14 10:39:26 +02:00
|
|
|
import socket
|
2022-03-13 18:06:07 +01:00
|
|
|
import ray
|
2023-01-05 00:01:36 +01:00
|
|
|
import pdb
|
2021-05-14 10:39:26 +02:00
|
|
|
|
2023-01-05 00:01:36 +01:00
|
|
|
#ray.init(num_cpus = 25) # Specify this system CPUs.
|
2021-05-14 10:39:26 +02:00
|
|
|
|
2022-05-06 13:53:53 +02:00
|
|
|
from ray.exceptions import (
|
|
|
|
RaySystemError,
|
|
|
|
RayError,
|
|
|
|
RayTaskError,
|
|
|
|
ObjectStoreFullError,
|
|
|
|
)
|
|
|
|
|
2023-01-05 00:01:36 +01:00
|
|
|
# Fallback metadata endpoints. getsoft() requests them in this order when
# '/.well-known/nodeinfo' does not answer with HTTP 200, and stops at the
# first one whose JSON looks like server metadata.
apis = [
    '/api/v1/instance',
    '/api/v1/nodeinfo',
    '/nodeinfo/2.0',
    '/nodeinfo/2.0.json',
    '/nodeinfo/2.1.json',
    '/main/nodeinfo/2.0',
    '/api/statusnet/config',
    '/api/nodeinfo/2.0.json',
    '/api/nodeinfo',
    '/wp-json/nodeinfo/2.0',
    '/api/v1/instance/nodeinfo/2.0',
    '/.well-known/x-nodeinfo2',
]
|
2021-05-14 10:39:26 +02:00
|
|
|
|
2022-03-13 18:06:07 +01:00
|
|
|
def is_json(myjson):
    """Return True when ``myjson`` parses as a JSON document, else False.

    ``myjson`` is handed to ``json.loads``. Malformed text raises
    ``ValueError`` (``json.JSONDecodeError`` is a subclass) and input of an
    unsupported type, such as ``None``, raises ``TypeError``; both are
    reported as False instead of propagating to the caller.
    """
    try:
        json.loads(myjson)
    except (ValueError, TypeError):
        return False
    return True
|
|
|
|
|
|
|
|
@ray.remote
def getsoft(server):
    """Probe one host for fediverse server software and record the result.

    The host is first asked for ``/.well-known/nodeinfo``. When that does
    not answer with HTTP 200, every path in the module-level ``apis`` list
    is requested in order until one returns JSON that looks like server
    metadata. On success the software name, version and user count are
    passed to ``db.write_api`` and an "is alive" message is printed.

    Reads the module globals ``setup`` (its ``user_agent`` is sent as the
    request headers), ``db``, ``apis`` and ``is_json``.

    Args:
        server: Host name to probe. Values without a dot, or containing
            ``@``, ``/`` or ``:``, are ignored.

    Returns:
        None in every case; results are reported through ``db.write_api``
        and ``print``.
    """
    # Only plain host names are probed.
    if '.' not in server or any(sep in server for sep in '@/:'):
        return

    banner = '*********************************************************************'
    soft = ''
    is_nodeinfo = False
    nodeinfo = None
    nodeinfo_json = None
    url = 'https://' + server

    try:
        response = requests.get(url + '/.well-known/nodeinfo', headers=setup.user_agent, timeout=3)

        if response.status_code == 200:
            try:
                links = response.json()['links']
                # A single link is used as is; with two links the second one
                # is used. Any other count leaves the host without a
                # nodeinfo document, so it is reported as dead below.
                if len(links) in (1, 2):
                    nodeinfo = links[-1]['href'].replace(f'https://{server}', '')
                    try:
                        nodeinfo_data = requests.get(url + nodeinfo, headers=setup.user_agent, timeout=3)
                        if nodeinfo_data.status_code == 200:
                            nodeinfo_json = nodeinfo_data.json()
                            is_nodeinfo = True
                        else:
                            print(f"{nodeinfo} not responding: error code {nodeinfo_data.status_code}")
                    except Exception:
                        # Best effort: an unreachable or malformed nodeinfo
                        # document simply leaves is_nodeinfo False.
                        pass
            except Exception:
                print(f'{server} is not responding: error code {response.status_code}')
                print(banner)
        else:
            for api in apis:
                try:
                    response = requests.get(url + api, headers=setup.user_agent, timeout=3)
                    if response.status_code != 200 or not is_json(response.text):
                        continue

                    candidate = response.json()
                    if 'software' in candidate:
                        matched = True
                    elif 'title' in candidate:
                        matched = candidate['title'] == 'Zap'
                        if matched:
                            soft = 'zap'
                    else:
                        matched = 'version' in candidate

                    if matched:
                        nodeinfo = api
                        nodeinfo_json = candidate
                        is_nodeinfo = True
                        break
                except Exception:
                    # Best effort: a failing endpoint is skipped and the next
                    # one in the list is tried.
                    continue

    except (
        requests.exceptions.SSLError,
        requests.exceptions.HTTPError,
        requests.exceptions.ConnectionError,
        requests.exceptions.ReadTimeout,
        requests.exceptions.TooManyRedirects,
        urllib3.exceptions.LocationParseError,
        requests.exceptions.InvalidURL,
    ):
        # The host could not be contacted at all; nothing is reported.
        return
    except requests.exceptions.ChunkedEncodingError:
        print(f'ChunkedEncodingError! {server}')
        return
    except ray.exceptions.RaySystemError as ray_sys_error:
        print(ray_sys_error)
        return

    if not is_nodeinfo:
        print(f'Server {server} is dead')
        print(banner)
        return

    # NOTE(review): the literals compared against below end in '?', while no
    # entry of `apis` does, so the x-nodeinfo2, mastodon and zap branches look
    # unreachable from the fallback loop -- confirm the intended spelling.
    if nodeinfo != '/api/v1/instance?':
        if nodeinfo != '/.well-known/x-nodeinfo2?':
            try:
                soft = nodeinfo_json['software']['name'].lower()
                soft_version = nodeinfo_json['software']['version']
                # Missing or empty usage data counts as 0 users. The default
                # has to be an int because it is compared to one right below.
                usage = nodeinfo_json.get('usage') or {}
                users = (usage.get('users') or {}).get('total') or 0
                if users > 1000000:
                    return

                alive = True
                db.write_api(server, soft, users, alive, nodeinfo, soft_version)
                print(f"Server {server} ({soft} {soft_version}) is alive!")
                print(banner)
                return
            except Exception as err:
                print(f'{server}: unable to record nodeinfo data: {err!r}')
        else:
            try:
                soft = nodeinfo_json['server']['software'].lower()
                soft_version = nodeinfo_json['server']['version']
                users = nodeinfo_json['usage']['users']['total']
                if users > 1000000:
                    return

                alive = True
                if soft == 'socialhome':
                    db.write_api(server, soft, users, alive, nodeinfo, soft_version)
                    print(banner)
                    print(f"Server {server} ({soft} {soft_version}) is alive!")
                    print(banner)
                    return
            except Exception as err:
                print(f'{server}: unable to record x-nodeinfo2 data: {err!r}')

    if soft == '' and nodeinfo == "/api/v1/instance?":
        soft = 'mastodon'

        try:
            users = nodeinfo_json['stats']['user_count']
            if users > 1000000:
                return
        except Exception:
            users = 0

        try:
            soft_version = nodeinfo_json['version']
        except Exception:
            soft_version = 'unknown'

        alive = True
        db.write_api(server, soft, users, alive, nodeinfo, soft_version)
        print(banner)
        print(f"Server {server} ({soft}) is alive!")

    elif soft == 'zap' and nodeinfo == "/api/v1/instance?":
        users = nodeinfo_json['stats']['user_count']
        soft_version = nodeinfo_json['version']
        alive = True

        # `nodeinfo` holds the matched endpoint, i.e. the value the loop
        # variable had when the search stopped.
        print(server, soft, users, alive, nodeinfo)
        print(banner)
        print(f"Server {server} ({soft}) is alive!")
|
2021-05-14 10:39:26 +02:00
|
|
|
|
|
|
|
###############################################################################
|
|
|
|
# main
|
|
|
|
|
|
|
|
if __name__ == '__main__':

    ## name: fetchservers.py

    # Both objects are module-level globals on purpose: getsoft() reads
    # `setup.user_agent` and calls `db.write_api`.
    setup = Setup()
    db = Database()

    # The list of hosts to probe is the JSON body returned by mastodon.social
    # for the path configured in `setup.peers_api`.
    res = requests.get('https://' + 'mastodon.social' + setup.peers_api, headers=setup.user_agent, timeout=3)
    hostname_peers = res.json()

    program = 'fetchservers'
    start = datetime.now()

    # First call passes the start time as the finish time as well; the real
    # finish time is saved once the run is over.
    db.save_time(program, start, start)

    ray_start = time.time()

    try:
        results = ray.get([getsoft.remote(server) for server in hostname_peers])
        print(f"duration = {time.time() - ray_start}.\nprocessed servers: {len(results)}")
    except Exception as err:
        # Best effort: a failed run is reported instead of silently ignored,
        # and the finish time is still saved below.
        print(f'{program}: run aborted: {err!r}')
    finally:
        finish = datetime.now()
        db.save_time(program, start, finish)
|