Added fetchservers.py

This commit is contained in:
spla 2021-05-14 10:39:26 +02:00
pare 322368dfb6
commit a935ef8029
S'han modificat 4 arxius amb 358 adicions i 16 eliminacions

Veure arxiu

@ -13,13 +13,17 @@ At the end it post the results to host server bot account.
Within Python Virtual Environment:
1. Run `python db-setup.py` to setup and create new Postgresql database and needed tables in it.
1. Run `pip install -r requirements.txt` to install needed libraries.
2. Run `python setup.py` to get your bot's access token of your Mastodon or Pleroma server existing account. It will be saved to 'secrets/secrets.txt' for further use.
2. Run `python db-setup.py` to setup and create new Postgresql database and needed tables in it.
3. Run `python getworld.py` to get all peers from your host and the whole world of fediverse's servers (or almost the whole world).
3. Run `python setup.py` to get your bot's access token of your Mastodon or Pleroma server existing account. It will be saved to 'secrets/secrets.txt' for further use.
4. Run `python fediverse.py` to query world alive servers API. It gets data from server's API according this table:
4. Run `python getworld.py` to get all peers from your host and the whole world of fediverse's servers (or almost the whole world).
5. Run `python fetchservers.py` to add servers to alive servers database.
6. Run `python fediverse.py` to query the API of the world's alive servers. It gets data from each server's API according to this table:
| Software | API peers | API users (nodeinfo/2.0.json) | API users (nodeinfo/2.0) | API users (api/v1/instance) | API users (main/nodeinfo/2.0) | API users (api/nodeinfo/2.0.json) | API users (api/nodeinfo) | Software |
|:--------------:|:---------------------:|:------------------------------------------:|:----------------------------------------------------------------------------------------------:|:---------------------------:|:-----------------------------:|:---------------------------------:|:---------------------------:|:--------------:|
@ -69,9 +73,7 @@ Within Python Virtual Environment:
| writefreely | api/nodeinfo | ['software']['name'] |
| zap | nodeinfo/2.0.json | ['software']['name'] |
5. Use your favourite scheduling method to set `python fediverse.py` after `python getworld.py` to run regularly.
Note: install all needed packages with 'pip install package' or use 'pip install -r requirements.txt' to install them.
5. Use your favourite scheduling method to set `python fediverse.py` to run twice daily, `python fetchservers.py` to run once daily and `python getworld.py` to run monthly.
18.2.21 - New feature! Added [Lemmy project](https://join.lemmy.ml)
12.5.21 - New feature! Added WordPress support. The code can now detect WordPress instances that have the ActivityPub plugin enabled.

242
fetchservers.py Normal file
Veure arxiu

@ -0,0 +1,242 @@
import time
start_time = time.time()
import urllib3
from urllib3 import exceptions
from datetime import datetime
from subprocess import call
from mastodon import Mastodon
import threading
import os
import json
import signal
import sys
import os.path
import requests
from requests import exceptions
import operator
import calendar
import psycopg2
from itertools import product
from multiprocessing import Pool, Lock, Process, Queue, current_process, Manager
import multiprocessing
import aiohttp
import aiodns
import asyncio
from aiohttp import ClientError, ClientSession, ClientConnectionError, ClientConnectorError, ClientSSLError, ClientConnectorSSLError, ServerTimeoutError
from asyncio import TimeoutError
import socket
from socket import gaierror, gethostbyname
from decimal import *
# Limit the decimal module's context to two significant digits.
getcontext().prec = 2
# API paths that are appended to 'https://<server>' when probing a server.
# They are tried in list order; every path already carries its trailing '?'.
apis = ['/nodeinfo/2.0?', '/nodeinfo/2.0.json?', '/main/nodeinfo/2.0?', '/api/statusnet/config?', '/api/nodeinfo/2.0.json?', '/api/nodeinfo?', '/api/v1/instance?','/wp-json/nodeinfo/2.0?']
# Exception types grouped so that HTTP client, timeout and DNS failures
# can be handled with a single except clause.
client_exceptions = (
aiohttp.ClientResponseError,
aiohttp.ClientConnectionError,
aiohttp.ClientConnectorError,
aiohttp.ClientError,
asyncio.TimeoutError,
socket.gaierror,
)
# Timestamp taken once at import time; write_api() stores it as updated_at.
now = datetime.now()
###############################################################################
# INITIALISATION
###############################################################################
def is_json(myjson):
    """Return True when *myjson* can be parsed as a JSON document.

    Any input that ``json.loads`` rejects yields False. That covers
    malformed text (``ValueError``) as well as values that are not
    str/bytes at all, such as ``None`` (``TypeError``).
    """
    try:
        json.loads(myjson)
    except (ValueError, TypeError):
        return False
    return True
def write_api(server, software, users, alive, api, soft_version):
    """Persist the data collected for one server and mark it as checked.

    Runs three statements in a single transaction: an insert into
    ``fediverse`` (ignored on conflict), an update of that server's row
    with the supplied values, and an update flagging the server as
    checked in ``world``. Errors are printed rather than raised, and the
    connection is always closed.
    """
    insert_sql = "INSERT INTO fediverse(server, updated_at, software, users, alive, users_api, version) VALUES(%s,%s,%s,%s,%s,%s,%s) ON CONFLICT DO NOTHING"
    update_sql = "UPDATE fediverse SET updated_at=(%s), software=(%s), users=(%s), alive=(%s), users_api=(%s), version=(%s) where server=(%s)"
    checked_sql = "UPDATE world SET checked='t' where server=(%s)"

    connection = None

    try:

        connection = psycopg2.connect(database = fediverse_db, user = fediverse_db_user, password = "", host = "/var/run/postgresql", port = "5432")
        cursor = connection.cursor()

        # Statement order matters: insert first, then refresh, then flag.
        statements = (
            (insert_sql, (server, now, software, users, alive, api, soft_version)),
            (update_sql, (now, software, users, alive, api, soft_version, server)),
            (checked_sql, (server,)),
        )
        for statement, values in statements:
            cursor.execute(statement, values)

        connection.commit()
        cursor.close()

    except (Exception, psycopg2.DatabaseError) as error:

        print(error)

    finally:

        if connection is not None:
            connection.close()
async def getsoft(server):
    """Probe *server* over HTTPS and store the software it reports.

    Each path in ``apis`` is requested in order with a 3 second total
    timeout. The first endpoint that yields usable data is written with
    ``write_api`` and ends the probe.

    * nodeinfo-style endpoints supply the software name, version and
      total user count;
    * ``/api/v1/instance?`` is a fallback that is only used when no
      earlier endpoint gave a software name; the server is then recorded
      as 'mastodon'.

    Servers whose hostname does not resolve, and servers reporting more
    than 1000000 users, are not stored. A failing endpoint (network
    error, timeout, non-200 status, invalid or unexpected JSON) is
    skipped and the next one is tried.
    """
    try:
        socket.gethostbyname(server)
    except socket.gaierror:
        # Hostname does not resolve, so there is nothing to query.
        return

    soft = ''
    url = 'https://' + server
    timeout = aiohttp.ClientTimeout(total=3)

    async with aiohttp.ClientSession(timeout=timeout) as session:

        for api in apis:

            try:
                async with session.get(url + api) as response:
                    if response.status != 200:
                        continue
                    response_json = await response.json()
            except client_exceptions + (ValueError,):
                # Connection problem, timeout, wrong content type or a body
                # that is not valid JSON: move on to the next endpoint.
                continue

            if api != '/api/v1/instance?':
                try:
                    soft = response_json['software']['name']
                    soft = soft.lower()
                    soft_version = response_json['software']['version']
                    users = response_json['usage']['users']['total']
                    too_many_users = users > 1000000
                except (KeyError, TypeError, AttributeError):
                    # Response does not have the expected nodeinfo layout.
                    continue
                description = str(soft) + " " + str(soft_version)
            elif soft == '':
                try:
                    users = response_json['stats']['user_count']
                    soft_version = response_json['version']
                    too_many_users = users > 1000000
                except (KeyError, TypeError):
                    continue
                soft = 'mastodon'
                description = soft
            else:
                continue

            if too_many_users:
                return

            alive = True
            # soft_version is passed in both branches; write_api requires it.
            write_api(server, soft, users, alive, api, soft_version)
            print("Server " + server + " (" + description + ") is alive!")
            return
def getserver(server, x):
    """Validate one server name and run the ``getsoft`` probe for it.

    *server* is a sequence whose first item is the hostname (the caller
    builds one-element tuples with ``zip``). *x* is accepted for the
    caller's argument layout and is not used here.

    Names without a dot, or containing '@', '/' or ':', are ignored.
    Failures while probing are printed instead of being silently
    discarded, so this function never raises for an unreachable server.
    """
    server = server[0].rstrip('.').lower()

    if "." not in server:
        return
    if any(char in server for char in "@/:"):
        return

    try:
        try:
            loop = asyncio.get_event_loop()
        except RuntimeError:
            # No current event loop in this thread/process: create one.
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)

        coroutines = [getsoft(server)]
        # return_exceptions=True keeps a failing probe from raising here.
        loop.run_until_complete(asyncio.gather(*coroutines, return_exceptions=True))
    except Exception as error:
        print("Server " + server + ": " + str(error))
# Returns the parameter from the specified file
def get_parameter( parameter, file_path ):
    """Return the value stored for *parameter* in the file *file_path*.

    The file is read line by line; a line matches when the text before
    its first ':' equals *parameter* exactly, so 'fediverse_db' never
    matches a 'fediverse_db_user: ...' line. The returned value is
    everything after that first ':' with surrounding whitespace removed,
    which keeps values that themselves contain ':' intact.

    Prints a message and exits the program when the file does not exist
    or holds no line for *parameter*.
    """
    # Check if secrets file exists
    if not os.path.isfile(file_path):
        print("File %s not found, exiting."%file_path)
        sys.exit(0)

    # Find parameter in file
    with open( file_path ) as f:
        for line in f:
            key, separator, value = line.partition(":")
            if separator and key.strip() == parameter:
                return value.strip()

    # Cannot find parameter, exit
    print(file_path + " Missing parameter %s "%parameter)
    sys.exit(0)
# Load configuration from config file
# (get_parameter exits the program if the file or the key is missing)
config_filepath = "config/config.txt"
mastodon_hostname = get_parameter("mastodon_hostname", config_filepath)
# Load database config from db_config file
# Both values are passed to psycopg2.connect() as database name and user.
db_config_filepath = "config/db_config.txt"
fediverse_db = get_parameter("fediverse_db", db_config_filepath)
fediverse_db_user = get_parameter("fediverse_db_user", db_config_filepath)
###############################################################################
# main
if __name__ == '__main__':

    # Collect every server of the 'world' table that is still unchecked.
    world_servers = []

    conn = None

    try:

        conn = psycopg2.connect(database = fediverse_db, user = fediverse_db_user, password = "", host = "/var/run/postgresql", port = "5432")
        cur = conn.cursor()

        ### get world servers list
        cur.execute("select server from world where checked='f'")
        world_servers.extend(row[0] for row in cur)
        cur.close()

        print("Remaining servers: " + str(len(world_servers)))

    except (Exception, psycopg2.DatabaseError) as error:

        print(error)

    finally:

        if conn is not None:
            conn.close()

    ###########################################################################
    # multiprocessing!

    m = Manager()
    q = m.Queue()
    # zip() wraps each name in a one-element tuple, as getserver() expects.
    z = zip(world_servers)
    serv_number = len(world_servers)

    # One (server_tuple, queue) argument pair per server.
    pool_tuple = list(product(z, [q]))
    with Pool(processes=64) as pool:
        pool.starmap(getserver, pool_tuple)

    print('Done.')

Veure arxiu

@ -1,6 +1,3 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import time
start_time = time.time()
from six.moves import urllib
@ -33,6 +30,7 @@ from socket import gaierror, gethostbyname
updated_at = datetime.now()
peers_api = '/api/v1/instance/peers?'
lemmy_api = '/api/v2/site?'
def is_json(myjson):
try:
@ -41,6 +39,27 @@ def is_json(myjson):
return False
return True
def get_lemmy_server(server):
    """Validate a Lemmy server name and run ``get_lemmy_peers`` for it.

    Names without a dot, or containing '@', '/' or ':', are ignored.
    Failures while fetching are printed instead of being silently
    discarded.
    """
    if "." not in server:
        return
    if any(char in server for char in "@/:"):
        return

    try:
        try:
            loop = asyncio.get_event_loop()
        except RuntimeError:
            # No current event loop in this thread/process: create one.
            loop = asyncio.new_event_loop()
            asyncio.set_event_loop(loop)

        coroutines = [get_lemmy_peers(server)]
        # return_exceptions=True keeps a failing fetch from raising here.
        loop.run_until_complete(asyncio.gather(*coroutines, return_exceptions=True))
    except Exception as error:
        print("Server " + server + ": " + str(error))
def getserver(server):
if server.find(".") == -1:
@ -62,7 +81,7 @@ def getserver(server):
pass
async def getpeers(server):
async def get_lemmy_peers(server):
try:
@ -76,18 +95,93 @@ async def getpeers(server):
timeout = aiohttp.ClientTimeout(total=3)
async with aiohttp.ClientSession(timeout=timeout) as session:
try:
async with session.get(url+peers_api) as response:
if response.status == 200:
async with session.get(url+lemmy_api) as resp:
response = await resp.json()
if resp.status == 200:
try:
response_json = await response.json()
print("Server: " + server + ", " + "federated with " + str(len(response_json)) + " servers")
data = response['federated_instances']['linked']
print("Server: " + server + ", " + "federated with " + str(len(data)) + " servers")
i = 0
while i < len(response_json):
while i < len(data):
saved_at = datetime.now()
insert_sql = "INSERT INTO world(server, federated_with, updated_at, saved_at) VALUES(%s,%s,%s,%s) ON CONFLICT DO NOTHING"
conn = None
try:
conn = psycopg2.connect(database = fediverse_db, user = fediverse_db_user, password = "", host = "/var/run/postgresql", port = "5432")
cur = conn.cursor()
cur.execute(insert_sql, (data[i], server, updated_at, saved_at,))
conn.commit()
cur.close()
except (Exception, psycopg2.DatabaseError) as error:
print(error)
finally:
if conn is not None:
conn.close()
i += 1
except:
pass
except aiohttp.ClientConnectorError as err:
pass
async def getpeers(server):
try:
socket.gethostbyname(server)
except socket.gaierror:
return
url = 'https://' + server
timeout = aiohttp.ClientTimeout(total=3)
async with aiohttp.ClientSession(timeout=timeout) as session:
try:
async with session.get(url+peers_api) as resp:
response = await resp.json()
if resp.status == 200:
try:
response_json = response
print("Server: " + server + ", " + "federated with " + str(len(response_json)) + " servers")
i = 0
while i < len(response_json):
saved_at = datetime.now()
insert_sql = "INSERT INTO world(server, federated_with, updated_at, saved_at) VALUES(%s,%s,%s,%s) ON CONFLICT DO NOTHING"
conn = None
try:
conn = psycopg2.connect(database = fediverse_db, user = fediverse_db_user, password = "", host = "/var/run/postgresql", port = "5432")
@ -172,6 +266,9 @@ headers={ 'Authorization': 'Bearer %s'%uc_access_token }
if __name__ == '__main__':
lemmy_server = 'lemmy.ml'
get_lemmy_server(lemmy_server)
getserver(mastodon_hostname)
self_peers = mastodon.instance_peers()

Veure arxiu

@ -2,3 +2,4 @@ Mastodon.py>=1.5.1
psycopg2-binary>=2.8.4
aiohttp>=3.6.2
aiodns>=2.0.0
matplotlib>=3.3.4