forkeado de spla/fediverse
Added fetchservers.py
This commit is contained in:
pare
322368dfb6
commit
a935ef8029
S'han modificat 4 arxius amb 358 adicions i 16 eliminacions
16
README.md
16
README.md
|
@ -13,13 +13,17 @@ At the end it post the results to host server bot account.
|
||||||
|
|
||||||
Within Python Virtual Environment:
|
Within Python Virtual Environment:
|
||||||
|
|
||||||
1. Run `python db-setup.py` to setup and create new Postgresql database and needed tables in it.
|
1. Run `pip install -r requirements.txt` to install needed libraries.
|
||||||
|
|
||||||
2. Run `python setup.py` to get your bot's access token of your Mastodon or Pleroma server existing account. It will be saved to 'secrets/secrets.txt' for further use.
|
2. Run `python db-setup.py` to setup and create new Postgresql database and needed tables in it.
|
||||||
|
|
||||||
3. Run `python getworld.py` to get all peers from your host and the whole world of fediverse's servers (or almost the whole world).
|
3. Run `python setup.py` to get your bot's access token of your Mastodon or Pleroma server existing account. It will be saved to 'secrets/secrets.txt' for further use.
|
||||||
|
|
||||||
4. Run `python fediverse.py` to query world alive servers API. It gets data from server's API according this table:
|
4. Run `python getworld.py` to get all peers from your host and the whole world of fediverse's servers (or almost the whole world).
|
||||||
|
|
||||||
|
5. Run `python fetchservers.py` to add servers to the alive servers database.
|
||||||
|
|
||||||
|
6. Run `python fediverse.py` to query the API of every alive server. It gets data from each server's API according to this table:
|
||||||
|
|
||||||
| Software | API peers | API users (nodeinfo/2.0.json) | API users (nodeinfo/2.0) | API users (api/v1/instance) | API users (main/nodeinfo/2.0) | API users (api/nodeinfo/2.0.json) | API users (api/nodeinfo) | Software |
|
| Software | API peers | API users (nodeinfo/2.0.json) | API users (nodeinfo/2.0) | API users (api/v1/instance) | API users (main/nodeinfo/2.0) | API users (api/nodeinfo/2.0.json) | API users (api/nodeinfo) | Software |
|
||||||
|:--------------:|:---------------------:|:------------------------------------------:|:----------------------------------------------------------------------------------------------:|:---------------------------:|:-----------------------------:|:---------------------------------:|:---------------------------:|:--------------:|
|
|:--------------:|:---------------------:|:------------------------------------------:|:----------------------------------------------------------------------------------------------:|:---------------------------:|:-----------------------------:|:---------------------------------:|:---------------------------:|:--------------:|
|
||||||
|
@ -69,9 +73,7 @@ Within Python Virtual Environment:
|
||||||
| writefreely | api/nodeinfo | ['software']['name'] |
|
| writefreely | api/nodeinfo | ['software']['name'] |
|
||||||
| zap | nodeinfo/2.0.json | ['software']['name'] |
|
| zap | nodeinfo/2.0.json | ['software']['name'] |
|
||||||
|
|
||||||
5. Use your favourite scheduling method to set `python fediverse.py` after `python getworld.py` to run regularly.
|
7. Use your favourite scheduling method to run `python fediverse.py` twice daily, `python fetchservers.py` once daily and `python getworld.py` monthly.
|
||||||
|
|
||||||
Note: install all needed packages with 'pip install package' or use 'pip install -r requirements.txt' to install them.
|
|
||||||
|
|
||||||
18.2.21 - New feature! Added [Lemmy project](https://join.lemmy.ml)
|
18.2.21 - New feature! Added [Lemmy project](https://join.lemmy.ml)
|
||||||
12.5.21 - New feature! Added Wordpress support. The code can now detect Wordpress instances with ActivityPub enabled plugin.
|
12.5.21 - New feature! Added Wordpress support. The code can now detect Wordpress instances with ActivityPub enabled plugin.
|
||||||
|
|
242
fetchservers.py
Normal file
242
fetchservers.py
Normal file
|
@ -0,0 +1,242 @@
|
||||||
|
import time
|
||||||
|
start_time = time.time()
|
||||||
|
import urllib3
|
||||||
|
from urllib3 import exceptions
|
||||||
|
from datetime import datetime
|
||||||
|
from subprocess import call
|
||||||
|
from mastodon import Mastodon
|
||||||
|
import threading
|
||||||
|
import os
|
||||||
|
import json
|
||||||
|
import signal
|
||||||
|
import sys
|
||||||
|
import os.path
|
||||||
|
import requests
|
||||||
|
from requests import exceptions
|
||||||
|
import operator
|
||||||
|
import calendar
|
||||||
|
import psycopg2
|
||||||
|
from itertools import product
|
||||||
|
|
||||||
|
from multiprocessing import Pool, Lock, Process, Queue, current_process, Manager
|
||||||
|
import multiprocessing
|
||||||
|
|
||||||
|
import aiohttp
|
||||||
|
import aiodns
|
||||||
|
import asyncio
|
||||||
|
from aiohttp import ClientError, ClientSession, ClientConnectionError, ClientConnectorError, ClientSSLError, ClientConnectorSSLError, ServerTimeoutError
|
||||||
|
from asyncio import TimeoutError
|
||||||
|
import socket
|
||||||
|
from socket import gaierror, gethostbyname
|
||||||
|
|
||||||
|
from decimal import *
|
||||||
|
getcontext().prec = 2
|
||||||
|
|
||||||
|
# API paths probed on every server; they are tried in this order.
apis = [
    '/nodeinfo/2.0?',
    '/nodeinfo/2.0.json?',
    '/main/nodeinfo/2.0?',
    '/api/statusnet/config?',
    '/api/nodeinfo/2.0.json?',
    '/api/nodeinfo?',
    '/api/v1/instance?',
    '/wp-json/nodeinfo/2.0?',
]
|
||||||
|
|
||||||
|
# Network-level exception types grouped into one tuple so they can be
# named together in a single ``except`` clause.
client_exceptions = (
    aiohttp.ClientResponseError,
    aiohttp.ClientConnectionError,
    aiohttp.ClientConnectorError,
    aiohttp.ClientError,
    asyncio.TimeoutError,
    socket.gaierror,
)

# Timestamp taken once at import time; used as ``updated_at`` for every row
# written during this run.
now = datetime.now()
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
# INITIALISATION
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
def is_json(myjson):
    """Return True if ``myjson`` parses as JSON, False otherwise.

    Args:
        myjson: Text (``str``, ``bytes`` or ``bytearray``) to validate.

    Returns:
        bool: True when ``json.loads`` accepts the input. False when it
        raises ``ValueError`` (malformed JSON) or ``TypeError`` (the input
        is not text at all, e.g. ``None``).
    """
    try:
        json.loads(myjson)
    except (ValueError, TypeError):
        return False
    return True
|
||||||
|
|
||||||
|
def write_api(server, software, users, alive, api, soft_version):
    """Store the probe result for ``server`` and mark it as checked.

    Three statements run in one transaction: an insert into ``fediverse``
    that is skipped on conflict, an update of that ``fediverse`` row with the
    same values, and an update setting ``checked='t'`` on the ``world`` row.
    Any error is printed rather than raised, and the connection is always
    closed.

    Args:
        server: Host name of the server.
        software: Software name reported by the server.
        users: User count reported by the server.
        alive: Value stored in the ``alive`` column.
        api: API path that answered; stored in the ``users_api`` column.
        soft_version: Software version reported by the server.
    """
    connection = None

    try:

        # Statements are executed in this exact order inside one transaction.
        statements = (
            (
                "INSERT INTO fediverse(server, updated_at, software, users, alive, users_api, version) VALUES(%s,%s,%s,%s,%s,%s,%s) ON CONFLICT DO NOTHING",
                (server, now, software, users, alive, api, soft_version),
            ),
            (
                "UPDATE fediverse SET updated_at=(%s), software=(%s), users=(%s), alive=(%s), users_api=(%s), version=(%s) where server=(%s)",
                (now, software, users, alive, api, soft_version, server),
            ),
            (
                "UPDATE world SET checked='t' where server=(%s)",
                (server,),
            ),
        )

        connection = psycopg2.connect(database = fediverse_db, user = fediverse_db_user, password = "", host = "/var/run/postgresql", port = "5432")

        cursor = connection.cursor()

        for sql, params in statements:
            cursor.execute(sql, params)

        connection.commit()

        cursor.close()

    except (Exception, psycopg2.DatabaseError) as error:

        print(error)

    finally:

        if connection is not None:

            connection.close()
|
||||||
|
|
||||||
|
async def getsoft(server):
    """Probe ``server`` for a known API and record it when it answers.

    Each path in ``apis`` is requested over HTTPS with a 3-second total
    timeout. For every path except ``/api/v1/instance?`` the JSON answer is
    read as nodeinfo (``software.name``, ``software.version``,
    ``usage.users.total``); the first complete answer is stored with
    ``write_api`` and the probe ends. ``/api/v1/instance?`` is the Mastodon
    fallback (``stats.user_count``, ``version``) and is only used while no
    software name has been identified. Servers reporting more than 1,000,000
    users are never stored.

    Args:
        server: Host name to probe (no scheme, no path).

    Returns:
        None. Results are written through ``write_api`` and printed.
    """
    try:
        # NOTE: this is a blocking DNS lookup; hosts that do not resolve are
        # skipped without opening an HTTP session.
        socket.gethostbyname(server)
    except socket.gaierror:
        return

    soft = ''
    url = 'https://' + server
    timeout = aiohttp.ClientTimeout(total=3)

    async with aiohttp.ClientSession(timeout=timeout) as session:

        for api in apis:

            # Reset per endpoint so a failed decode can never reuse the
            # payload of a previously queried endpoint.
            response_json = None

            try:
                async with session.get(url + api) as response:
                    status = response.status
                    if status == 200:
                        try:
                            response_json = await response.json()
                        except Exception:
                            # Body is not valid JSON (or could not be read).
                            response_json = None
            except aiohttp.ClientConnectorError:
                continue

            if status != 200 or response_json is None:
                continue

            if api != '/api/v1/instance?':

                try:
                    soft = response_json['software']['name']
                    soft = soft.lower()
                    soft_version = response_json['software']['version']
                    users = response_json['usage']['users']['total']
                    if users > 1000000:
                        return
                except (KeyError, TypeError, AttributeError):
                    # Incomplete nodeinfo payload: try the next endpoint.
                    continue

                write_api(server, soft, users, True, api, soft_version)
                print("Server " + server + " (" + soft + " " + str(soft_version) + ") is alive!")
                return

            elif soft == '':

                try:
                    users = response_json['stats']['user_count']
                    soft_version = response_json['version']
                    if users > 1000000:
                        return
                except (KeyError, TypeError):
                    # Not a Mastodon-style instance document.
                    continue

                soft = 'mastodon'
                # soft_version was missing from this call, which raised
                # TypeError and prevented the row from ever being written.
                write_api(server, soft, users, True, api, soft_version)
                print("Server " + server + " (" + soft + ") is alive!")
|
||||||
|
|
||||||
|
def getserver(server, x):
    """Validate a host name and run the ``getsoft`` probe for it.

    Args:
        server: Sequence whose first item is the host name; a trailing dot is
            stripped and the name is lower-cased before use.
        x: Not used by this function.

    Returns:
        None. Names without a dot, or containing ``@``, ``/`` or ``:``, are
        skipped without any network access.
    """
    server = server[0].rstrip('.').lower()

    # A usable host name contains a dot and none of the delimiters that
    # indicate an account handle, a path or a port / IPv6 literal.
    if "." not in server or any(mark in server for mark in "@/:"):
        return

    try:

        loop = asyncio.get_event_loop()
        # return_exceptions=True keeps a failing probe from raising here.
        loop.run_until_complete(asyncio.gather(getsoft(server), return_exceptions=True))

    except Exception as error:

        # Best effort: one bad server must not stop the run, but unlike a
        # bare ``except`` this lets KeyboardInterrupt/SystemExit through.
        print("Error checking " + server + ": " + str(error))
|
||||||
|
|
||||||
|
# Returns the parameter from the specified file
|
||||||
|
def get_parameter( parameter, file_path ):
    """Return the value of ``parameter`` from a ``key: value`` text file.

    Args:
        parameter: Exact key to look up (without the trailing colon).
        file_path: Path of the file to read.

    Returns:
        str: Text following ``parameter:`` on the first matching line, with
        surrounding whitespace stripped.

    Raises:
        SystemExit: If the file does not exist or no line starts with
            ``parameter:``; a message is printed before exiting.
    """
    # Check if the file exists
    if not os.path.isfile(file_path):
        print("File %s not found, exiting."%file_path)
        sys.exit(0)

    # Match the key together with its colon, so that a key which is a prefix
    # of another one (fediverse_db / fediverse_db_user) cannot pick up the
    # wrong line.
    prefix = parameter + ":"

    # Find parameter in file
    with open( file_path ) as f:
        for line in f:
            if line.startswith( prefix ):
                return line[len(prefix):].strip()

    # Cannot find parameter, exit
    print(file_path + "  Missing parameter %s "%parameter)
    sys.exit(0)
|
||||||
|
|
||||||
|
|
||||||
|
# Locations of the plain-text configuration files
config_filepath = "config/config.txt"
db_config_filepath = "config/db_config.txt"

# Host name read from the general config file
mastodon_hostname = get_parameter("mastodon_hostname", config_filepath)

# Database name and database user read from the db config file
fediverse_db = get_parameter("fediverse_db", db_config_filepath)
fediverse_db_user = get_parameter("fediverse_db_user", db_config_filepath)
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
# main
|
||||||
|
|
||||||
|
if __name__ == '__main__':

    # Servers from the ``world`` table that are not yet marked as checked
    world_servers = []

    conn = None

    try:

        conn = psycopg2.connect(database = fediverse_db, user = fediverse_db_user, password = "", host = "/var/run/postgresql", port = "5432")

        cur = conn.cursor()

        cur.execute("select server from world where checked='f'")

        world_servers.extend(row[0] for row in cur)

        cur.close()

        print("Remaining servers: " + str(len(world_servers)))

    except (Exception, psycopg2.DatabaseError) as error:

        print(error)

    finally:

        if conn is not None:

            conn.close()

    ###########################################################################
    # multiprocessing!

    m = Manager()
    q = m.Queue()

    serv_number = len(world_servers)

    # getserver expects (row, queue): each host name is wrapped in a 1-tuple
    pool_tuple = [((name,), q) for name in world_servers]

    with Pool(processes=64) as pool:
        pool.starmap(getserver, pool_tuple)

    print('Done.')
|
||||||
|
|
115
getworld.py
115
getworld.py
|
@ -1,6 +1,3 @@
|
||||||
#!/usr/bin/env python
|
|
||||||
# -*- coding: utf-8 -*-
|
|
||||||
|
|
||||||
import time
|
import time
|
||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
from six.moves import urllib
|
from six.moves import urllib
|
||||||
|
@ -33,6 +30,7 @@ from socket import gaierror, gethostbyname
|
||||||
|
|
||||||
updated_at = datetime.now()
|
updated_at = datetime.now()
|
||||||
peers_api = '/api/v1/instance/peers?'
|
peers_api = '/api/v1/instance/peers?'
|
||||||
|
lemmy_api = '/api/v2/site?'
|
||||||
|
|
||||||
def is_json(myjson):
|
def is_json(myjson):
|
||||||
try:
|
try:
|
||||||
|
@ -41,6 +39,27 @@ def is_json(myjson):
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
def get_lemmy_server(server):
    """Validate a host name and run ``get_lemmy_peers`` for it.

    Args:
        server: Host name of the server to query.

    Returns:
        None. Names without a dot, or containing ``@``, ``/`` or ``:``, are
        skipped without any network access.
    """
    # A usable host name contains a dot and none of the delimiters that
    # indicate an account handle, a path or a port / IPv6 literal.
    if "." not in server or any(mark in server for mark in "@/:"):
        return

    try:

        loop = asyncio.get_event_loop()
        # return_exceptions=True keeps a failing request from raising here.
        loop.run_until_complete(asyncio.gather(get_lemmy_peers(server), return_exceptions=True))

    except Exception as error:

        # Best effort: report and carry on, but unlike a bare ``except`` this
        # lets KeyboardInterrupt/SystemExit through.
        print("Error querying " + server + ": " + str(error))
|
||||||
|
|
||||||
def getserver(server):
|
def getserver(server):
|
||||||
|
|
||||||
if server.find(".") == -1:
|
if server.find(".") == -1:
|
||||||
|
@ -62,7 +81,7 @@ def getserver(server):
|
||||||
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
async def getpeers(server):
|
async def get_lemmy_peers(server):
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
||||||
|
@ -76,18 +95,93 @@ async def getpeers(server):
|
||||||
|
|
||||||
timeout = aiohttp.ClientTimeout(total=3)
|
timeout = aiohttp.ClientTimeout(total=3)
|
||||||
async with aiohttp.ClientSession(timeout=timeout) as session:
|
async with aiohttp.ClientSession(timeout=timeout) as session:
|
||||||
|
|
||||||
try:
|
try:
|
||||||
async with session.get(url+peers_api) as response:
|
async with session.get(url+lemmy_api) as resp:
|
||||||
if response.status == 200:
|
response = await resp.json()
|
||||||
|
if resp.status == 200:
|
||||||
try:
|
try:
|
||||||
response_json = await response.json()
|
data = response['federated_instances']['linked']
|
||||||
print("Server: " + server + ", " + "federated with " + str(len(response_json)) + " servers")
|
print("Server: " + server + ", " + "federated with " + str(len(data)) + " servers")
|
||||||
i = 0
|
i = 0
|
||||||
while i < len(response_json):
|
while i < len(data):
|
||||||
|
|
||||||
saved_at = datetime.now()
|
saved_at = datetime.now()
|
||||||
insert_sql = "INSERT INTO world(server, federated_with, updated_at, saved_at) VALUES(%s,%s,%s,%s) ON CONFLICT DO NOTHING"
|
insert_sql = "INSERT INTO world(server, federated_with, updated_at, saved_at) VALUES(%s,%s,%s,%s) ON CONFLICT DO NOTHING"
|
||||||
conn = None
|
conn = None
|
||||||
|
try:
|
||||||
|
|
||||||
|
conn = psycopg2.connect(database = fediverse_db, user = fediverse_db_user, password = "", host = "/var/run/postgresql", port = "5432")
|
||||||
|
|
||||||
|
cur = conn.cursor()
|
||||||
|
|
||||||
|
cur.execute(insert_sql, (data[i], server, updated_at, saved_at,))
|
||||||
|
|
||||||
|
conn.commit()
|
||||||
|
|
||||||
|
cur.close()
|
||||||
|
|
||||||
|
except (Exception, psycopg2.DatabaseError) as error:
|
||||||
|
|
||||||
|
print(error)
|
||||||
|
|
||||||
|
finally:
|
||||||
|
|
||||||
|
if conn is not None:
|
||||||
|
|
||||||
|
conn.close()
|
||||||
|
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
except:
|
||||||
|
|
||||||
|
pass
|
||||||
|
|
||||||
|
except aiohttp.ClientConnectorError as err:
|
||||||
|
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
async def getpeers(server):
|
||||||
|
|
||||||
|
try:
|
||||||
|
|
||||||
|
socket.gethostbyname(server)
|
||||||
|
|
||||||
|
except socket.gaierror:
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
url = 'https://' + server
|
||||||
|
|
||||||
|
timeout = aiohttp.ClientTimeout(total=3)
|
||||||
|
|
||||||
|
async with aiohttp.ClientSession(timeout=timeout) as session:
|
||||||
|
|
||||||
|
try:
|
||||||
|
|
||||||
|
async with session.get(url+peers_api) as resp:
|
||||||
|
|
||||||
|
response = await resp.json()
|
||||||
|
|
||||||
|
if resp.status == 200:
|
||||||
|
|
||||||
|
try:
|
||||||
|
|
||||||
|
response_json = response
|
||||||
|
|
||||||
|
print("Server: " + server + ", " + "federated with " + str(len(response_json)) + " servers")
|
||||||
|
|
||||||
|
i = 0
|
||||||
|
|
||||||
|
while i < len(response_json):
|
||||||
|
|
||||||
|
saved_at = datetime.now()
|
||||||
|
|
||||||
|
insert_sql = "INSERT INTO world(server, federated_with, updated_at, saved_at) VALUES(%s,%s,%s,%s) ON CONFLICT DO NOTHING"
|
||||||
|
|
||||||
|
conn = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
||||||
conn = psycopg2.connect(database = fediverse_db, user = fediverse_db_user, password = "", host = "/var/run/postgresql", port = "5432")
|
conn = psycopg2.connect(database = fediverse_db, user = fediverse_db_user, password = "", host = "/var/run/postgresql", port = "5432")
|
||||||
|
@ -172,6 +266,9 @@ headers={ 'Authorization': 'Bearer %s'%uc_access_token }
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
|
|
||||||
|
lemmy_server = 'lemmy.ml'
|
||||||
|
get_lemmy_server(lemmy_server)
|
||||||
|
|
||||||
getserver(mastodon_hostname)
|
getserver(mastodon_hostname)
|
||||||
self_peers = mastodon.instance_peers()
|
self_peers = mastodon.instance_peers()
|
||||||
|
|
||||||
|
|
|
@ -2,3 +2,4 @@ Mastodon.py>=1.5.1
|
||||||
psycopg2-binary>=2.8.4
|
psycopg2-binary>=2.8.4
|
||||||
aiohttp>=3.6.2
|
aiohttp>=3.6.2
|
||||||
aiodns>=2.0.0
|
aiodns>=2.0.0
|
||||||
|
matplotlib>=3.3.4
|
||||||
|
|
Loading…
Referencia en una nova incidència