diff --git a/README.md b/README.md index f86d068..22a3305 100644 --- a/README.md +++ b/README.md @@ -13,13 +13,17 @@ At the end it post the results to host server bot account. Within Python Virtual Environment: -1. Run `python db-setup.py` to setup and create new Postgresql database and needed tables in it. +1. Run `pip install -r requirements.txt` to install needed libraries. -2. Run `python setup.py` to get your bot's access token of your Mastodon or Pleroma server existing account. It will be saved to 'secrets/secrets.txt' for further use. +2. Run `python db-setup.py` to setup and create new Postgresql database and needed tables in it. -3. Run `python getworld.py` to get all peers from your host and the whole world of fediverse's servers (or almost the whole world). +3. Run `python setup.py` to get your bot's access token of your Mastodon or Pleroma server existing account. It will be saved to 'secrets/secrets.txt' for further use. -4. Run `python fediverse.py` to query world alive servers API. It gets data from server's API according this table: +4. Run `python getworld.py` to get all peers from your host and the whole world of fediverse's servers (or almost the whole world). + +5. Run `python fetchservers.py` to add servers to alive servers database. + +6. Run `python fediverse.py` to query world alive servers API. 
It gets data from server's API according to this table: | Software | API peers | API users (nodeinfo/2.0.json) | API users (nodeinfo/2.0) | API users (api/v1/instance) | API users (main/nodeinfo/2.0) | API users (api/nodeinfo/2.0.json) | API users (api/nodeinfo) | Software | |:--------------:|:---------------------:|:------------------------------------------:|:----------------------------------------------------------------------------------------------:|:---------------------------:|:-----------------------------:|:---------------------------------:|:---------------------------:|:--------------:| @@ -69,9 +73,7 @@ Within Python Virtual Environment: | writefreely | api/nodeinfo | ['software']['name'] | | zap | nodeinfo/2.0.json | ['software']['name'] | -5. Use your favourite scheduling method to set `python fediverse.py` after `python getworld.py` to run regularly. - -Note: install all needed packages with 'pip install package' or use 'pip install -r requirements.txt' to install them. +7. Use your favourite scheduling method to run `python fediverse.py` twice daily, `python fetchservers.py` once daily and `python getworld.py` monthly. 18.2.21 - New feature! Added [Lemmy project](https://join.lemmy.ml) 12.5.21 - New feature! Added Wordpress support. The code can now detect Wordpress instances with ActivityPub enabled plugin. 
diff --git a/fetchservers.py b/fetchservers.py new file mode 100644 index 0000000..d8b2278 --- /dev/null +++ b/fetchservers.py @@ -0,0 +1,242 @@ +import time +start_time = time.time() +import urllib3 +from urllib3 import exceptions +from datetime import datetime +from subprocess import call +from mastodon import Mastodon +import threading +import os +import json +import signal +import sys +import os.path +import requests +from requests import exceptions +import operator +import calendar +import psycopg2 +from itertools import product + +from multiprocessing import Pool, Lock, Process, Queue, current_process, Manager +import multiprocessing + +import aiohttp +import aiodns +import asyncio +from aiohttp import ClientError, ClientSession, ClientConnectionError, ClientConnectorError, ClientSSLError, ClientConnectorSSLError, ServerTimeoutError +from asyncio import TimeoutError +import socket +from socket import gaierror, gethostbyname + +from decimal import * +getcontext().prec = 2 + +apis = ['/nodeinfo/2.0?', '/nodeinfo/2.0.json?', '/main/nodeinfo/2.0?', '/api/statusnet/config?', '/api/nodeinfo/2.0.json?', '/api/nodeinfo?', '/api/v1/instance?','/wp-json/nodeinfo/2.0?'] + +client_exceptions = ( + aiohttp.ClientResponseError, + aiohttp.ClientConnectionError, + aiohttp.ClientConnectorError, + aiohttp.ClientError, + asyncio.TimeoutError, + socket.gaierror, +) + +now = datetime.now() + +############################################################################### +# INITIALISATION +############################################################################### + +def is_json(myjson): + try: + json_object = json.loads(myjson) + except ValueError as e: + return False + return True + +def write_api(server, software, users, alive, api, soft_version): + + insert_sql = "INSERT INTO fediverse(server, updated_at, software, users, alive, users_api, version) VALUES(%s,%s,%s,%s,%s,%s,%s) ON CONFLICT DO NOTHING" + conn = None + + try: + + conn = psycopg2.connect(database = 
fediverse_db, user = fediverse_db_user, password = "", host = "/var/run/postgresql", port = "5432") + + cur = conn.cursor() + + cur.execute(insert_sql, (server, now, software, users, alive, api, soft_version)) + + cur.execute("UPDATE fediverse SET updated_at=(%s), software=(%s), users=(%s), alive=(%s), users_api=(%s), version=(%s) where server=(%s)", (now, software, users, alive, api, soft_version, server)) + + cur.execute("UPDATE world SET checked='t' where server=(%s)", (server,)) + + conn.commit() + + cur.close() + + except (Exception, psycopg2.DatabaseError) as error: + + print(error) + + finally: + + if conn is not None: + + conn.close() + +async def getsoft(server): + + try: + + socket.gethostbyname(server) + + except socket.gaierror: + + pass + return + + soft = '' + + url = 'https://' + server + + timeout = aiohttp.ClientTimeout(total=3) + async with aiohttp.ClientSession(timeout=timeout) as session: + for api in apis: + try: + async with session.get(url+api) as response: + if response.status == 200: + try: + response_json = await response.json() + except: + pass + except aiohttp.ClientConnectorError as err: + pass + else: + if response.status == 200 and api != '/api/v1/instance?': + try: + soft = response_json['software']['name'] + soft = soft.lower() + soft_version = response_json['software']['version'] + users = response_json['usage']['users']['total'] + if users > 1000000: + return + alive = True + write_api(server, soft, users, alive, api, soft_version) + print("Server " + server + " (" + soft + " " + soft_version + ") is alive!") + return + except: + pass + if response.status == 200 and soft == '' and api == "/api/v1/instance?": + soft = 'mastodon' + users = response_json['stats']['user_count'] + soft_version = response_json['version'] + if users > 1000000: + return + alive = True + write_api(server, soft, users, alive, api, soft_version) # write_api() requires soft_version as its sixth argument + print("Server " + server + " (" + soft + ") is alive!") + +def getserver(server, x): + + server = 
server[0].rstrip('.').lower() + + if server.find(".") == -1: + return + if server.find("@") != -1: + return + if server.find("/") != -1: + return + if server.find(":") != -1: + return + + try: + + loop = asyncio.get_event_loop() + coroutines = [getsoft(server)] + soft = loop.run_until_complete(asyncio.gather(*coroutines, return_exceptions=True)) + + except: + + pass + +# Returns the parameter from the specified file +def get_parameter( parameter, file_path ): + # Check if secrets file exists + if not os.path.isfile(file_path): + print("File %s not found, exiting."%file_path) + sys.exit(0) + + # Find parameter in file; match "name:" so that "fediverse_db" does not also match a "fediverse_db_user:" line + with open( file_path ) as f: + for line in f: + if line.startswith( parameter + ":" ): + return line.replace(parameter + ":", "").strip() + + # Cannot find parameter, exit + print(file_path + " Missing parameter %s "%parameter) + sys.exit(0) + + +# Load configuration from config file +config_filepath = "config/config.txt" +mastodon_hostname = get_parameter("mastodon_hostname", config_filepath) + +# Load database config from db_config file +db_config_filepath = "config/db_config.txt" +fediverse_db = get_parameter("fediverse_db", db_config_filepath) +fediverse_db_user = get_parameter("fediverse_db_user", db_config_filepath) + +############################################################################### +# main + +if __name__ == '__main__': + + world_servers = [] + + try: + + conn = None + conn = psycopg2.connect(database = fediverse_db, user = fediverse_db_user, password = "", host = "/var/run/postgresql", port = "5432") + + cur = conn.cursor() + + ### get world servers list + + cur.execute("select server from world where checked='f'") + + for row in cur: + + world_servers.append(row[0]) + + cur.close() + + print("Remaining servers: " + str(len(world_servers))) + + except (Exception, psycopg2.DatabaseError) as error: + + print(error) + + finally: + + if conn is not None: + + conn.close() + + 
########################################################################### + # multiprocessing! + + m = Manager() + q = m.Queue() + z = zip(world_servers) + + serv_number = len(world_servers) + + pool_tuple = [(x, q) for x in z] + with Pool(processes=64) as pool: + pool.starmap(getserver, pool_tuple) + + print('Done.') + diff --git a/getworld.py b/getworld.py index d09cb7e..506e797 100644 --- a/getworld.py +++ b/getworld.py @@ -1,6 +1,3 @@ -#!/usr/bin/env python -# -*- coding: utf-8 -*- - import time start_time = time.time() from six.moves import urllib @@ -33,6 +30,7 @@ from socket import gaierror, gethostbyname updated_at = datetime.now() peers_api = '/api/v1/instance/peers?' +lemmy_api = '/api/v2/site?' def is_json(myjson): try: @@ -41,6 +39,27 @@ def is_json(myjson): return False return True +def get_lemmy_server(server): + + if server.find(".") == -1: + return + if server.find("@") != -1: + return + if server.find("/") != -1: + return + if server.find(":") != -1: + return + + try: + + loop = asyncio.get_event_loop() + coroutines = [get_lemmy_peers(server)] + loop.run_until_complete(asyncio.gather(*coroutines, return_exceptions=True)) + + except: + + pass + def getserver(server): if server.find(".") == -1: @@ -62,7 +81,7 @@ def getserver(server): pass -async def getpeers(server): +async def get_lemmy_peers(server): try: @@ -76,18 +95,93 @@ async def getpeers(server): timeout = aiohttp.ClientTimeout(total=3) async with aiohttp.ClientSession(timeout=timeout) as session: + try: - async with session.get(url+peers_api) as response: - if response.status == 200: + async with session.get(url+lemmy_api) as resp: + response = await resp.json() + if resp.status == 200: try: - response_json = await response.json() - print("Server: " + server + ", " + "federated with " + str(len(response_json)) + " servers") + data = response['federated_instances']['linked'] + print("Server: " + server + ", " + "federated with " + str(len(data)) + " servers") i = 0 - while i < 
len(response_json): + while i < len(data): saved_at = datetime.now() insert_sql = "INSERT INTO world(server, federated_with, updated_at, saved_at) VALUES(%s,%s,%s,%s) ON CONFLICT DO NOTHING" conn = None + try: + + conn = psycopg2.connect(database = fediverse_db, user = fediverse_db_user, password = "", host = "/var/run/postgresql", port = "5432") + + cur = conn.cursor() + + cur.execute(insert_sql, (data[i], server, updated_at, saved_at,)) + + conn.commit() + + cur.close() + + except (Exception, psycopg2.DatabaseError) as error: + + print(error) + + finally: + + if conn is not None: + + conn.close() + + i += 1 + + except: + + pass + + except aiohttp.ClientConnectorError as err: + + pass + + +async def getpeers(server): + + try: + + socket.gethostbyname(server) + + except socket.gaierror: + + return + + url = 'https://' + server + + timeout = aiohttp.ClientTimeout(total=3) + + async with aiohttp.ClientSession(timeout=timeout) as session: + + try: + + async with session.get(url+peers_api) as resp: + + response = await resp.json() + + if resp.status == 200: + + try: + + response_json = response + + print("Server: " + server + ", " + "federated with " + str(len(response_json)) + " servers") + + i = 0 + + while i < len(response_json): + + saved_at = datetime.now() + + insert_sql = "INSERT INTO world(server, federated_with, updated_at, saved_at) VALUES(%s,%s,%s,%s) ON CONFLICT DO NOTHING" + + conn = None + try: conn = psycopg2.connect(database = fediverse_db, user = fediverse_db_user, password = "", host = "/var/run/postgresql", port = "5432") @@ -172,6 +266,9 @@ headers={ 'Authorization': 'Bearer %s'%uc_access_token } if __name__ == '__main__': + lemmy_server = 'lemmy.ml' + get_lemmy_server(lemmy_server) + getserver(mastodon_hostname) self_peers = mastodon.instance_peers() diff --git a/requirements.txt b/requirements.txt index 6fdabf0..d1dfc34 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,3 +2,4 @@ Mastodon.py>=1.5.1 psycopg2-binary>=2.8.4 aiohttp>=3.6.2 
aiodns>=2.0.0 +matplotlib>=3.3.4