Deleted getworld.py, new getpeers.py

This commit is contained in:
spla 2021-05-29 12:44:12 +02:00
pare 23842763ac
commit c9142101eb
S'han modificat 4 arxius amb 162 adicions i 288 eliminacions

Veure arxiu

@ -19,7 +19,7 @@ Within Python Virtual Environment:
3. Run `python setup.py` to get your bot's access token of your Mastodon or Pleroma server existing account. It will be saved to 'secrets/secrets.txt' for further use.
4. Run `python getworld.py` to get all peers from your host and the whole world of fediverse's servers (or almost the whole world).
4. Run `python getpeers.py` to get all peers from your host and the whole world of fediverse's servers (or almost the whole world).
5. Run `python fetchservers.py` to add servers to alive servers database.

Veure arxiu

@ -65,6 +65,7 @@ def write_api(server, software, users, alive, api, soft_version):
async def getsoft(server):
try:
socket.gethostbyname(server)
@ -119,7 +120,7 @@ async def getsoft(server):
def getserver(server, x):
server = server[0].rstrip('.').lower()
if server.find(".") == -1:
return
if server.find("@") != -1:
@ -171,7 +172,7 @@ fediverse_db_user = get_parameter("fediverse_db_user", db_config_filepath)
# main
if __name__ == '__main__':
now = datetime.now()
start_time = time.time()
@ -181,8 +182,7 @@ if __name__ == '__main__':
conn = None
conn = psycopg2.connect(database=fediverse_db, user=fediverse_db_user, password="", host="/var/run/postgresql",
port="5432")
conn = psycopg2.connect(database=fediverse_db, user=fediverse_db_user, password="", host="/var/run/postgresql", port="5432")
cur = conn.cursor()
@ -190,7 +190,10 @@ if __name__ == '__main__':
cur.execute("select server from world where checked='f'")
for row in cur:
rows = cur.fetchall()
for row in rows:
world_servers.append(row[0])
cur.close()

153
getpeers.py Normal file
Veure arxiu

@ -0,0 +1,153 @@
import os
import sys
import time
from datetime import datetime
import requests
import threading
import json
import psycopg2
import pdb
def write_servers(world_servers):
i = 0
while i < len(world_servers):
saved_at = datetime.now()
insert_sql = "INSERT INTO world(server, federated_with, updated_at, saved_at) VALUES(%s,%s,%s,%s) ON CONFLICT DO NOTHING"
conn = None
try:
conn = psycopg2.connect(database = fediverse_db, user = fediverse_db_user, password = "", host = "/var/run/postgresql", port = "5432")
cur = conn.cursor()
cur.execute(insert_sql, (world_servers[i], peer, updated_at, saved_at,))
print('writing to database ' + str(i) + ' of ' + str(len(world_servers)) + ': ' + world_servers[i])
conn.commit()
cur.close()
except (Exception, psycopg2.DatabaseError) as error:
print(error)
finally:
if conn is not None:
conn.close()
i += 1
def get_peers(peer):
try:
response = requests.get('https://' + peer + peers_api, timeout=2)
response_json = response.json()
if response.status_code == 200:
try:
print("Server: " + peer + ", " + "federated with " + str(len(response_json)) + " servers")
for peer_peer in response_json:
exist_count = world_peers.count(peer_peer)
if exist_count == 0:
world_peers.append(peer_peer)
except:
pass
except:
pass
class Peerthreads(threading.Thread):
def __init__(self, peer, peers_semaphore):
threading.Thread.__init__(self)
self.sql_conn = None
self.peer = peer
self.peers_semaphore = peers_semaphore
def run(self):
self.peers_semaphore.acquire()
get_peers(self.peer)
self.peers_semaphore.release()
# Returns the parameter from the specified file
def get_parameter( parameter, file_path ):
# Check if secrets file exists
if not os.path.isfile(file_path):
print("File %s not found, exiting."%file_path)
sys.exit(0)
# Find parameter in file
with open( file_path ) as f:
for line in f:
if line.startswith( parameter ):
return line.replace(parameter + ":", "").strip()
# Cannot find parameter, exit
print(file_path + " Missing parameter %s "%parameter)
sys.exit(0)
###############################################################################
# main
if __name__ == '__main__':
start_time = time.time()
updated_at = datetime.now()
peers_api = '/api/v1/instance/peers?'
lemmy_api = '/api/v2/site?'
# Load configuration from config file
config_filepath = "config/config.txt"
mastodon_hostname = get_parameter("mastodon_hostname", config_filepath)
# Load database config from db_config file
db_config_filepath = "config/db_config.txt"
fediverse_db = get_parameter("fediverse_db", db_config_filepath)
fediverse_db_user = get_parameter("fediverse_db_user", db_config_filepath)
world_peers = []
res = requests.get('https://' + mastodon_hostname + peers_api)
hostname_peers = res.json()
for peer in hostname_peers:
exist_count = world_peers.count(peer)
if exist_count == 0:
world_peers.append(peer)
peers_semaphore = threading.Semaphore(100)
peer_threads = []
for peer in hostname_peers:
p_thread = Peerthreads(peer, peers_semaphore)
peer_threads.append(p_thread)
p_thread.start()
for p_thread in peer_threads:
p_thread.join()
write_servers(world_peers)
exec_time = str(round((time.time() - start_time), 2))
print(exec_time)

Veure arxiu

@ -1,282 +0,0 @@
import time
start_time = time.time()
from six.moves import urllib
from datetime import datetime
from subprocess import call
from mastodon import Mastodon
import threading
import os
import json
import signal
import sys
import os.path
import requests
import operator
import calendar
import psycopg2
from itertools import product
from multiprocessing import Pool, Lock, Process, Queue, current_process
import queue
import multiprocessing
import aiohttp
import aiodns
import asyncio
from aiohttp import ClientError, ClientSession, ClientConnectionError, ClientConnectorError, ClientSSLError, ClientConnectorSSLError, ServerTimeoutError
from asyncio import TimeoutError
import socket
from socket import gaierror, gethostbyname
updated_at = datetime.now()
peers_api = '/api/v1/instance/peers?'
lemmy_api = '/api/v2/site?'
def is_json(myjson):
try:
json_object = json.loads(myjson)
except ValueError as e:
return False
return True
def get_lemmy_server(server):
if server.find(".") == -1:
return
if server.find("@") != -1:
return
if server.find("/") != -1:
return
if server.find(":") != -1:
return
try:
loop = asyncio.get_event_loop()
coroutines = [get_lemmy_peers(server)]
loop.run_until_complete(asyncio.gather(*coroutines, return_exceptions=True))
except:
pass
def getserver(server):
if server.find(".") == -1:
return
if server.find("@") != -1:
return
if server.find("/") != -1:
return
if server.find(":") != -1:
return
try:
loop = asyncio.get_event_loop()
coroutines = [getpeers(server)]
loop.run_until_complete(asyncio.gather(*coroutines, return_exceptions=True))
except:
pass
async def get_lemmy_peers(server):
try:
socket.gethostbyname(server)
except socket.gaierror:
return
url = 'https://' + server
timeout = aiohttp.ClientTimeout(total=3)
async with aiohttp.ClientSession(timeout=timeout) as session:
try:
async with session.get(url+lemmy_api) as resp:
response = await resp.json()
if resp.status == 200:
try:
data = response['federated_instances']['linked']
print("Server: " + server + ", " + "federated with " + str(len(data)) + " servers")
i = 0
while i < len(data):
saved_at = datetime.now()
insert_sql = "INSERT INTO world(server, federated_with, updated_at, saved_at) VALUES(%s,%s,%s,%s) ON CONFLICT DO NOTHING"
conn = None
try:
conn = psycopg2.connect(database = fediverse_db, user = fediverse_db_user, password = "", host = "/var/run/postgresql", port = "5432")
cur = conn.cursor()
cur.execute(insert_sql, (data[i], server, updated_at, saved_at,))
conn.commit()
cur.close()
except (Exception, psycopg2.DatabaseError) as error:
print(error)
finally:
if conn is not None:
conn.close()
i += 1
except:
pass
except aiohttp.ClientConnectorError as err:
pass
async def getpeers(server):
try:
socket.gethostbyname(server)
except socket.gaierror:
return
url = 'https://' + server
timeout = aiohttp.ClientTimeout(total=3)
async with aiohttp.ClientSession(timeout=timeout) as session:
try:
async with session.get(url+peers_api) as resp:
response = await resp.json()
if resp.status == 200:
try:
response_json = response
print("Server: " + server + ", " + "federated with " + str(len(response_json)) + " servers")
i = 0
while i < len(response_json):
saved_at = datetime.now()
insert_sql = "INSERT INTO world(server, federated_with, updated_at, saved_at) VALUES(%s,%s,%s,%s) ON CONFLICT DO NOTHING"
conn = None
try:
conn = psycopg2.connect(database = fediverse_db, user = fediverse_db_user, password = "", host = "/var/run/postgresql", port = "5432")
cur = conn.cursor()
cur.execute(insert_sql, (response_json[i], server, updated_at, saved_at,))
conn.commit()
cur.close()
except (Exception, psycopg2.DatabaseError) as error:
print(error)
finally:
if conn is not None:
conn.close()
i += 1
except:
pass
except aiohttp.ClientConnectorError as err:
pass
###############################################################################
# INITIALISATION
###############################################################################
# Returns the parameter from the specified file
def get_parameter( parameter, file_path ):
# Check if secrets file exists
if not os.path.isfile(file_path):
print("File %s not found, exiting."%file_path)
sys.exit(0)
# Find parameter in file
with open( file_path ) as f:
for line in f:
if line.startswith( parameter ):
return line.replace(parameter + ":", "").strip()
# Cannot find parameter, exit
print(file_path + " Missing parameter %s "%parameter)
sys.exit(0)
# Load secrets from secrets file
secrets_filepath = "secrets/secrets.txt"
uc_client_id = get_parameter("uc_client_id", secrets_filepath)
uc_client_secret = get_parameter("uc_client_secret", secrets_filepath)
uc_access_token = get_parameter("uc_access_token", secrets_filepath)
# Load configuration from config file
config_filepath = "config/config.txt"
mastodon_hostname = get_parameter("mastodon_hostname", config_filepath)
# Load database config from db_config file
db_config_filepath = "config/db_config.txt"
fediverse_db = get_parameter("fediverse_db", db_config_filepath)
fediverse_db_user = get_parameter("fediverse_db_user", db_config_filepath)
# Initialise Mastodon API
mastodon = Mastodon(
client_id = uc_client_id,
client_secret = uc_client_secret,
access_token = uc_access_token,
api_base_url = 'https://' + mastodon_hostname,
)
# Initialise access headers
headers={ 'Authorization': 'Bearer %s'%uc_access_token }
###############################################################################
# main
if __name__ == '__main__':
lemmy_server = 'lemmy.ml'
get_lemmy_server(lemmy_server)
getserver(mastodon_hostname)
self_peers = mastodon.instance_peers()
###########################################################################
nprocs = multiprocessing.cpu_count()
with multiprocessing.Pool(processes=nprocs) as pool:
results = pool.starmap(getserver, product(self_peers))
exec_time = str(round((time.time() - start_time), 2))
print(exec_time)