import logging
import os
import pdb
import shutil
import sys
import time

import feedparser
import ffmpeg
import filetype
import psycopg2
import requests
import tweepy
from bs4 import BeautifulSoup
from mastodon import Mastodon
from tweepy import TweepyException
|
logger = logging.getLogger()
|
|
|
|
|
2021-10-08 14:30:00 +02:00
|
|
|
def get_toot(title):
    """Turn a toot's HTML summary into plain tweet text.

    Strips the HTML, cuts the text at the first embedded 'http' link,
    then re-appends the full hrefs of every non-hashtag anchor plus any
    trailing text, so links are not truncated by Mastodon's display form.

    :param title: HTML string (the RSS entry 'summary' of a toot)
    :return: plain-text string ready to be tweeted
    """
    soup = BeautifulSoup(title, 'html.parser')
    toot_text = soup.get_text()

    # Cut the visible text just before the first link, if there is one.
    # Reuse the find() result instead of scanning again with index().
    sub_str = 'http'
    find_link = toot_text.find(sub_str)
    if find_link != -1:
        tuit_text = toot_text[:find_link]
    else:
        tuit_text = toot_text

    # Collect the full hrefs of all anchors, skipping hashtag links.
    links_lst = ''
    for links in soup.find_all('a'):
        if links.get('href').find('/tags/') == -1:
            links_lst += links.get('href')

    # Anything after the (display-form) links in the plain text.
    # NOTE(review): this assumes the links occupy exactly len(links_lst)
    # characters of toot_text right after tuit_text — Mastodon usually
    # shortens displayed URLs, so this offset can be wrong; confirm.
    if len(links_lst) > 0:
        last_text = toot_text[len(tuit_text) + len(links_lst):]
    else:
        last_text = ''

    tuit_text = f'{tuit_text} {links_lst} {last_text}'

    return tuit_text
|
2021-10-15 13:15:52 +02:00
|
|
|
def compose_tweet(tuit_text, with_images, is_reply):
    """Publish `tuit_text` to Twitter, optionally with media, and return the tweet.

    Splits into two chained tweets when the text exceeds 280 characters.
    Exits the process on any upload/publish error.

    NOTE(review): relies on module-level globals set in __main__:
    `api` (tweepy client), `images_list` (downloaded filenames) and
    `tweet_id` (id of the tweet being replied to) — confirm before reuse.

    :param tuit_text: plain text to tweet
    :param with_images: True when `images_list` holds media to upload
    :param is_reply: True when the tweet answers an existing tweet (`tweet_id`)
    :return: the tweepy Status object of the (last) published tweet
    """
    images_ids = []

    if with_images:
        for image in images_list:
            file_ = filetype.guess('images/' + image)
            is_video = file_.mime == 'video/mp4'

            # It's a video, let's do some processing...
            if is_video:
                try:
                    ffmpeg_probe = ffmpeg.probe(f'images/{image}')
                    video_duration = float(
                        ffmpeg_probe['streams'][0]['duration']
                    )
                except Exception as e:
                    print(f'Error while trying to probe {image}\n{e}')
                    sys.exit(e)

                # Twitter rejects videos longer than 140 seconds.
                if video_duration > 139:
                    print(f'video duration is too large: {video_duration}')
                    # We could potentially use ffmpeg to shorten the video.
                    # We skip to the next image.
                    continue

            try:
                # Now let's upload the media...
                media_upload = api.media_upload(
                    f'images/{image}',
                    media_category='tweet_video' if is_video else 'tweet_image'
                )

                # NOTE(review): media still in 'pending'/'in_progress' state
                # is silently dropped here — confirm that is intended.
                if media_upload.processing_info['state'] == 'succeeded':
                    images_ids.append(media_upload.media_id)
            except TweepyException as err:
                print('Error while uploading media!\n')
                sys.exit(err)

    # Compose tuit
    tuit_text2 = ''

    if len(tuit_text) > 280:
        # Leave room for the " (1/2)" marker, and more when media is attached.
        tuit_max_length = 250 if with_images else 275

        # BUG FIX: keep the original full text before truncating; the old
        # code sliced the already-truncated tuit_text, so part 2 was just
        # the literal " (1/2)" tail instead of the remaining text.
        full_text = tuit_text
        tuit_text = '{0} (1/2)'.format(
            full_text[:tuit_max_length].rsplit(' ', 1)[0]
        )
        # len(tuit_text) - 6 == length of part 1 without its " (1/2)" suffix,
        # i.e. where the remainder of the original text starts.
        tuit_text2 = '{0} (2/2)'.format(
            full_text[len(tuit_text) - 6:]
        )

        try:
            first_tweet = api.update_status(
                status=tuit_text,
                in_reply_to_status_id=tweet_id if is_reply else ''
            )

            # Media goes on the second (final) tweet of the thread.
            tweet = api.update_status(
                status=tuit_text2,
                in_reply_to_status_id=first_tweet.id,
                media_ids=images_ids
            )
        except TweepyException as err:
            print('Error while trying to publish split tweet.\n')
            sys.exit(err)

    else:
        try:
            tweet = api.update_status(
                status=tuit_text,
                in_reply_to_status_id=tweet_id if is_reply else '',
                media_ids=images_ids  # defaults to empty list
            )
        except TweepyException as err:
            print('Errror while trying to publish tweet.\n')
            sys.exit(err)

    return tweet
|
2021-10-15 13:15:52 +02:00
|
|
|
|
2021-10-07 20:23:17 +02:00
|
|
|
def get_tweet_id(toot_id):
    """Look up the tweet id previously stored for `toot_id`.

    NOTE(review): uses globals `feeds_db` / `feeds_db_user` set in __main__.

    :param toot_id: Mastodon status id used as the lookup key
    :return: the mapped tweet id, or 0 when no mapping exists or the
             database is unreachable (errors are printed, not raised)
    """
    tweet_id = 0
    conn = None
    try:
        conn = psycopg2.connect(database=feeds_db, user=feeds_db_user, password="", host="/var/run/postgresql", port="5432")
        cur = conn.cursor()
        cur.execute('select tweet_id from id where toot_id=(%s)', (toot_id,))
        row = cur.fetchone()
        if row is not None:
            tweet_id = row[0]
        cur.close()
        return tweet_id
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
        # BUG FIX: the old code fell through and implicitly returned None
        # on a DB error, but callers compare the result with 0.
        return tweet_id
    finally:
        if conn is not None:
            conn.close()
|
2021-09-29 19:06:31 +02:00
|
|
|
def write_image(image_url):
    """Download `image_url` into the local images/ directory.

    :param image_url: direct URL of the media file
    :return: the local filename (last path segment of the URL)
    """
    # exist_ok avoids the check-then-create race of the old exists()/makedirs pair.
    os.makedirs('images', exist_ok=True)

    filename = image_url.split("/")[-1]

    # BUG FIX: close the HTTP response (the old code leaked the connection).
    with requests.get(image_url, stream=True) as r:
        # Decompress gzip/deflate transparently while streaming raw bytes.
        r.raw.decode_content = True
        with open('images/' + filename, 'wb') as f:
            shutil.copyfileobj(r.raw, f)

    return filename
2021-09-10 20:32:11 +02:00
|
|
|
|
|
|
|
def create_api():
    """Authenticate against the Twitter API and verify the credentials.

    NOTE(review): reads the key/token globals set in __main__ via
    twitter_config().

    :return: tuple (tweepy.API instance, logged_in flag — True on success)
    :raises: re-raises whatever verify_credentials() raised on failure
    """
    handler = tweepy.OAuthHandler(api_key, api_key_secret)
    handler.set_access_token(access_token, access_token_secret)
    api = tweepy.API(handler)
    try:
        api.verify_credentials()
        logged_in = True
    except Exception as e:
        logger.error("Error creating API", exc_info=True)
        raise e
    logger.info("API created")
    return (api, logged_in)
|
2021-09-10 20:32:11 +02:00
|
|
|
|
|
|
|
def mastodon():
    """Build an authenticated Mastodon client from the secrets/config files.

    NOTE(review): __main__ rebinds this function's own name with its result
    (`mastodon, mastodon_hostname = mastodon()`), so it can only ever be
    called once per process — consider renaming.

    :return: tuple (Mastodon client instance, mastodon_hostname string)
    """
    # Load secrets from secrets file
    secrets_filepath = "secrets/secrets.txt"
    uc_client_id = get_parameter("uc_client_id", secrets_filepath)
    uc_client_secret = get_parameter("uc_client_secret", secrets_filepath)
    uc_access_token = get_parameter("uc_access_token", secrets_filepath)

    # Load configuration from config file
    config_filepath = "config/config.txt"
    mastodon_hostname = get_parameter("mastodon_hostname", config_filepath)

    # Initialise Mastodon API client.
    # (The old unused `headers` dict of access headers was removed.)
    client = Mastodon(
        client_id=uc_client_id,
        client_secret=uc_client_secret,
        access_token=uc_access_token,
        api_base_url='https://' + mastodon_hostname,
    )

    return (client, mastodon_hostname)
|
|
|
|
|
|
|
|
def db_config():
    """Read the database settings and feed URL from config/db_config.txt.

    :return: tuple (feeds_db, feeds_db_user, feeds_url)
    """
    # Load db configuration from config file
    config_file = "config/db_config.txt"
    feeds_db = get_parameter("feeds_db", config_file)
    feeds_db_user = get_parameter("feeds_db_user", config_file)
    feeds_url = get_parameter("feeds_url", config_file)
    return (feeds_db, feeds_db_user, feeds_url)
|
|
|
|
|
|
|
|
def twitter_config():
    """Read the Twitter API credentials from config/keys_config.txt.

    :return: tuple (api_key, api_key_secret, access_token, access_token_secret)
    """
    keys_file = "config/keys_config.txt"
    api_key = get_parameter("api_key", keys_file)
    api_key_secret = get_parameter("api_key_secret", keys_file)
    access_token = get_parameter("access_token", keys_file)
    access_token_secret = get_parameter("access_token_secret", keys_file)
    return (api_key, api_key_secret, access_token, access_token_secret)
|
|
|
|
|
|
|
|
# Returns the parameter from the specified file
|
|
|
|
def get_parameter(parameter, file_path):
    """Return the value of `parameter` from a "key: value" style file.

    Exits the process when the file or the parameter is missing.

    :param parameter: key to look up (matched as a line prefix)
    :param file_path: path of the config/secrets file
    :return: the value after "key:", stripped of surrounding whitespace
    """
    # Check if secrets file exists
    if not os.path.isfile(file_path):
        print("File %s not found, exiting."%file_path)
        sys.exit(0)

    # Find parameter in file
    with open(file_path) as config_file:
        for config_line in config_file:
            if config_line.startswith(parameter):
                return config_line.replace(parameter + ":", "").strip()

    # Cannot find parameter, exit
    print(file_path + " Missing parameter %s "%parameter)
    sys.exit(0)
|
|
|
|
|
|
|
|
###############################################################################
|
|
|
|
# main
|
|
|
|
|
|
|
|
if __name__ == '__main__':

    # Note: this rebinds the name `mastodon` from the function to the client.
    mastodon, mastodon_hostname = mastodon()

    feeds_db, feeds_db_user, feeds_url = db_config()

    api_key, api_key_secret, access_token, access_token_secret = twitter_config()

    # Lazily log in to Twitter only when something actually needs publishing.
    logged_in = False

    try:
        newsfeeds = feedparser.parse(feeds_url)
    except Exception as e:
        # BUG FIX: the old handler printed newsfeeds.status, but `newsfeeds`
        # is unbound when parse() itself raised (NameError in the error path).
        print(e)
        sys.exit(0)

    for entry in newsfeeds.entries:

        publish = False
        with_images = False
        is_reply = False

        title = entry['summary']
        entry_id = entry['id']  # renamed from `id` (shadowed the builtin; unused)
        link = entry['link']

        # The toot id is the 5th path segment of the status URL.
        toot_id = link.rsplit('/')[4]

        tweet_id = get_tweet_id(toot_id)

        # 0 means this toot was never cross-posted: publish it.
        if tweet_id == 0:

            publish = True

            reply_id = mastodon.status(toot_id).in_reply_to_id
            if reply_id is not None:
                # Reply to the tweet that mirrors the toot being answered.
                is_reply = True
                tweet_id = get_tweet_id(reply_id)

            # Extra links beyond the first are media attachments.
            if len(entry.links) >= 2:
                with_images = True
                images_list = []
                images = len(entry.links) - 1

                i = 0
                while i < images:
                    image_url = entry.links[i+1].href
                    image_filename = write_image(image_url)
                    images_list.append(image_filename)
                    i += 1

        ###########################################################

        if publish:
            tuit_text = get_toot(title)

            print("Tooting...")
            print(tuit_text)

            if not logged_in:
                api, logged_in = create_api()

            tweet = compose_tweet(tuit_text, with_images, is_reply)

            #########################################################
            # Remember the toot -> tweet mapping so it is not reposted.
            sql_insert_ids = 'INSERT INTO id(toot_id, tweet_id) VALUES (%s,%s)'

            conn = None
            try:
                conn = psycopg2.connect(database=feeds_db, user=feeds_db_user, password="", host="/var/run/postgresql", port="5432")
                cur = conn.cursor()
                cur.execute(sql_insert_ids, (toot_id, tweet.id))
                conn.commit()
                cur.close()
            except (Exception, psycopg2.DatabaseError) as error:
                print(error)
            finally:
                if conn is not None:
                    conn.close()

            #########################################################

            # Be gentle with the Twitter API between posts.
            time.sleep(2)

        else:
            # NOTE(review): this exits on the FIRST already-published entry,
            # skipping any newer unpublished ones later in the feed — confirm
            # the feed is ordered newest-first so this is intentional.
            print("Any new feeds")
            sys.exit(0)