"""Mirror new entries of a Mastodon RSS feed to Twitter.

For every feed entry that is not yet recorded in the ``feeds`` table the
script builds the tweet text from the entry summary, downloads the entry's
attachments, posts one tweet (or two when the text is too long) and stores the
toot id / tweet id pair so that later replies can be threaded.
"""
import logging
import os
import pdb
import shutil
import sys
import time

import feedparser
import ffmpeg
import filetype
import psycopg2
import requests
import tweepy
from bs4 import BeautifulSoup
from mastodon import Mastodon
from tweepy import TweepyException

logger = logging.getLogger()

# Directory (relative to the working directory) where attachments are stored.
IMAGES_DIR = 'images'


def get_toot(title):
    """Build the tweet text from the HTML summary of a toot.

    The result is ``"<text before the first 'http'> <hrefs> <remaining text>"``
    where ``<hrefs>`` is the concatenation of the ``href`` of every anchor
    that does not point to a hashtag (``/tags/``).

    Args:
        title: HTML fragment of the toot.

    Returns:
        The plain text to publish.
    """
    soup = BeautifulSoup(title, 'html.parser')
    toot_text = soup.get_text()

    # Everything that precedes the first URL (or the whole text if none).
    link_pos = toot_text.find('http')
    tuit_text = toot_text[:link_pos] if link_pos != -1 else toot_text

    # href=True skips anchors without an href attribute, which would
    # otherwise make the '/tags/' lookup fail on None.
    links_lst = ''.join(
        anchor['href']
        for anchor in soup.find_all('a', href=True)
        if '/tags/' not in anchor['href']
    )

    # Text that follows the links; only computed when links were found.
    last_text = toot_text[len(tuit_text) + len(links_lst):] if links_lst else ''

    return f'{tuit_text} {links_lst} {last_text}'


def _connect():
    """Open a connection to the feeds database.

    Reads the module-level ``feeds_db`` and ``feeds_db_user`` names, which are
    assigned by the ``__main__`` section from ``db_config()``.
    """
    return psycopg2.connect(
        database=feeds_db,
        user=feeds_db_user,
        password="",
        host="/var/run/postgresql",
        port="5432",
    )


def get_tweet_id(toot_id):
    """Return the tweet id stored for *toot_id*.

    Returns:
        The ``tweet_id`` column of the matching row of table ``id``, or ``0``
        when there is no such row or the query failed (the error is printed).
    """
    tweet_id = 0
    conn = None
    try:
        conn = _connect()
        with conn.cursor() as cur:
            cur.execute('select tweet_id from id where toot_id=(%s)', (toot_id,))
            row = cur.fetchone()
        if row is not None:
            tweet_id = row[0]
    except Exception as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()
    return tweet_id


def _is_new_link(link):
    """Return True when *link* is not yet stored in table ``feeds``.

    A database error is printed and reported as "not new", so nothing is
    published when the database cannot be queried.
    """
    is_new = False
    conn = None
    try:
        conn = _connect()
        with conn.cursor() as cur:
            cur.execute('select link from feeds where link=(%s)', (link,))
            is_new = cur.fetchone() is None
    except Exception as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()
    return is_new


def _record_published(link, toot_id, tweet_id):
    """Store the published link and its toot id / tweet id pair."""
    sql_insert_link = 'INSERT INTO feeds(link) VALUES (%s)'
    sql_insert_ids = 'INSERT INTO id(toot_id, tweet_id) VALUES (%s,%s)'
    conn = None
    try:
        conn = _connect()
        with conn.cursor() as cur:
            cur.execute(sql_insert_link, (link,))
            cur.execute(sql_insert_ids, (toot_id, tweet_id))
        conn.commit()
    except Exception as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()


def write_image(image_url):
    """Download *image_url* into the ``images`` directory.

    Args:
        image_url: URL of the attachment; its last path segment becomes the
            local file name.

    Returns:
        The file name (without directory) the attachment was saved as.

    Raises:
        requests.RequestException: if the download cannot be performed or
            times out.
    """
    os.makedirs(IMAGES_DIR, exist_ok=True)
    filename = image_url.split("/")[-1]
    # The timeout keeps a stalled server from blocking the script forever;
    # the context manager releases the connection once the body is copied.
    with requests.get(image_url, stream=True, timeout=30) as response:
        response.raw.decode_content = True
        with open(IMAGES_DIR + '/' + filename, 'wb') as f:
            shutil.copyfileobj(response.raw, f)
    return filename


def create_api():
    """Create the Tweepy API object and verify the credentials.

    Reads the module-level ``api_key``, ``api_key_secret``, ``access_token``
    and ``access_token_secret`` names assigned by the ``__main__`` section.

    Returns:
        Tuple ``(api, logged_in)``; ``logged_in`` is always ``True``.

    Raises:
        Exception: whatever ``verify_credentials`` raised, after logging it.
    """
    auth = tweepy.OAuthHandler(api_key, api_key_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)
    try:
        api.verify_credentials()
    except Exception:
        logger.error("Error creating API", exc_info=True)
        raise
    logger.info("API created")
    return (api, True)


def mastodon():
    """Create the Mastodon client from the secrets and config files.

    Returns:
        Tuple ``(client, mastodon_hostname)``.
    """
    # Load secrets from secrets file
    secrets_filepath = "secrets/secrets.txt"
    uc_client_id = get_parameter("uc_client_id", secrets_filepath)
    uc_client_secret = get_parameter("uc_client_secret", secrets_filepath)
    uc_access_token = get_parameter("uc_access_token", secrets_filepath)

    # Load configuration from config file
    config_filepath = "config/config.txt"
    mastodon_hostname = get_parameter("mastodon_hostname", config_filepath)

    # Initialise Mastodon API
    client = Mastodon(
        client_id=uc_client_id,
        client_secret=uc_client_secret,
        access_token=uc_access_token,
        api_base_url='https://' + mastodon_hostname,
    )

    return (client, mastodon_hostname)


def db_config():
    """Load the database name, database user and feed URL.

    Returns:
        Tuple ``(feeds_db, feeds_db_user, feeds_url)``.
    """
    # Load db configuration from config file
    db_config_filepath = "config/db_config.txt"
    feeds_db = get_parameter("feeds_db", db_config_filepath)
    feeds_db_user = get_parameter("feeds_db_user", db_config_filepath)
    feeds_url = get_parameter("feeds_url", db_config_filepath)

    return (feeds_db, feeds_db_user, feeds_url)


def twitter_config():
    """Load the Twitter credentials.

    Returns:
        Tuple ``(api_key, api_key_secret, access_token, access_token_secret)``.
    """
    twitter_config_filepath = "config/keys_config.txt"
    api_key = get_parameter("api_key", twitter_config_filepath)
    api_key_secret = get_parameter("api_key_secret", twitter_config_filepath)
    access_token = get_parameter("access_token", twitter_config_filepath)
    access_token_secret = get_parameter("access_token_secret", twitter_config_filepath)

    return (api_key, api_key_secret, access_token, access_token_secret)


# Returns the parameter from the specified file
def get_parameter(parameter, file_path):
    """Return the value of ``parameter: value`` found in *file_path*.

    The key must match *parameter* exactly, so looking up ``api_key`` never
    picks up an ``api_key_secret:`` line.

    Exits the process (status 0, after printing a message) when the file does
    not exist or does not contain the parameter.
    """
    # Check if secrets file exists
    if not os.path.isfile(file_path):
        print("File %s not found, exiting."%file_path)
        sys.exit(0)

    # Find parameter in file
    with open(file_path) as f:
        for line in f:
            # Split on the first colon only: values such as URLs contain ':'.
            key, separator, value = line.partition(":")
            if separator and key.strip() == parameter:
                return value.strip()

    # Cannot find parameter, exit
    print(file_path + " Missing parameter %s "%parameter)
    sys.exit(0)


def _upload_media(api, images_list):
    """Upload the downloaded attachments and return the usable media ids.

    Files whose type cannot be detected and videos longer than 139 seconds are
    skipped with a message. An upload is kept unless it reports a processing
    state other than ``succeeded``.
    """
    media_ids = []
    for image_name in images_list:
        path = IMAGES_DIR + '/' + image_name

        kind = filetype.guess(path)
        if kind is None:
            # filetype.guess() returns None for content it cannot identify.
            print(f'unknown file type, skipping: {path}')
            continue

        if kind.mime == 'video/mp4':
            probe = ffmpeg.probe(path)
            duration = probe['streams'][0]['duration']
            if float(duration) > 139:
                print(f'video duration is too large: {duration}')
                continue
            media = api.media_upload(path, media_category='tweet_video')
        else:
            media = api.media_upload(path)

        # processing_info is read defensively: it is not guaranteed to be
        # present on every upload result (e.g. plain images).
        processing_info = getattr(media, 'processing_info', None)
        if processing_info is None or processing_info['state'] == 'succeeded':
            media_ids.append(media.media_id)
        else:
            print(processing_info)
    return media_ids


def _post_status(api, text, reply_to=None, media_ids=None):
    """Post *text*, optionally as a reply and/or with media attached."""
    options = {}
    if reply_to:
        options['in_reply_to_status_id'] = reply_to
    if media_ids:
        options['media_ids'] = media_ids
    return api.update_status(status=text, **options)


###############################################################################
# main

if __name__ == '__main__':

    mastodon_client, mastodon_hostname = mastodon()
    # These names are read as globals by _connect() and create_api().
    feeds_db, feeds_db_user, feeds_url = db_config()
    api_key, api_key_secret, access_token, access_token_secret = twitter_config()

    logged_in = False

    try:
        newsfeeds = feedparser.parse(feeds_url)
    except Exception as error:
        # newsfeeds is unbound here, so report the exception itself.
        print(error)
        sys.exit(0)

    for entry in newsfeeds.entries:

        title = entry['summary']
        link = entry['link']
        toot_id = link.rsplit('/')[4]

        # Check the database first so that nothing is downloaded or requested
        # for an entry that was already published.
        # NOTE(review): the script stops at the first already-published entry,
        # which presumes the feed lists the newest entries first - confirm.
        if not _is_new_link(link):
            print("Any new feeds")
            sys.exit(0)

        # Tweet to reply to, when the toot is a reply and its parent was
        # mirrored. An unknown parent (0) is turned into "no reply target".
        reply_to_tweet_id = None
        reply_id = mastodon_client.status(toot_id).in_reply_to_id
        if reply_id is not None:
            reply_to_tweet_id = get_tweet_id(reply_id) or None

        # The first link of the entry is skipped; every further link is
        # downloaded as an attachment.
        images_list = [write_image(attachment.href) for attachment in entry.links[1:]]
        with_images = bool(images_list)

        tuit_text = get_toot(title)

        print("Tooting...")
        print(tuit_text)

        if not logged_in:
            api, logged_in = create_api()

        try:
            media_ids = _upload_media(api, images_list) if with_images else []

            if len(tuit_text) < 280:
                tweet = _post_status(api, tuit_text, reply_to_tweet_id, media_ids)
            else:
                # Split into two tweets at a word boundary. The cut point is
                # 250 characters when there are attachments, 275 otherwise.
                cut = 250 if with_images else 275
                head = tuit_text[:cut].rsplit(' ', 1)[0]
                tuit_text1 = head + ' (1/2)'
                tuit_text2 = tuit_text[len(head):] + ' (2/2)'

                first_tweet = _post_status(api, tuit_text1, reply_to_tweet_id)
                # The media goes on the second tweet of the pair.
                tweet = _post_status(api, tuit_text2, first_tweet.id, media_ids)
        except TweepyException as err:
            print('\n')
            sys.exit(err)

        _record_published(link, toot_id, tweet.id)

        time.sleep(2)