import os
import sys
import time
import shutil
import logging

import feedparser
import filetype
import ffmpeg
import psycopg2
import requests
import tweepy
from tweepy import TweepyException
from bs4 import BeautifulSoup
from mastodon import Mastodon

logger = logging.getLogger()


def get_toot(title):
    # Strip the HTML markup from the toot and rebuild its text with any
    # non-hashtag links appended, so it can be posted as a plain tweet.
    soup = BeautifulSoup(title, 'html.parser')
    toot_text = soup.get_text()

    sub_str = 'http'
    find_link = toot_text.find(sub_str)
    if find_link != -1:
        tuit_text = toot_text[:toot_text.index(sub_str)]
    else:
        tuit_text = toot_text

    # Collect the href of every link except Mastodon hashtag links.
    links_lst = ''
    for links in soup.find_all('a'):
        find_tag = links.get('href').find('/tags/')
        if find_tag == -1:
            links_lst += links.get('href')

    if len(links_lst) > 0:
        last_text = toot_text[len(tuit_text) + len(links_lst):]
    else:
        last_text = ''

    tuit_text = f'{tuit_text} {links_lst} {last_text}'
    return tuit_text


def compose_tweet(tuit_text, with_images, is_reply):
    # Publish the toot's text (and media, if any) to Twitter. Relies on the
    # module-level globals set in __main__: api, images_list and tweet_id.
    if len(tuit_text) > 280:
        # Text over Twitter's limit is split into a two-tweet thread. The
        # slice offset of 6 accounts for the length of the ' (1/2)' marker.
        if with_images:
            tuit_text1 = tuit_text[:250].rsplit(' ', 1)[0] + ' (1/2)'
        else:
            tuit_text1 = tuit_text[:275].rsplit(' ', 1)[0] + ' (1/2)'
        tuit_text2 = tuit_text[len(tuit_text1) - 6:] + ' (2/2)'

    try:
        if with_images:
            images_id_lst = []
            i = 0
            while i < len(images_list):
                kind = filetype.guess('images/' + images_list[i])
                if kind.mime == 'video/mp4':
                    # Twitter rejects videos longer than 140 seconds.
                    probe = ffmpeg.probe('images/' + images_list[i])
                    duration = probe['streams'][0]['duration']
                    if float(duration) > 139:
                        print(f'video duration is too long: {duration}')
                    else:
                        media = api.media_upload('images/' + images_list[i],
                                                 media_category='tweet_video')
                        if media.processing_info['state'] == 'succeeded':
                            images_id_lst.append(media.media_id)
                        else:
                            print(media.processing_info)
                else:
                    media = api.media_upload('images/' + images_list[i])
                    images_id_lst.append(media.media_id)
                i += 1

            if len(tuit_text) > 280:
                if is_reply:
                    first_tweet = api.update_status(status=tuit_text1,
                                                    in_reply_to_status_id=tweet_id)
                else:
                    first_tweet = api.update_status(status=tuit_text1)
                tweet = api.update_status(status=tuit_text2,
                                          in_reply_to_status_id=first_tweet.id,
                                          media_ids=images_id_lst)
            else:
                if is_reply:
                    tweet = api.update_status(status=tuit_text,
                                              in_reply_to_status_id=tweet_id,
                                              media_ids=images_id_lst)
                else:
                    tweet = api.update_status(status=tuit_text,
                                              media_ids=images_id_lst)
        else:
            if len(tuit_text) > 280:
                if is_reply:
                    first_tweet = api.update_status(status=tuit_text1,
                                                    in_reply_to_status_id=tweet_id)
                else:
                    first_tweet = api.update_status(status=tuit_text1)
                tweet = api.update_status(status=tuit_text2,
                                          in_reply_to_status_id=first_tweet.id)
            else:
                if is_reply:
                    tweet = api.update_status(status=tuit_text,
                                              in_reply_to_status_id=tweet_id)
                else:
                    tweet = api.update_status(status=tuit_text)
        return tweet

    except TweepyException as err:
        print('\n')
        sys.exit(err)


def get_tweet_id(toot_id):
    # Look up the tweet id previously stored for this toot; 0 means the toot
    # has not been cross-posted yet.
    tweet_id = 0
    conn = None
    try:
        conn = psycopg2.connect(database=feeds_db, user=feeds_db_user,
                                password="", host="/var/run/postgresql",
                                port="5432")
        cur = conn.cursor()
        cur.execute('select tweet_id from id where toot_id=(%s)', (toot_id,))
        row = cur.fetchone()
        if row is not None:
            tweet_id = row[0]
        cur.close()
        return tweet_id
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()


def write_image(image_url):
    # Download the attached media file into the local images/ directory and
    # return its filename.
    if not os.path.exists('images'):
        os.makedirs('images')
    filename = image_url.split("/")[-1]
    r = requests.get(image_url, stream=True)
    r.raw.decode_content = True
    with open('images/' + filename, 'wb') as f:
        shutil.copyfileobj(r.raw, f)
    return filename


def create_api():
    # Authenticate against the Twitter API with the keys loaded by
    # twitter_config().
    auth = tweepy.OAuthHandler(api_key, api_key_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)
    try:
        api.verify_credentials()
        logged_in = True
    except Exception as e:
        logger.error("Error creating API", exc_info=True)
        raise e
    logger.info("API created")
    return (api, logged_in)


def mastodon():
    # Load secrets from secrets file
    secrets_filepath = "secrets/secrets.txt"
    uc_client_id = get_parameter("uc_client_id", secrets_filepath)
    uc_client_secret = get_parameter("uc_client_secret", secrets_filepath)
    uc_access_token = get_parameter("uc_access_token", secrets_filepath)

    # Load configuration from config file
    config_filepath = "config/config.txt"
    mastodon_hostname = get_parameter("mastodon_hostname", config_filepath)

    # Initialise Mastodon API
    mastodon = Mastodon(
        client_id=uc_client_id,
        client_secret=uc_client_secret,
        access_token=uc_access_token,
        api_base_url='https://' + mastodon_hostname,
    )

    # Initialise access headers
    headers = {'Authorization': 'Bearer %s' % uc_access_token}

    return (mastodon, mastodon_hostname)


def db_config():
    # Load db configuration from config file
    db_config_filepath = "config/db_config.txt"
    feeds_db = get_parameter("feeds_db", db_config_filepath)
    feeds_db_user = get_parameter("feeds_db_user", db_config_filepath)
    feeds_url = get_parameter("feeds_url", db_config_filepath)
    return (feeds_db, feeds_db_user, feeds_url)


def twitter_config():
    # Load Twitter API keys from config file
    twitter_config_filepath = "config/keys_config.txt"
    api_key = get_parameter("api_key", twitter_config_filepath)
    api_key_secret = get_parameter("api_key_secret", twitter_config_filepath)
    access_token = get_parameter("access_token", twitter_config_filepath)
    access_token_secret = get_parameter("access_token_secret", twitter_config_filepath)
    return (api_key, api_key_secret, access_token, access_token_secret)


# Returns the parameter from the specified file
def get_parameter(parameter, file_path):
    # Check if secrets file exists
    if not os.path.isfile(file_path):
        print("File %s not found, exiting." % file_path)
        sys.exit(0)

    # Find parameter in file
    with open(file_path) as f:
        for line in f:
            if line.startswith(parameter):
                return line.replace(parameter + ":", "").strip()

    # Cannot find parameter, exit
    print("%s is missing parameter %s" % (file_path, parameter))
    sys.exit(0)


###############################################################################
# main

if __name__ == '__main__':

    mastodon, mastodon_hostname = mastodon()
    feeds_db, feeds_db_user, feeds_url = db_config()
    api_key, api_key_secret, access_token, access_token_secret = twitter_config()

    logged_in = False

    try:
        newsfeeds = feedparser.parse(feeds_url)
    except Exception as error:
        print(error)
        sys.exit(0)

    for entry in newsfeeds.entries:

        publish = False
        with_images = False
        is_reply = False

        title = entry['summary']
        id = entry['id']
        link = entry['link']
        toot_id = link.rsplit('/')[4]

        tweet_id = get_tweet_id(toot_id)

        if tweet_id == 0:
            # This toot has not been cross-posted yet.
            publish = True

            # If the toot is a reply, thread the tweet under the tweet that
            # mirrors the parent toot.
            reply_id = mastodon.status(toot_id).in_reply_to_id
            if reply_id is not None:
                is_reply = True
                tweet_id = get_tweet_id(reply_id)

            # Any feed link beyond the first one is an attached media file.
            if len(entry.links) >= 2:
                with_images = True
                images_list = []
                images = len(entry.links) - 1
                i = 0
                while i < images:
                    image_url = entry.links[i + 1].href
                    image_filename = write_image(image_url)
                    images_list.append(image_filename)
                    i += 1

            ###########################################################

            if publish:
                tuit_text = get_toot(title)

                print("Tooting...")
                print(tuit_text)

                if not logged_in:
                    api, logged_in = create_api()

                tweet = compose_tweet(tuit_text, with_images, is_reply)

                #########################################################
                # Remember which tweet mirrors which toot.

                sql_insert_ids = 'INSERT INTO id(toot_id, tweet_id) VALUES (%s,%s)'

                conn = None
                try:
                    conn = psycopg2.connect(database=feeds_db, user=feeds_db_user,
                                            password="", host="/var/run/postgresql",
                                            port="5432")
                    cur = conn.cursor()
                    cur.execute(sql_insert_ids, (toot_id, tweet.id))
                    conn.commit()
                    cur.close()
                except (Exception, psycopg2.DatabaseError) as error:
                    print(error)
                finally:
                    if conn is not None:
                        conn.close()

                #########################################################

                time.sleep(2)
        else:
            # The newest already-published toot was reached: nothing new left.
            print("No new toots to publish.")
            sys.exit(0)
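
# ---------------------------------------------------------------------------
# Illustrative sketch, not part of the original script: get_parameter() above
# parses plain "key: value" lines, so the configuration and secrets files it
# reads would look roughly like the examples below. Every value shown is a
# placeholder assumption, not a real hostname, database or credential.
#
# config/config.txt
#     mastodon_hostname: mastodon.example
#
# config/db_config.txt
#     feeds_db: feeds
#     feeds_db_user: feeds
#     feeds_url: https://mastodon.example/@user.rss
#
# config/keys_config.txt
#     api_key: xxxxxxxx
#     api_key_secret: xxxxxxxx
#     access_token: xxxxxxxx
#     access_token_secret: xxxxxxxx
#
# secrets/secrets.txt
#     uc_client_id: xxxxxxxx
#     uc_client_secret: xxxxxxxx
#     uc_access_token: xxxxxxxx
#
# The SELECT and INSERT statements above also assume a PostgreSQL table that
# maps toot ids to tweet ids, roughly like this (column types are an
# assumption):
#
#     CREATE TABLE id (toot_id BIGINT PRIMARY KEY, tweet_id BIGINT);
# ---------------------------------------------------------------------------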