mastotuit/mastotuit.py
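
# Read a Mastodon account's feed and mirror any not-yet-published toots to
# Twitter: each toot's HTML is converted to tweet text, attached media are
# re-uploaded through the Twitter API, and every toot_id/tweet_id pair is
# stored in PostgreSQL so nothing is posted twice.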

import os
import feedparser
from bs4 import BeautifulSoup
from mastodon import Mastodon
import psycopg2
import sys
import time
import requests
import shutil
import tweepy
from tweepy import TweepyException
import logging
import filetype
import ffmpeg
import pdb
logger = logging.getLogger()
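
# Turn a toot's HTML summary into plain tweet text: strip the markup, drop
# hashtag links and re-append the remaining links after the text.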
def get_toot(title):
    soup = BeautifulSoup(title, 'html.parser')
    toot_text = soup.get_text()
    sub_str = 'http'
    find_link = toot_text.find(sub_str)
    if find_link != -1:
        tuit_text = toot_text[:toot_text.index(sub_str)]
    else:
        tuit_text = toot_text
    links_lst = ''
    for links in soup.find_all('a'):
        find_tag = links.get('href').find('/tags/')
        if find_tag == -1:
            links_lst += links.get('href')
    if len(links_lst) > 0:
        last_text = toot_text[len(tuit_text) + len(links_lst):]
    else:
        last_text = ''
    tuit_text = f'{tuit_text} {links_lst} {last_text}'
    return tuit_text
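
# Post one toot to Twitter. Text over 280 characters is split into a
# two-tweet thread, downloaded media are uploaded first (videos longer than
# 139 s are skipped to stay under Twitter's 140 s tweet-video limit), and
# replies are threaded under the parent toot's tweet via in_reply_to_status_id.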
def compose_tweet(tuit_text, with_images, is_reply):
    if len(tuit_text) > 280:
        if with_images:
            tuit_text1 = tuit_text[:250].rsplit(' ', 1)[0] + ' (1/2)'
            tuit_text2 = tuit_text[int(len(tuit_text1)-6):] + ' (2/2)'
        else:
            tuit_text1 = tuit_text[:275].rsplit(' ', 1)[0] + ' (1/2)'
            tuit_text2 = tuit_text[int(len(tuit_text1)-6):] + ' (2/2)'
    try:
        if with_images:
            images_id_lst = []
            i = 0
            while i < len(images_list):
                kind = filetype.guess('images/' + images_list[i])
                if kind.mime == 'video/mp4':
                    probe = ffmpeg.probe('images/' + images_list[i])
                    duration = probe['streams'][0]['duration']
                    if float(duration) > 139:
                        print(f'video duration is too large: {duration}')
                    else:
                        media = api.media_upload('images/' + images_list[i], media_category='tweet_video')
                        if media.processing_info['state'] == 'succeeded':
                            images_id_lst.append(media.media_id)
                        else:
                            print(media.processing_info)
                else:
                    media = api.media_upload('images/' + images_list[i])
                    images_id_lst.append(media.media_id)
                i += 1
            if len(tuit_text) > 280:
                if is_reply:
                    first_tweet = api.update_status(status=tuit_text1, in_reply_to_status_id=tweet_id)
                else:
                    first_tweet = api.update_status(status=tuit_text1)
                tweet = api.update_status(status=tuit_text2, in_reply_to_status_id=first_tweet.id, media_ids=images_id_lst)
            else:
                if is_reply:
                    tweet = api.update_status(status=tuit_text, in_reply_to_status_id=tweet_id, media_ids=images_id_lst)
                else:
                    tweet = api.update_status(status=tuit_text, media_ids=images_id_lst)
        else:
            if len(tuit_text) > 280:
                if is_reply:
                    first_tweet = api.update_status(tuit_text1, in_reply_to_status_id=tweet_id)
                else:
                    first_tweet = api.update_status(tuit_text1)
                tweet = api.update_status(tuit_text2, in_reply_to_status_id=first_tweet.id)
            else:
                if is_reply:
                    tweet = api.update_status(status=tuit_text, in_reply_to_status_id=tweet_id)
                else:
                    tweet = api.update_status(tuit_text)
        return tweet
    except TweepyException as err:
        print('\n')
        sys.exit(err)
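
# Look up the tweet previously published for a given toot in the "id" table;
# returns 0 when the toot has not been tweeted yet.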
def get_tweet_id(toot_id):
    tweet_id = 0
    try:
        conn = None
        conn = psycopg2.connect(database=feeds_db, user=feeds_db_user, password="", host="/var/run/postgresql", port="5432")
        cur = conn.cursor()
        cur.execute('select tweet_id from id where toot_id=(%s)', (toot_id,))
        row = cur.fetchone()
        if row is not None:
            tweet_id = row[0]
        cur.close()
        return tweet_id
    except (Exception, psycopg2.DatabaseError) as error:
        print(error)
    finally:
        if conn is not None:
            conn.close()
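
# Download a media attachment into the local images/ directory and return its
# filename.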
def write_image(image_url):
    if not os.path.exists('images'):
        os.makedirs('images')
    filename = image_url.split("/")[-1]
    r = requests.get(image_url, stream=True)
    r.raw.decode_content = True
    with open('images/' + filename, 'wb') as f:
        shutil.copyfileobj(r.raw, f)
    return filename
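
# Create the Tweepy API client from the configured keys and verify the
# credentials before returning it.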
def create_api():
    auth = tweepy.OAuthHandler(api_key, api_key_secret)
    auth.set_access_token(access_token, access_token_secret)
    api = tweepy.API(auth)
    try:
        api.verify_credentials()
        logged_in = True
    except Exception as e:
        logger.error("Error creating API", exc_info=True)
        raise e
    logger.info("API created")
    return (api, logged_in)
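
# Build the Mastodon client from the credentials in secrets/secrets.txt and
# the hostname in config/config.txt.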
def mastodon():
    # Load secrets from secrets file
    secrets_filepath = "secrets/secrets.txt"
    uc_client_id = get_parameter("uc_client_id", secrets_filepath)
    uc_client_secret = get_parameter("uc_client_secret", secrets_filepath)
    uc_access_token = get_parameter("uc_access_token", secrets_filepath)

    # Load configuration from config file
    config_filepath = "config/config.txt"
    mastodon_hostname = get_parameter("mastodon_hostname", config_filepath)

    # Initialise Mastodon API
    mastodon = Mastodon(
        client_id=uc_client_id,
        client_secret=uc_client_secret,
        access_token=uc_access_token,
        api_base_url='https://' + mastodon_hostname,
    )

    # Initialise access headers
    headers = {'Authorization': 'Bearer %s' % uc_access_token}

    return (mastodon, mastodon_hostname)

def db_config():
    # Load db configuration from config file
    db_config_filepath = "config/db_config.txt"
    feeds_db = get_parameter("feeds_db", db_config_filepath)
    feeds_db_user = get_parameter("feeds_db_user", db_config_filepath)
    feeds_url = get_parameter("feeds_url", db_config_filepath)
    return (feeds_db, feeds_db_user, feeds_url)

def twitter_config():
    twitter_config_filepath = "config/keys_config.txt"
    api_key = get_parameter("api_key", twitter_config_filepath)
    api_key_secret = get_parameter("api_key_secret", twitter_config_filepath)
    access_token = get_parameter("access_token", twitter_config_filepath)
    access_token_secret = get_parameter("access_token_secret", twitter_config_filepath)
    return (api_key, api_key_secret, access_token, access_token_secret)

# Returns the parameter from the specified file
def get_parameter(parameter, file_path):
    # Check if secrets file exists
    if not os.path.isfile(file_path):
        print("File %s not found, exiting." % file_path)
        sys.exit(0)
    # Find parameter in file
    with open(file_path) as f:
        for line in f:
            if line.startswith(parameter):
                return line.replace(parameter + ":", "").strip()
    # Cannot find parameter, exit
    print(file_path + " Missing parameter %s" % parameter)
    sys.exit(0)
###############################################################################
# main

if __name__ == '__main__':

    mastodon, mastodon_hostname = mastodon()
    feeds_db, feeds_db_user, feeds_url = db_config()
    api_key, api_key_secret, access_token, access_token_secret = twitter_config()

    logged_in = False

    # Fetch and parse the account's feed; bail out if that fails.
    try:
        newsfeeds = feedparser.parse(feeds_url)
    except Exception as parse_error:
        print(parse_error)
        sys.exit(0)

    for entry in newsfeeds.entries:

        publish = False
        with_images = False
        is_reply = False

        title = entry['summary']
        id = entry['id']
        link = entry['link']
        toot_id = link.rsplit('/')[4]

        # Publish only toots that have no tweet recorded yet.
        tweet_id = get_tweet_id(toot_id)
        if tweet_id == 0:
            publish = True

        # If the toot is a reply, thread the tweet under the parent toot's tweet.
        reply_id = mastodon.status(toot_id).in_reply_to_id
        if reply_id is not None:
            is_reply = True
            tweet_id = get_tweet_id(reply_id)

        # Download every media attachment (all feed links after the first).
        if len(entry.links) >= 2:
            with_images = True
            images_list = []
            images = len(entry.links) - 1
            i = 0
            while i < images:
                image_url = entry.links[i+1].href
                image_filename = write_image(image_url)
                images_list.append(image_filename)
                i += 1

        ###########################################################

        if publish:
            tuit_text = get_toot(title)

            print("Tweeting...")
            print(tuit_text)

            if not logged_in:
                api, logged_in = create_api()

            tweet = compose_tweet(tuit_text, with_images, is_reply)

            #########################################################

            # Record the toot_id -> tweet_id pair so this toot is not tweeted again.
            sql_insert_ids = 'INSERT INTO id(toot_id, tweet_id) VALUES (%s,%s)'
            conn = None
            try:
                conn = psycopg2.connect(database=feeds_db, user=feeds_db_user, password="", host="/var/run/postgresql", port="5432")
                cur = conn.cursor()
                cur.execute(sql_insert_ids, (toot_id, tweet.id))
                conn.commit()
                cur.close()
            except (Exception, psycopg2.DatabaseError) as error:
                print(error)
            finally:
                if conn is not None:
                    conn.close()

            #########################################################

            time.sleep(2)
        else:
            print("No new toots to publish")
            sys.exit(0)