Added support for media files

2021-09-29 19:06:31 +02:00 · 2021-09-29 19:06:31 +02:00 · 4a40fa43be
commit 4a40fa43be
--- a/README.md
+++ b/README.md
@ -18,5 +18,6 @@ Within Python Virtual Environment:
 3. Run `python setup.py` to input and save your Twitter's key and access tokens. You can get your keys and tokens from [Twitter Developer Platform](https://developer.twitter.com/en/apply/user.html)
-4. Use your favourite scheduling method to set `python mastotuit.py` to run every minute..   
+4. Use your favourite scheduling method to set `python mastotuit.py` to run every minute.  
 29.9.2021 **New Feature** Added support for media files! mastotuit now gets all media files from a Mastodon post (if any) and publishes them to Twitter together with your status update.
--- a/mastotuit.py
+++ b/mastotuit.py
@ -1,10 +1,12 @@
 import os
 import feedparser
-import re
+from bs4 import BeautifulSoup
 from mastodon import Mastodon
 import psycopg2
 import sys
 import time
 import requests
 import shutil
 import tweepy
 from tweepy import TweepError
 import logging
@ -12,17 +14,17 @@ import pdb
 logger = logging.getLogger()
-def cleanhtml(raw_html):
+def write_image(image_url):
-    cleanr = re.compile('<.*?>')
+    if not os.path.exists('images'):
-    cleantext = re.sub(cleanr, '', raw_html)
+        os.makedirs('images')
-    return cleantext
+    filename = image_url.split("/") [-1]
    r = requests.get(image_url, stream = True)
    r.raw.decode_content = True
    with open('images/' + filename, 'wb') as f:
        shutil.copyfileobj(r.raw, f)
-def unescape(s):
+    return filename
    s = s.replace("&apos;", "'")
    s = s.replace('&quot;', '"')
    return s
 def create_api():
@ -113,7 +115,6 @@ if __name__ == '__main__':
    api_key, api_key_secret, access_token, access_token_secret = twitter_config()
    publish = False
    logged_in = False
    try:
@ -127,10 +128,27 @@ if __name__ == '__main__':
    for entry in newsfeeds.entries:
        publish = False
        with_images = False
        title = entry['summary']
        id = entry['id']
        link = entry['link']
        if len(entry.links) >= 2:
            with_images = True
            images_list = []
            images = len(entry.links) - 1
            i = 0
            while i < images:
                image_url = entry.links[i+1].href
                image_filename = write_image(image_url)
                images_list.append(image_filename)
                i += 1
        ###################################################################
        # check database if feed is already published
@ -161,26 +179,50 @@ if __name__ == '__main__':
                conn.close()
        ###########################################################
        if publish:
-            toot_text = f'{title}\n'
+            soup = BeautifulSoup(title, 'html.parser')
-            toot_text = cleanhtml(toot_text)
+            toot_text = soup.get_text()
-            toot_text = unescape(toot_text)
+            sub_str = 'http'
            find_link = toot_text.find(sub_str)
            if find_link != -1:
                toot_text = toot_text[:toot_text.index(sub_str)]
-            toot_text = f'{toot_text} {link}'
+            links_lst = ''
            for links in soup.find_all('a'):
                links_lst += links.get('href') + ' '
            toot_text = f'{toot_text} {links_lst}'
            print("Tooting...")
            print(toot_text)
            if not logged_in:
                api, logged_in = create_api()
            if len(toot_text) < 280:
                try:
-                    api.update_status(toot_text)
+                    if with_images:
                        images_id_lst = []
                        i = 0
                        while i < len(images_list):
                            media = api.media_upload('images/' + images_list[i])
                            images_id_lst.append(media.media_id)
                            i += 1
                        api.update_status(status=toot_text, media_ids=images_id_lst)
                    else:
                        api.update_status(toot_text)
                except TweepError as err:
@ -189,13 +231,36 @@ if __name__ == '__main__':
            else:
-                toot_text1 = toot_text[:275].rsplit(' ', 1)[0] + ' (1/2)'
+                if with_images:
-                toot_text2 = toot_text[int(len(toot_text1)-6):] + ' (2/2)'
+
                    toot_text1 = toot_text[:250].rsplit(' ', 1)[0] + ' (1/2)'
                    toot_text2 = toot_text[int(len(toot_text1)-6):] + ' (2/2)'
                else:
                    toot_text1 = toot_text[:275].rsplit(' ', 1)[0] + ' (1/2)'
                    toot_text2 = toot_text[int(len(toot_text1)-6):] + ' (2/2)'
                try:
-                    first_tweet = api.update_status(toot_text1)
+                    if with_images:
-                    api.update_status(toot_text2, in_reply_to_status_id=first_tweet.id)
+
                        images_id_lst = []
                        i = 0
                        while i < len(images_list):
                            media = api.media_upload('images/' + images_list[i])
                            images_id_lst.append(media.media_id)
                            i += 1
                        first_tweet = api.update_status(status=toot_text1)
                        api.update_status(status=toot_text2, in_reply_to_status_id=first_tweet.id, media_ids=images_id_lst)
                    else:
                        first_tweet = api.update_status(toot_text1)
                        api.update_status(toot_text2, in_reply_to_status_id=first_tweet.id)
                except TweepError as err: