From 4a40fa43be379fa884462481f5d595554f199a25 Mon Sep 17 00:00:00 2001
From: spla <salvador.pla@mastodont.cat>
Date: Wed, 29 Sep 2021 19:06:31 +0200
Subject: [PATCH] Added support for media files

---
 README.md    |   3 +-
 mastotuit.py | 119 +++++++++++++++++++++++++++++++++++++++------------
 2 files changed, 94 insertions(+), 28 deletions(-)

diff --git a/README.md b/README.md
index 15e9812..fee8b31 100644
--- a/README.md
+++ b/README.md
@@ -18,5 +18,6 @@ Within Python Virtual Environment:
 
 3. Run `python setup.py` to input and save your Twitter's key and access tokens. You can get your keys and tokens from [Twitter Developer Platform](https://developer.twitter.com/en/apply/user.html)
 
-4. Use your favourite scheduling method to set `python mastotuit.py` to run every minute..   
+4. Use your favourite scheduling method to set `python mastotuit.py` to run every minute.  
 
+29.9.2021 **New Feature** Added support for media files! mastotuit now gets all media files from a Mastodon post (if any) and publishes them to Twitter together with your status update.
diff --git a/mastotuit.py b/mastotuit.py
index 978bda1..e2ef332 100644
--- a/mastotuit.py
+++ b/mastotuit.py
@@ -1,10 +1,12 @@
 import os
 import feedparser
-import re
+from bs4 import BeautifulSoup
 from mastodon import Mastodon
 import psycopg2
 import sys
 import time
+import requests
+import shutil
 import tweepy
 from tweepy import TweepError
 import logging
@@ -12,17 +14,17 @@ import pdb
 
 logger = logging.getLogger()
 
-def cleanhtml(raw_html):
-    
-    cleanr = re.compile('<.*?>')
-    cleantext = re.sub(cleanr, '', raw_html)
-    return cleantext
+def write_image(image_url):
 
-def unescape(s):
-    
-    s = s.replace("&apos;", "'")
-    s = s.replace('&quot;', '"')
-    return s
+    if not os.path.exists('images'):
+        os.makedirs('images')
+    filename = image_url.split("/") [-1]
+    r = requests.get(image_url, stream = True)
+    r.raw.decode_content = True
+    with open('images/' + filename, 'wb') as f:
+        shutil.copyfileobj(r.raw, f)
+
+    return filename
 
 def create_api():
     
@@ -113,7 +115,6 @@ if __name__ == '__main__':
 
     api_key, api_key_secret, access_token, access_token_secret = twitter_config()
     
-    publish = False
     logged_in = False
 
     try:
@@ -127,10 +128,27 @@ if __name__ == '__main__':
 
     for entry in newsfeeds.entries:
 
+        publish = False
+        with_images = False
+
         title = entry['summary']
         id = entry['id']
         link = entry['link']
 
+        if len(entry.links) >= 2:
+
+            with_images = True
+            images_list = []
+            images = len(entry.links) - 1
+
+            i = 0
+            while i < images:
+
+                image_url = entry.links[i+1].href
+                image_filename = write_image(image_url)
+                images_list.append(image_filename)
+                i += 1
+
         ###################################################################
         # check database if feed is already published
 
@@ -161,41 +179,88 @@ if __name__ == '__main__':
                 conn.close()
 
         ###########################################################
+
         if publish:
 
-            toot_text = f'{title}\n'
-
-            toot_text = cleanhtml(toot_text)
-            toot_text = unescape(toot_text)
+            soup = BeautifulSoup(title, 'html.parser')
             
-            toot_text = f'{toot_text} {link}'
+            toot_text = soup.get_text()
+            sub_str = 'http'
+            find_link = toot_text.find(sub_str)
+            if find_link != -1:
+                toot_text = toot_text[:toot_text.index(sub_str)]
+
+            links_lst = ''
+            for links in soup.find_all('a'):
+                links_lst += links.get('href') + ' '
+
+            toot_text = f'{toot_text} {links_lst}'
 
             print("Tooting...")
             print(toot_text)
 
             if not logged_in:
+
                 api, logged_in = create_api()
             
             if len(toot_text) < 280:
-                
+
                 try:
-                    
-                    api.update_status(toot_text)
-                
+
+                    if with_images:
+
+                        images_id_lst = []
+
+                        i = 0
+                        while i < len(images_list):
+
+                            media = api.media_upload('images/' + images_list[i])
+                            images_id_lst.append(media.media_id)
+                            i += 1
+
+                        api.update_status(status=toot_text, media_ids=images_id_lst)
+
+                    else:
+
+                        api.update_status(toot_text)
+
                 except TweepError as err:
                     
                     print('\n')
                     sys.exit(err)
             
             else:
-      
-                toot_text1 = toot_text[:275].rsplit(' ', 1)[0] + ' (1/2)'
-                toot_text2 = toot_text[int(len(toot_text1)-6):] + ' (2/2)'
+
+                if with_images:
+
+                    toot_text1 = toot_text[:250].rsplit(' ', 1)[0] + ' (1/2)'
+                    toot_text2 = toot_text[int(len(toot_text1)-6):] + ' (2/2)'
+
+                else:
+
+                    toot_text1 = toot_text[:275].rsplit(' ', 1)[0] + ' (1/2)'
+                    toot_text2 = toot_text[int(len(toot_text1)-6):] + ' (2/2)'
 
                 try:
-                    
-                    first_tweet = api.update_status(toot_text1)
-                    api.update_status(toot_text2, in_reply_to_status_id=first_tweet.id)
+
+                    if with_images:
+
+                        images_id_lst = []
+
+                        i = 0
+                        while i < len(images_list):
+
+                            media = api.media_upload('images/' + images_list[i])
+                            images_id_lst.append(media.media_id)
+                            i += 1
+
+                        first_tweet = api.update_status(status=toot_text1)
+                        api.update_status(status=toot_text2, in_reply_to_status_id=first_tweet.id, media_ids=images_id_lst)
+
+                    else:
+
+                        first_tweet = api.update_status(toot_text1)
+                        api.update_status(toot_text2, in_reply_to_status_id=first_tweet.id)
                 
                 except TweepError as err: