Changed HTML-to-text conversion from html2text to BeautifulSoup
This commit is contained in:
pare
93e1fa2e08
commit
4f27e13c3a
S'han modificat 2 arxius amb 17 addicions i 10 eliminacions
25
mastotuit.py
25
mastotuit.py
|
@ -1,6 +1,6 @@
|
|||
import os
|
||||
import feedparser
|
||||
import html2text
|
||||
from bs4 import BeautifulSoup
|
||||
from mastodon import Mastodon
|
||||
import psycopg2
|
||||
import sys
|
||||
|
@ -17,14 +17,21 @@ logger = logging.getLogger()
|
|||
|
||||
def get_toot_text(title):
|
||||
|
||||
html2text.hn = lambda _:0
|
||||
h = html2text.HTML2Text()
|
||||
h.images_to_alt = True
|
||||
h.single_line_break = True
|
||||
h.ignore_emphasis = True
|
||||
h.ignore_links = True
|
||||
h.ignore_tables = True
|
||||
tuit_text = h.handle(title)
|
||||
soup = BeautifulSoup(title, features='html.parser')
|
||||
|
||||
delimiter = '###' # unambiguous string
|
||||
|
||||
for line_break in soup.findAll('br'): # loop through line break tags
|
||||
|
||||
line_break.replaceWith(delimiter) # replace br tags with delimiter
|
||||
|
||||
tuit_text_str = soup.get_text().split(delimiter) # get list of strings
|
||||
|
||||
tuit_text = ''
|
||||
|
||||
for line in tuit_text_str:
|
||||
|
||||
tuit_text += f'{line}\n'
|
||||
|
||||
return tuit_text
|
||||
|
||||
|
|
|
@ -1,7 +1,7 @@
|
|||
wheel>=0.37.0
|
||||
psycopg2>=2.9.1
|
||||
feedparser>=6.0.8
|
||||
html2text>=2020.1.16
|
||||
bs4>=4.10.0
|
||||
Mastodon.py>=1.5.1
|
||||
tweepy>=4.5.0
|
||||
filetype>=1.0.8
|
||||
|
|
Loading…
Referencia en una nova incidència