Changed to BeautifulSoup
This commit is contained in:
pare
93e1fa2e08
commit
4f27e13c3a
S'han modificat 2 arxius amb 17 addicions i 10 eliminacions
25
mastotuit.py
25
mastotuit.py
|
@ -1,6 +1,6 @@
|
||||||
import os
|
import os
|
||||||
import feedparser
|
import feedparser
|
||||||
import html2text
|
from bs4 import BeautifulSoup
|
||||||
from mastodon import Mastodon
|
from mastodon import Mastodon
|
||||||
import psycopg2
|
import psycopg2
|
||||||
import sys
|
import sys
|
||||||
|
@ -17,14 +17,21 @@ logger = logging.getLogger()
|
||||||
|
|
||||||
def get_toot_text(title):
|
def get_toot_text(title):
|
||||||
|
|
||||||
html2text.hn = lambda _:0
|
soup = BeautifulSoup(title, features='html.parser')
|
||||||
h = html2text.HTML2Text()
|
|
||||||
h.images_to_alt = True
|
delimiter = '###' # unambiguous string
|
||||||
h.single_line_break = True
|
|
||||||
h.ignore_emphasis = True
|
for line_break in soup.findAll('br'): # loop through line break tags
|
||||||
h.ignore_links = True
|
|
||||||
h.ignore_tables = True
|
line_break.replaceWith(delimiter) # replace br tags with delimiter
|
||||||
tuit_text = h.handle(title)
|
|
||||||
|
tuit_text_str = soup.get_text().split(delimiter) # get list of strings
|
||||||
|
|
||||||
|
tuit_text = ''
|
||||||
|
|
||||||
|
for line in tuit_text_str:
|
||||||
|
|
||||||
|
tuit_text += f'{line}\n'
|
||||||
|
|
||||||
return tuit_text
|
return tuit_text
|
||||||
|
|
||||||
|
|
|
@ -1,7 +1,7 @@
|
||||||
wheel>=0.37.0
|
wheel>=0.37.0
|
||||||
psycopg2>=2.9.1
|
psycopg2>=2.9.1
|
||||||
feedparser>=6.0.8
|
feedparser>=6.0.8
|
||||||
html2text>=2020.1.16
|
bs4>=4.10.0
|
||||||
Mastodon.py>=1.5.1
|
Mastodon.py>=1.5.1
|
||||||
tweepy>=4.5.0
|
tweepy>=4.5.0
|
||||||
filetype>=1.0.8
|
filetype>=1.0.8
|
||||||
|
|
Loading…
Referencia en una nova incidència