Changed to BeautifulSoup

This commit is contained in:
spla 2022-05-09 17:52:16 +02:00
pare 93e1fa2e08
commit 4f27e13c3a
S'han modificat 2 arxius amb 17 addicions i 10 eliminacions

Veure arxiu

@ -1,6 +1,6 @@
import os
import feedparser
import html2text
from bs4 import BeautifulSoup
from mastodon import Mastodon
import psycopg2
import sys
@ -17,14 +17,21 @@ logger = logging.getLogger()
def get_toot_text(title):
    """Convert an HTML fragment into plain text for a toot.

    Every ``<br>`` tag becomes a line break, all other markup is dropped and
    only the text content is kept. The result always ends with a newline.

    Args:
        title: HTML source string to convert.

    Returns:
        The plain-text rendering of *title*, terminated by ``'\\n'``.
    """
    soup = BeautifulSoup(title, features='html.parser')
    # Swap each <br> for a real newline instead of a '###' placeholder that is
    # split on afterwards: text that legitimately contains '###' must not be
    # broken into extra lines.
    for line_break in soup.find_all('br'):
        line_break.replace_with('\n')
    return f'{soup.get_text()}\n'

Veure arxiu

@ -1,7 +1,7 @@
wheel>=0.37.0
psycopg2>=2.9.1
feedparser>=6.0.8
html2text>=2020.1.16
beautifulsoup4>=4.10.0
Mastodon.py>=1.5.1
tweepy>=4.5.0
filetype>=1.0.8