Cleaning code

spla 2021-10-08 14:30:00 +02:00
parent 9e8fa9ccb1
commit 89f16e8067

View file

@@ -14,6 +14,40 @@ import pdb
 logger = logging.getLogger()
+def get_toot(title):
+    soup = BeautifulSoup(title, 'html.parser')
+    toot_text = soup.get_text()
+    sub_str = 'http'
+    find_link = toot_text.find(sub_str)
+    if find_link != -1:
+        tuit_text = toot_text[:toot_text.index(sub_str)]
+    else:
+        tuit_text = toot_text
+    links_lst = ''
+    for links in soup.find_all('a'):
+        find_tag = links.get('href').find('/tags/')
+        if find_tag == -1:
+            links_lst += links.get('href')
+    if len(links_lst) > 0:
+        last_text = toot_text[len(tuit_text) + len(links_lst):]
+    else:
+        last_text = ''
+    tuit_text = f'{tuit_text} {links_lst} {last_text}'
+    return tuit_text
 def get_tweet_id(toot_id):
     tweet_id = 0
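The hunk above extracts the HTML-to-text cleanup into a new get_toot() helper. Below is a minimal, self-contained sketch of what that helper does to a status: the helper body is copied from the hunk so the snippet runs on its own, while the sample HTML and the printed result are illustrative assumptions, not part of the commit.

# Standalone sketch of the new helper (body copied from the hunk above).
# The sample input below is hypothetical and only illustrates the behaviour:
# hashtag link URLs (hrefs containing '/tags/') are not re-appended,
# while other link URLs are kept in the rebuilt text.
from bs4 import BeautifulSoup

def get_toot(title):
    soup = BeautifulSoup(title, 'html.parser')
    toot_text = soup.get_text()
    sub_str = 'http'
    find_link = toot_text.find(sub_str)
    if find_link != -1:
        # Cut the plain text at the first embedded link.
        tuit_text = toot_text[:toot_text.index(sub_str)]
    else:
        tuit_text = toot_text
    links_lst = ''
    for links in soup.find_all('a'):
        find_tag = links.get('href').find('/tags/')
        if find_tag == -1:
            # Keep every href that is not a hashtag (/tags/) link.
            links_lst += links.get('href')
    if len(links_lst) > 0:
        last_text = toot_text[len(tuit_text) + len(links_lst):]
    else:
        last_text = ''
    tuit_text = f'{tuit_text} {links_lst} {last_text}'
    return tuit_text

# Hypothetical HTML content of a status (illustration only).
sample = ('<p>New blog post '
          '<a href="https://blog.example.org/post/1">https://blog.example.org/post/1</a> '
          '<a href="https://mastodon.example/tags/python">#python</a></p>')
print(get_toot(sample))
# Prints: New blog post  https://blog.example.org/post/1  #python
# (the double spaces come from the f-string join in the helper)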
@@ -224,35 +258,7 @@ if __name__ == '__main__':
     if publish:
-        soup = BeautifulSoup(title, 'html.parser')
-        toot_text = soup.get_text()
-        sub_str = 'http'
-        find_link = toot_text.find(sub_str)
-        if find_link != -1:
-            tuit_text = toot_text[:toot_text.index(sub_str)]
-        else:
-            tuit_text = toot_text
-        links_lst = ''
-        for links in soup.find_all('a'):
-            find_tag = links.get('href').find('/tags/')
-            if find_tag == -1:
-                links_lst += links.get('href')
-        if len(links_lst) > 0:
-            last_text = toot_text[len(tuit_text) + len(links_lst):]
-        else:
-            last_text = ''
-        tuit_text = f'{tuit_text} {links_lst} {last_text}'
+        tuit_text = get_toot(title)
         print("Tooting...")
         print(tuit_text)
@@ -351,8 +357,6 @@ if __name__ == '__main__':
 print('\n')
 sys.exit(err)
-time.sleep(2)
 #########################################################
 insert_line = 'INSERT INTO feeds(link) VALUES (%s)'
@@ -407,6 +411,10 @@ if __name__ == '__main__':
     conn.close()
+    #########################################################
+    time.sleep(2)
 else:
     print("Any new feeds")