mirror of
https://github.com/fergalmoran/dss.api.git
synced 2025-12-22 09:18:13 +00:00
29 lines
549 B
Python
29 lines
549 B
Python
from html.parser import HTMLParser
|
|
|
|
|
|
class HTMLStripper(HTMLParser):
    """Reduce HTML markup to its plain text content.

    Tags are discarded and only the text between them is kept. Character
    and entity references are decoded to the characters they stand for
    (``&amp;`` becomes ``&``) because the parser runs with
    ``convert_charrefs=True``.

    Typical use is ``HTMLStripper().strip(markup)``; an instance can be
    reused for any number of ``strip`` calls.
    """

    def __init__(self):
        # HTMLParser.__init__ stores ``convert_charrefs`` (which ``feed``
        # reads on every call) and then calls ``reset()``. Calling only
        # ``reset()`` leaves that attribute unset and makes ``feed`` raise
        # AttributeError.
        super().__init__(convert_charrefs=True)
        # Text fragments collected by handle_data, in document order.
        self.fed = []

    def handle_data(self, d):
        """Collect a run of text reported by the parser."""
        self.fed.append(d)

    def get_data(self) -> str:
        """Return all text collected so far as a single string."""
        return ''.join(self.fed)

    def strip(self, d) -> str:
        """Return the text content of ``d`` with surrounding whitespace removed.

        Any state left over from a previous call is discarded first, so the
        result depends only on ``d``.
        """
        self.reset()
        self.fed = []
        self.feed(d)
        # feed() may hold back trailing text (for example after a bare '&')
        # while waiting for more input; close() forces it to be processed.
        self.close()
        return self.get_data().strip()
|
|
|
|
def strip_tags(html):
    """Return the text content of ``html`` with all tags removed.

    Leading and trailing whitespace in the extracted text is kept as is.
    """
    s = HTMLStripper()
    s.feed(html)
    # feed() may hold back trailing text while waiting for more input;
    # close() tells the parser the document is complete so it is emitted.
    s.close()
    return s.get_data()
|