mirror of
https://github.com/fergalmoran/dss.git
synced 2025-12-22 09:38:18 +00:00
18 lines
472 B
Python
18 lines
472 B
Python
from HTMLParser import HTMLParser
|
|
|
|
class HTMLStripper(HTMLParser):
    """Clean HTML by removing all tags and HTML entities.

    Only the text between tags is kept. Entity and character references
    (``&amp;``, ``&#39;``) are dropped rather than decoded. The text chunks
    reported by the parser are accumulated in ``self.fed``.
    """

    def __init__(self):
        """Initialise the parser state and an empty text buffer."""
        # Run the real base initialiser instead of only reset(): on
        # Python 3 it sets parser state (convert_charrefs) that feed()
        # reads. An explicit base-class call is used rather than super()
        # so it also works when the base is an old-style class (Python 2).
        HTMLParser.__init__(self)
        # Route references through handle_entityref()/handle_charref(),
        # which are not overridden here, so they are removed instead of
        # being decoded into text. Plain unused attribute on Python 2.
        self.convert_charrefs = False
        self.fed = []

    def handle_data(self, d):
        """Parser callback: record one chunk of text found outside tags."""
        self.fed.append(d)

    def get_data(self):
        """Return all text collected so far, joined into a single string."""
        return ''.join(self.fed)

    def strip(self, d):
        """Return the text content of the HTML string ``d``.

        The parser and the text buffer are reset first, so one instance can
        be reused for many inputs. Leading and trailing whitespace is
        trimmed from the result.
        """
        self.reset()
        self.fed = []
        self.feed(d)
        return self.get_data().strip()