+ Apply all sanitizing filters to HTML.
+ Takes in HTML string and outputs HTML string.
+ """
+ # Fun fact: This could be made into a static method.
+ if not html or not len(html):
+ # if there was no HTML, return an empty string
+ return ''
+
+ # TODO adding from_encoding (if known) will speed up the process
+ # http://www.crummy.com/software/BeautifulSoup/bs4/doc/#encodings
+ soup = BS(html)
+ # Iterate through filters, applying all to the soup object.
+ for soupfilter in (
+ self.sanitize_anchor_html,
+ ):
+ soup = soupfilter(soup)
+ # Return the BeautifulSoup-cleaned HTML as a pretty-printed string (no encoding is passed to prettify(), so this is not UTF-8 bytes)
+ # http://www.crummy.com/software/BeautifulSoup/bs4/doc/#output-encoding
+ return soup.prettify()
+
+ def sanitize_anchor_html(self, soup):
+ """
+ Filter the given BeautifulSoup obj by adding target=_blank to all
+ anchor tags.
+ Returns BeautifulSoup obj.
+ """
+ # Fun fact: This could be made into a static method.
+ # Find all a tags in the HTML
+ a_tags = soup.find_all('a')
+ if not a_tags or not len(a_tags):
+ # nothing to process.
+ return soup
+