Correct comments and method sigs for sanitizer
author Charlie DeTar <cfd@media.mit.edu>
Sat, 31 Jan 2015 19:34:00 +0000 (12:34 -0700)
committer Bryan <btbonval@gmail.com>
Fri, 27 Feb 2015 01:08:12 +0000 (20:08 -0500)
karmaworld/apps/notes/sanitizer.py

index d8372c604fc26fa78d5d6052737b9e6a3818d5d3..885218e6207825164569a712555bafbf17b03654 100644 (file)
@@ -7,12 +7,9 @@ def _canonical_link_predicate(tag):
         tag.has_attr('rel') and \
         u'canonical' in tag['rel']
 
-def sanitize_html(raw_html, canonical_rel=None):
+def sanitize_html(raw_html):
     """
-    Arguments:
-
-    unclean: raw html to be cleaned
-    canonical_rel: optional fully qualified URL to set as canonical link.
+    Sanitize the given raw_html.
     """
     # Strip tags to the few that we like
     clean = bleach.clean(raw_html,
@@ -27,14 +24,19 @@ def sanitize_html(raw_html, canonical_rel=None):
     ])
     return clean
 
-def set_canonical_rel(raw_html, canonical_rel):
+def set_canonical_rel(raw_html, href):
+    """
+    Add or update <link rel='canonical'...> in the given html to the given
+    href. Note that this has the side effect of appending html/head/body tags
+    to the given html fragment if it doesn't already have them.
+    """
     soup = BeautifulSoup(raw_html)
     canonical_tags = soup.find_all(_canonical_link_predicate)
     if canonical_tags:
         for tag in canonical_tags:
-            tag['href'] = canonical_rel
+            tag['href'] = href
     else:
-        new_tag = soup.new_tag('link', rel='canonical', href=canonical_rel)
+        new_tag = soup.new_tag('link', rel='canonical', href=href)
         head = soup.find('head')
         head.append(new_tag)
     return unicode(soup)