From bd3c439f1aed1fa0730f4e6eb2f57650906c78ac Mon Sep 17 00:00:00 2001 From: Charlie DeTar Date: Sat, 31 Jan 2015 12:34:00 -0700 Subject: [PATCH] Correct comments and method sigs for sanitizer --- karmaworld/apps/notes/sanitizer.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/karmaworld/apps/notes/sanitizer.py b/karmaworld/apps/notes/sanitizer.py index d8372c6..885218e 100644 --- a/karmaworld/apps/notes/sanitizer.py +++ b/karmaworld/apps/notes/sanitizer.py @@ -7,12 +7,9 @@ def _canonical_link_predicate(tag): tag.has_attr('rel') and \ u'canonical' in tag['rel'] -def sanitize_html(raw_html, canonical_rel=None): +def sanitize_html(raw_html): """ - Arguments: - - unclean: raw html to be cleaned - canonical_rel: optional fully qualified URL to set as canonical link. + Sanitize the given raw_html. """ # Strip tags to the few that we like clean = bleach.clean(raw_html, @@ -27,14 +24,19 @@ def sanitize_html(raw_html, canonical_rel=None): ]) return clean -def set_canonical_rel(raw_html, canonical_rel): +def set_canonical_rel(raw_html, href): + """ + Add or update <link rel="canonical"> in the given html to the given + href. Note that this has the side effect of appending html/head/body tags + to the given html fragment if it doesn't already have them. + """ soup = BeautifulSoup(raw_html) canonical_tags = soup.find_all(_canonical_link_predicate) if canonical_tags: for tag in canonical_tags: - tag['href'] = canonical_rel + tag['href'] = href else: - new_tag = soup.new_tag('link', rel='canonical', href=canonical_rel) + new_tag = soup.new_tag('link', rel='canonical', href=href) head = soup.find('head') head.append(new_tag) return unicode(soup) -- 2.25.1