From: Bryan <btbonval@gmail.com>
Date: Sat, 18 Jan 2014 05:07:41 +0000 (-0500)
Subject: beautifulsoup no longer adds extraneous whitespace, so the formatting is no longer... 
X-Git-Tag: release-20150131~227
X-Git-Url: https://git.librecmc.org/?a=commitdiff_plain;h=6fb64b36e92f52579ca5b310638f36d66cba35b1;p=oweals%2Fkarmaworld.git

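BeautifulSoup output is now serialized with str() instead of prettify(), so it no longer adds extraneous whitespace and the formatting is no longer broken on pdf2html files. The HTML filter is now also applied to pdf2html output, not only to uploaded HTML. Closes #290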
---

diff --git a/karmaworld/apps/notes/gdrive.py b/karmaworld/apps/notes/gdrive.py
index f39ab95..6ad9f38 100644
--- a/karmaworld/apps/notes/gdrive.py
+++ b/karmaworld/apps/notes/gdrive.py
@@ -225,8 +225,8 @@ def convert_raw_document(raw_document, user=None, session_key=None):
         html = pdf2html(content_dict['pdf'])
     elif 'html' in content_dict and content_dict['html']:
         html = content_dict['html']
-        # cleanup the HTML
-        html = note.filter_html(html)
+    # cleanup the HTML
+    html = note.filter_html(html)
 
     # upload the HTML file to static host if it is not already there
     note.send_to_s3(html, do_save=False)
diff --git a/karmaworld/apps/notes/models.py b/karmaworld/apps/notes/models.py
index 7ce782b..93ab3f3 100644
--- a/karmaworld/apps/notes/models.py
+++ b/karmaworld/apps/notes/models.py
@@ -285,17 +285,13 @@ class Note(Document):
             # if there was no HTML, return an empty string
             return ''
 
-        # TODO adding from_encoding (if known) will speed up the process
-        # http://www.crummy.com/software/BeautifulSoup/bs4/doc/#encodings
         soup = BS(html)
         # Iterate through filters, applying all to the soup object.
         for soupfilter in (
           self.sanitize_anchor_html,
         ):
             soup = soupfilter(soup)
-        # Return BeautifulSoup cleaned up HTML in UTF-8
-        # http://www.crummy.com/software/BeautifulSoup/bs4/doc/#output-encoding
-        return soup.prettify("utf-8")
+        return str(soup)
 
     def sanitize_anchor_html(self, soup):
         """