From 6fb64b36e92f52579ca5b310638f36d66cba35b1 Mon Sep 17 00:00:00 2001
From: Bryan <btbonval@gmail.com>
Date: Sat, 18 Jan 2014 00:07:41 -0500
Subject: [PATCH] BeautifulSoup output is no longer prettified, so extraneous
 whitespace no longer breaks formatting on pdf2html files. closes #290

---
 karmaworld/apps/notes/gdrive.py | 4 ++--
 karmaworld/apps/notes/models.py | 6 +-----
 2 files changed, 3 insertions(+), 7 deletions(-)

diff --git a/karmaworld/apps/notes/gdrive.py b/karmaworld/apps/notes/gdrive.py
index f39ab95..6ad9f38 100644
--- a/karmaworld/apps/notes/gdrive.py
+++ b/karmaworld/apps/notes/gdrive.py
@@ -225,8 +225,8 @@ def convert_raw_document(raw_document, user=None, session_key=None):
         html = pdf2html(content_dict['pdf'])
     elif 'html' in content_dict and content_dict['html']:
         html = content_dict['html']
-        # cleanup the HTML
-        html = note.filter_html(html)
+    # cleanup the HTML
+    html = note.filter_html(html)
 
     # upload the HTML file to static host if it is not already there
     note.send_to_s3(html, do_save=False)
diff --git a/karmaworld/apps/notes/models.py b/karmaworld/apps/notes/models.py
index 7ce782b..93ab3f3 100644
--- a/karmaworld/apps/notes/models.py
+++ b/karmaworld/apps/notes/models.py
@@ -285,17 +285,13 @@ class Note(Document):
             # if there was no HTML, return an empty string
             return ''
 
-        # TODO adding from_encoding (if known) will speed up the process
-        # http://www.crummy.com/software/BeautifulSoup/bs4/doc/#encodings
         soup = BS(html)
         # Iterate through filters, applying all to the soup object.
         for soupfilter in (
           self.sanitize_anchor_html,
         ):
             soup = soupfilter(soup)
-        # Return BeautifulSoup cleaned up HTML in UTF-8
-        # http://www.crummy.com/software/BeautifulSoup/bs4/doc/#output-encoding
-        return soup.prettify("utf-8")
+        return str(soup)
 
     def sanitize_anchor_html(self, soup):
         """
-- 
2.25.1