beautifulsoup no longer adds extraneous whitespace, so the formatting is no longer...
author Bryan <btbonval@gmail.com>
Sat, 18 Jan 2014 05:07:41 +0000 (00:07 -0500)
committer Bryan <btbonval@gmail.com>
Sat, 18 Jan 2014 05:07:41 +0000 (00:07 -0500)
karmaworld/apps/notes/gdrive.py
karmaworld/apps/notes/models.py

index f39ab958ade96ee78c27408e185aee7419481bbd..6ad9f380a2741aa61f85a714538f1add8b4d8418 100644 (file)
@@ -225,8 +225,8 @@ def convert_raw_document(raw_document, user=None, session_key=None):
         html = pdf2html(content_dict['pdf'])
     elif 'html' in content_dict and content_dict['html']:
         html = content_dict['html']
-        # cleanup the HTML
-        html = note.filter_html(html)
+    # cleanup the HTML
+    html = note.filter_html(html)
 
     # upload the HTML file to static host if it is not already there
     note.send_to_s3(html, do_save=False)
index 7ce782b1431c4e6db06934dd3a19ba10b697d40a..93ab3f31f91d92aceab812fc9f353717ec873fcc 100644 (file)
@@ -285,17 +285,13 @@ class Note(Document):
             # if there was no HTML, return an empty string
             return ''
 
-        # TODO adding from_encoding (if known) will speed up the process
-        # http://www.crummy.com/software/BeautifulSoup/bs4/doc/#encodings
         soup = BS(html)
         # Iterate through filters, applying all to the soup object.
         for soupfilter in (
           self.sanitize_anchor_html,
         ):
             soup = soupfilter(soup)
-        # Return BeautifulSoup cleaned up HTML in UTF-8
-        # http://www.crummy.com/software/BeautifulSoup/bs4/doc/#output-encoding
-        return soup.prettify("utf-8")
+        return str(soup)
 
     def sanitize_anchor_html(self, soup):
         """