beautifulsoup no longer adds extraneous whitespace, so the formatting is no longer...

author Bryan <btbonval@gmail.com>

Sat, 18 Jan 2014 05:07:41 +0000 (00:07 -0500)

committer Bryan <btbonval@gmail.com>

Sat, 18 Jan 2014 05:07:41 +0000 (00:07 -0500)
author Bryan <btbonval@gmail.com>
Sat, 18 Jan 2014 05:07:41 +0000 (00:07 -0500)
committer Bryan <btbonval@gmail.com>
Sat, 18 Jan 2014 05:07:41 +0000 (00:07 -0500)
diff --git a/karmaworld/apps/notes/gdrive.py b/karmaworld/apps/notes/gdrive.py

index f39ab958ade96ee78c27408e185aee7419481bbd..6ad9f380a2741aa61f85a714538f1add8b4d8418 100644 (file)
--- a/karmaworld/apps/notes/gdrive.py
+++ b/karmaworld/apps/notes/gdrive.py
@@ -225,8 +225,8 @@ def convert_raw_document(raw_document, user=None, session_key=None):
          html = pdf2html(content_dict['pdf'])
      elif 'html' in content_dict and content_dict['html']:
          html = content_dict['html']
-        # cleanup the HTML
-        html = note.filter_html(html)
+    # cleanup the HTML
+    html = note.filter_html(html)
  
      # upload the HTML file to static host if it is not already there
      note.send_to_s3(html, do_save=False)
diff --git a/karmaworld/apps/notes/models.py b/karmaworld/apps/notes/models.py

index 7ce782b1431c4e6db06934dd3a19ba10b697d40a..93ab3f31f91d92aceab812fc9f353717ec873fcc 100644 (file)
--- a/karmaworld/apps/notes/models.py
+++ b/karmaworld/apps/notes/models.py
@@ -285,17 +285,13 @@ class Note(Document):
              # if there was no HTML, return an empty string
              return ''
  
-        # TODO adding from_encoding (if known) will speed up the process
-        # http://www.crummy.com/software/BeautifulSoup/bs4/doc/#encodings
          soup = BS(html)
          # Iterate through filters, applying all to the soup object.
          for soupfilter in (
            self.sanitize_anchor_html,
          ):
              soup = soupfilter(soup)
-        # Return BeautifulSoup cleaned up HTML in UTF-8
-        # http://www.crummy.com/software/BeautifulSoup/bs4/doc/#output-encoding
-        return soup.prettify("utf-8")
+        return str(soup)
  
      def sanitize_anchor_html(self, soup):
          """
author	Bryan <btbonval@gmail.com>
	Sat, 18 Jan 2014 05:07:41 +0000 (00:07 -0500)
committer	Bryan <btbonval@gmail.com>
	Sat, 18 Jan 2014 05:07:41 +0000 (00:07 -0500)
karmaworld/apps/notes/gdrive.py		patch | blob | history
karmaworld/apps/notes/models.py		patch | blob | history