Move to html2text
author Charles Connell <charles@connells.org>
Thu, 2 Jan 2014 17:16:39 +0000 (12:16 -0500)
committer Charles Connell <charles@connells.org>
Thu, 2 Jan 2014 17:16:39 +0000 (12:16 -0500)
karmaworld/apps/notes/management/commands/strip_html_to_text.py
reqs/common.txt

index a74d4ceef7e6bce0d7199fc1bf4c50fe9bd9297e..3c37b6294b87cfea37712756d39297ae6dd62f6e 100644 (file)
@@ -2,7 +2,7 @@
 # -*- coding:utf8 -*-
 # Copyright (C) 2012  FinalsClub Foundation
 
-from lxml.html import fromstring
+import html2text
 
 from django.core.management.base import BaseCommand
 from apps.notes.models import Note
@@ -19,7 +19,7 @@ class Command(BaseCommand):
         cleaned_notes = 0
         for note in notes:
             #TODO: find style tags and drop them and their contents first
-            note.text = fromstring(note.html).text_content()
+            note.text = html2text.html2text(note.html)
             note.save()
             cleaned_notes += 1
         self.stdout.write('Processed %s notes' % cleaned_notes)
index bb5b20c89d129ffa79b0adc6c29eaa2e98b1206b..9bdfd57b81b0a838c04c49712f82185fdd7e403f 100644 (file)
@@ -19,3 +19,4 @@ pyopenssl
 python-twitter
 gdshortener
 git+https://github.com/flaptor/indextank-py.git
+html2text