From 16da4e6117e72dd591e9822e4fa44a62b0438dec Mon Sep 17 00:00:00 2001
From: Charles Connell
Date: Thu, 2 Jan 2014 12:16:39 -0500
Subject: [PATCH] Move to html2text

---
 .../apps/notes/management/commands/strip_html_to_text.py | 4 ++--
 reqs/common.txt | 1 +
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/karmaworld/apps/notes/management/commands/strip_html_to_text.py b/karmaworld/apps/notes/management/commands/strip_html_to_text.py
index a74d4ce..3c37b62 100644
--- a/karmaworld/apps/notes/management/commands/strip_html_to_text.py
+++ b/karmaworld/apps/notes/management/commands/strip_html_to_text.py
@@ -2,7 +2,7 @@
 # -*- coding:utf8 -*-
 # Copyright (C) 2012 FinalsClub Foundation
 
-from lxml.html import fromstring
+import html2text
 from django.core.management.base import BaseCommand
 from apps.notes.models import Note
 
@@ -19,7 +19,7 @@ class Command(BaseCommand):
         cleaned_notes = 0
         for note in notes:
             #TODO: find style tags and drop them and their contents first
-            note.text = fromstring(note.html).text_content()
+            note.text = html2text.html2text(note.html)
             note.save()
             cleaned_notes += 1
         self.stdout.write('Processed %s notes' % cleaned_notes)
diff --git a/reqs/common.txt b/reqs/common.txt
index bb5b20c..9bdfd57 100644
--- a/reqs/common.txt
+++ b/reqs/common.txt
@@ -19,3 +19,4 @@ pyopenssl
 python-twitter
 gdshortener
 git+https://github.com/flaptor/indextank-py.git
+html2text
-- 
2.25.1