From: Charles Connell Date: Thu, 2 Jan 2014 21:17:10 +0000 (-0500) Subject: Better plain text output X-Git-Tag: release-20150131~326 X-Git-Url: https://git.librecmc.org/?a=commitdiff_plain;h=7280103e08da7b66d693b992145beb7679f8e223;p=oweals%2Fkarmaworld.git Better plain text output --- diff --git a/karmaworld/apps/notes/management/commands/strip_html_to_text.py b/karmaworld/apps/notes/management/commands/strip_html_to_text.py index 3c37b62..bf32b31 100644 --- a/karmaworld/apps/notes/management/commands/strip_html_to_text.py +++ b/karmaworld/apps/notes/management/commands/strip_html_to_text.py @@ -3,7 +3,6 @@ # Copyright (C) 2012 FinalsClub Foundation import html2text - from django.core.management.base import BaseCommand from apps.notes.models import Note @@ -18,9 +17,16 @@ class Command(BaseCommand): notes = Note.objects.filter(html__isnull=False).filter(text__isnull=True) cleaned_notes = 0 for note in notes: - #TODO: find style tags and drop them and their contents first - note.text = html2text.html2text(note.html) - note.save() - cleaned_notes += 1 - self.stdout.write('Processed %s notes' % cleaned_notes) + try: + h = html2text.HTML2Text() + h.ignore_links = True + h.ignore_images = True + note.text = h.handle(note.html) + note.save() + cleaned_notes += 1 + except Exception, e: + print note + print e + continue + print 'Processed %s notes' % cleaned_notes