From 7b61d0712b486ec27c770c84b7e4ae016b6e7591 Mon Sep 17 00:00:00 2001 From: Bryan Date: Wed, 15 Jan 2014 22:37:56 -0500 Subject: [PATCH] Fixed upload to S3, moved into Note, removed copy pasta --- karmaworld/apps/notes/gdrive.py | 19 +------ .../notes/management/commands/populate_s3.py | 50 ++----------------- karmaworld/apps/notes/models.py | 36 +++++++++++++ 3 files changed, 41 insertions(+), 64 deletions(-) diff --git a/karmaworld/apps/notes/gdrive.py b/karmaworld/apps/notes/gdrive.py index 42153df..6ad9f38 100644 --- a/karmaworld/apps/notes/gdrive.py +++ b/karmaworld/apps/notes/gdrive.py @@ -6,7 +6,6 @@ import datetime from django.contrib.auth.models import User from django.contrib.sessions.backends.db import SessionStore from django.core.exceptions import ObjectDoesNotExist -from django.core.files.storage import default_storage import os import subprocess import tempfile @@ -15,7 +14,6 @@ import magic import re import json import time -from cStringIO import StringIO import httplib2 from apiclient.discovery import build @@ -231,22 +229,7 @@ def convert_raw_document(raw_document, user=None, session_key=None): html = note.filter_html(html) # upload the HTML file to static host if it is not already there - filepath = note.get_relative_s3_path() - if not default_storage.exists(filepath): - # This is a pretty ugly hackified answer to some s3boto shortcomings - # and some decent default settings chosen by django-storages. - - # S3 upload wants a file-like object. - htmlflo = StringIO(html) - # Create the new key (key == filename in S3 bucket) - newkey = default_storage.bucket.new_key(filepath) - # Upload data! 
- newkey.send_file(htmlflo) - if not newkey.exists(): - raise LookupError('Unable to find uploaded S3 document {0}'.format(str(newkey))) - else: - # Mark this note as available from the static host - note.static_html = True + note.send_to_s3(html, do_save=False) note.text = content_dict['text'] diff --git a/karmaworld/apps/notes/management/commands/populate_s3.py b/karmaworld/apps/notes/management/commands/populate_s3.py index 8cc19f4..ff4d971 100644 --- a/karmaworld/apps/notes/management/commands/populate_s3.py +++ b/karmaworld/apps/notes/management/commands/populate_s3.py @@ -22,52 +22,10 @@ class Command(BaseCommand): for note in Note.objects.iterator(): if note.static_html: # don't reprocess notes that are already on S3. - print "Skipping {0}".format(str(note)) + print "Skipping pre-uploaded {0}".format(str(note)) continue - filepath = note.get_relative_s3_path() - if default_storage.exists(filepath): - # HTML file is already uploaded if its slug is already there. - note.static_html = True - note.save() - print "Marking {0} as uploaded.".format(filepath) - continue - - # Copy pasta! - - # This is a pretty ugly hackified answer to some s3boto shortcomings - # and some decent default settings chosen by django-storages. - - print "Processing {0}".format(filepath) + # grab the html from inside the note and process it html = note.filter_html(note.html) - # S3 upload wants a file-like object. - htmlflo = StringIO(html) - # Create the new key (key == filename in S3 bucket) - newkey = default_storage.bucket.new_key(filepath) - # Upload data! - newkey.send_file(htmlflo) - - # Make sure the upload went through - if not newkey.exists(): - # oh well. log it and continue on. 
- print 'Unable to find {0}'.format(str(newkey)) - continue - - # Local HTML checksum - htmlflo.seek(0) - htmlflo_check = hashlib.sha1(htmlflo.read()).hexdigest() - - # Remote HTML checksum - with default_storage.open(filepath, 'r') as s3file: - s3_check = hashlib.sha1(s3file.read()).hexdigest() - - if htmlflo_check == s3_check: - # Mark this note as available from the static host - note.static_html = True - # Scrub its HTML to clean up the database. - note.html = '' - note.save() - print "Completed upload of {0}".format(filepath) - else: - print "Checksum mismatch for {0}:\n{1}\n{2}\n".format(filepath, - htmlflo_check, s3_check) + # push clean HTML to S3 + note.send_to_s3(html) diff --git a/karmaworld/apps/notes/models.py b/karmaworld/apps/notes/models.py index 8cddeee..65365c8 100644 --- a/karmaworld/apps/notes/models.py +++ b/karmaworld/apps/notes/models.py @@ -12,6 +12,7 @@ import logging from allauth.account.signals import user_logged_in from django.contrib.auth.models import User from django.core.exceptions import ObjectDoesNotExist +from django.core.files.storage import default_storage from django.db.models import SET_NULL from django.db.models.signals import post_save, post_delete, pre_save from django.dispatch import receiver @@ -215,6 +216,41 @@ class Note(Document): # filename and its pretty well guaranteed to be there. return 'html/{0}.html'.format(self.slug) + def send_to_s3(self, html, do_save=True): + """ + Push the given HTML up to S3 for this Note. + Set do_save to False if the note will be saved outside this call. + """ + # do nothing if HTML is empty. + if not html or not len(html): + return + # do nothing if already uploaded. + # Maybe run checksums if possible to confirm its really done? 
+ # (but then you gotta wonder was the original correct or is the new + # one correct) + if self.static_html: + return + # upload the HTML file to static host if it is not already there + filepath = self.get_relative_s3_path() + if not default_storage.exists(filepath): + # This is a pretty ugly hackified answer to some s3boto shortcomings + # and some decent default settings chosen by django-storages. + + # Create the new key (key == filename in S3 bucket) + newkey = default_storage.bucket.new_key(filepath) + # Upload data! + newkey.set_contents_from_string(html) + if not newkey.exists(): + raise LookupError('Unable to find uploaded S3 document {0}'.format(str(newkey))) + # If the code reaches here, either: + # filepath exists on S3 but static_html is not marked. + # or + # file was just uploaded successfully to filepath + # Regardless, set note as uploaded. + self.static_html = True + if do_save: + self.save() + def get_absolute_url(self): """ Resolve note url, use 'note' route and slug if slug otherwise use note.id -- 2.25.1