From e48ec26f293b045f2cc2a7183ead322c772a6fac Mon Sep 17 00:00:00 2001 From: Seth Woodworth Date: Tue, 24 Sep 2013 13:53:16 +0000 Subject: [PATCH] Large in-progress checkin for filepicker integration. includes model form view js and html changes across the board. Also includes a lot of debug print statements --- karmaworld/apps/document_upload/forms.py | 2 +- karmaworld/apps/document_upload/models.py | 9 +++- karmaworld/apps/document_upload/tasks.py | 10 ++-- karmaworld/apps/document_upload/views.py | 21 ++++++-- karmaworld/apps/notes/gdrive.py | 55 ++++++++++++++------ karmaworld/apps/notes/models.py | 34 ++++++++++++ karmaworld/templates/partial/filepicker.html | 14 ++++- 7 files changed, 119 insertions(+), 26 deletions(-) diff --git a/karmaworld/apps/document_upload/forms.py b/karmaworld/apps/document_upload/forms.py index ae790c9..1484367 100644 --- a/karmaworld/apps/document_upload/forms.py +++ b/karmaworld/apps/document_upload/forms.py @@ -9,4 +9,4 @@ from karmaworld.apps.document_upload.models import RawDocument class RawDocumentForm(ModelForm): class Meta: model = RawDocument - fields = ('name', 'tags', 'course', 'fp_file') + fields = ('name', 'tags', 'course', 'fp_file', 'mimetype') diff --git a/karmaworld/apps/document_upload/models.py b/karmaworld/apps/document_upload/models.py index 766698e..876ee41 100644 --- a/karmaworld/apps/document_upload/models.py +++ b/karmaworld/apps/document_upload/models.py @@ -25,6 +25,7 @@ class RawDocument(Document): def convert_to_note(self): """ polymorph this object into a note.models.Note object """ + print "begin convert_to_note" note = Note( course=self.course, name=self.name, @@ -35,9 +36,13 @@ class RawDocument(Document): note.save() for tag in self.tags.all(): note.tags.add(tag) + print "finish convert_to_note" return note def save(self, *args, **kwargs): - if not is_processed: - tasks.process_raw_document(self) + print "`RawDocument.save()`" super(RawDocument, self).save(*args, **kwargs) + if not self.is_processed: + print "\t document not processed yet, doing that now" + tasks.process_raw_document.delay(self) + print "\t this arrow should point to the word now ^" diff --git a/karmaworld/apps/document_upload/tasks.py b/karmaworld/apps/document_upload/tasks.py index df7021e..e863085 100644 --- a/karmaworld/apps/document_upload/tasks.py +++ b/karmaworld/apps/document_upload/tasks.py @@ -2,11 +2,15 @@ # -*- coding:utf8 -*- # Copyright (C) 2013 FinalsClub Foundation -from celery.task import task -from karmaworld.apps.notes.gdrive import convert_with_google_drive +from celery import Celery +from celery import task +from karmaworld.apps.notes.gdrive import convert_raw_document -@task +#@Celery.task() +@task() def process_raw_document(raw_document): """ Process a RawDocument instance in to a Note instance """ + print "="*80 + print "this line should be deferred and only printed by celery" convert_raw_document(raw_document) diff --git a/karmaworld/apps/document_upload/views.py b/karmaworld/apps/document_upload/views.py index 4352ed2..a72d271 100644 --- a/karmaworld/apps/document_upload/views.py +++ b/karmaworld/apps/document_upload/views.py @@ -13,11 +13,26 @@ from karmaworld.apps.document_upload.models import RawDocument from karmaworld.apps.document_upload.forms import RawDocumentForm def save_fp_upload(request): + """ ajax endpoint for saving a FilePicker uploaded file form + """ r_d_f = RawDocumentForm(request.POST) if r_d_f.is_valid(): - model_instance = r_d_f.save(commit=False) - model_instance.uploaded_at = datetime.datetime.utcnow() - model_instance.save() + raw_document = r_d_f.save(commit=False) + + print request.POST.keys() + time_a = datetime.datetime.now() + raw_document.fp_file = request.POST['fp_file'] + + time_b = datetime.datetime.now() + delta = time_b - time_a + raw_document.ip = request.META['REMOTE_ADDR'] + raw_document.uploaded_at = datetime.datetime.utcnow() + time_c = datetime.datetime.now() + # note that .save() has the side-effect of kicking of a celery processing task + raw_document.save() + time_d = datetime.datetime.now() + delta = time_d - time_c + print "d\t%s" % delta return HttpResponse({'success'}) else: return HttpResponse(r_d_f.errors, status=400) diff --git a/karmaworld/apps/notes/gdrive.py b/karmaworld/apps/notes/gdrive.py index 105516b..5b8760e 100644 --- a/karmaworld/apps/notes/gdrive.py +++ b/karmaworld/apps/notes/gdrive.py @@ -11,6 +11,7 @@ import time import httplib2 from apiclient.discovery import build from apiclient.http import MediaFileUpload +from apiclient.http import MediaInMemoryUpload from django.conf import settings from django.core.files import File from oauth2client.client import flow_from_clientsecrets @@ -27,6 +28,8 @@ except: EXT_TO_MIME = {'.docx': 'application/msword'} +PPT_MIMETYPES = ['application/vnd.ms-powerpoint', 'application/vnd.openxmlformats-officedocument.presentationml.presentation'] + def build_flow(): """ Create an oauth2 autentication object with our preferred details """ scopes = [ @@ -84,12 +87,16 @@ def check_and_refresh(creds, auth): auth.save() return creds, auth -def download_from_gdrive(file_dict, http, extension): +def download_from_gdrive(file_dict, http, extension=None, mimetype=None): """ get urls from file_dict and download contextual files from google """ download_urls = {} download_urls['text'] = file_dict[u'exportLinks']['text/plain'] - if extension.lower() in ['.ppt', 'pptx']: + if extension: + extension = extension.lower() + + if extension in ['.ppt', 'pptx'] \ + or mimetype in PPT_MIMETYPES: download_urls['pdf'] = file_dict[u'exportLinks']['application/pdf'] else: download_urls['html'] = file_dict[u'exportLinks']['text/html'] @@ -109,11 +116,19 @@ def download_from_gdrive(file_dict, http, extension): return content_dict -def upload_to_gdrive(service, media, filename, extension): +def upload_to_gdrive(service, media, filename, extension=None, mimetype=None): """ take a gdrive service object, and a media wrapper and upload to gdrive - returns a file_dict """ + returns a file_dict + You must provide an `extension` or `mimetype` + """ _resource = {'title': filename} - if extension.lower() in ['.pdf', '.jpeg', '.jpg', '.png']: + + # clean up extensions for type checking + if extension: + extension = extension.lower() + + if extension in ['.pdf', '.jpeg', '.jpg', '.png'] \ + or mimetype in ['application/pdf']: # include OCR on ocr-able files file_dict = service.files().insert(body=_resource, media_body=media, convert=True, ocr=True).execute() @@ -206,17 +221,23 @@ def convert_with_google_drive(note): def convert_raw_document(raw_document): """ Upload a raw document to google drive and get a Note back """ + fp_file = raw_document.get_file() + # download the file to memory # get the file's mimetype - file_type, _ = mimetypes=guess_type(raw_document.fp_file.path) + #file_type, _ = mimetypes.guess_type(raw_document.fp_file.path) # get the file extension - filename, extension = os.path.splitext(raw_document.fp_file.path) - - if file_type == None: - media = MediaFileUpload(note.note_file.path, + #filename, extension = os.path.splitext(raw_document.fp_file.path) + filename = raw_document.name + print "this is the mimetype of the document to check:" + print raw_document.mimetype + print "" + + if raw_document.mimetype == None: + media = MediaInMemoryUpload(fp_file.read(), chunksize=1024*1024, resumable=True) else: - media = MediaFileUpload(note.note_file.path, mimetype=file_type, + media = MediaInMemoryUpload(fp_file.read(), mimetype=raw_document.mimetype, chunksize=1024*1024, resumable=True) auth = DriveAuth.objects.filter(email=GOOGLE_USER).all()[0] @@ -226,14 +247,18 @@ def convert_raw_document(raw_document): service, http = build_api_service(creds) # prepare the upload - file_dict = upload_to_gdrive(service, media, filename, extension) - content_dict = download_from_gdrive(file_dict, http, extension) + file_dict = upload_to_gdrive(service, media, filename, mimetype=raw_document.mimetype) + content_dict = download_from_gdrive(file_dict, http, mimetype=raw_document.mimetype) + + # this should have already happened, lets see why it hasn't + raw_document.save() note = raw_document.convert_to_note() - if extension.lower() == '.pdf': + + if raw_document.mimetype == 'application/pdf': note.file_type = 'pdf' - elif extension.lower() in ['.ppt', '.pptx']: + elif raw_document.mimetype in PPT_MIMETYPES: note.file_type = 'ppt' now = datetime.datetime.utcnow() # create a folder path to store the ppt > pdf file with year and month folders diff --git a/karmaworld/apps/notes/models.py b/karmaworld/apps/notes/models.py index b8ce9ce..63e3a9c 100644 --- a/karmaworld/apps/notes/models.py +++ b/karmaworld/apps/notes/models.py @@ -7,8 +7,11 @@ Contains only the minimum for handling files and their representation """ import datetime +import os +import urllib from django.conf import settings +from django.core.files import File from django.core.files.storage import FileSystemStorage from django.db import models from django.template import defaultfilters @@ -57,8 +60,10 @@ class Document(models.Model): fp_file = django_filepicker.models.FPFileField( upload_to=_choose_upload_to, + storage=fs, \ null=True, blank=True, help_text=u"An uploaded file reference from Filepicker.io") + mimetype = models.CharField(max_length=255, blank=True, null=True) class Meta: abstract = True @@ -78,6 +83,35 @@ class Document(models.Model): self.uploaded_at.day, self.uploaded_at.microsecond) self.slug = _slug + def get_file(self): + """ Downloads the file from filepicker.io and returns a + Django File wrapper object """ + # clean up any old downloads that are still hanging around + if hasattr(self, 'tempfile'): + self.tempfile.close() + delattr(self, 'tempfile') + + if hasattr(self, 'filename'): + # the file might have been moved in the meantime so + # check first + if os.path.exists(self.filename): + os.remove(self.filename) + delattr(self, 'filename') + + # The temporary file will be created in a directory set by the + # environment (TEMP_DIR, TEMP or TMP) + self.filename, header = urllib.urlretrieve(self.fp_file.name) + name = os.path.basename(self.filename) + disposition = header.get('Content-Disposition') + if disposition: + name = disposition.rpartition("filename=")[2].strip('" ') + filename = header.get('X-File-Name') + if filename: + name = filename + + self.tempfile = open(self.filename, 'r') + return File(self.tempfile, name=name) + def save(self, *args, **kwargs): if self.name and not self.slug: self._generate_unique_slug() diff --git a/karmaworld/templates/partial/filepicker.html b/karmaworld/templates/partial/filepicker.html index 4fc90f0..feaa9b9 100644 --- a/karmaworld/templates/partial/filepicker.html +++ b/karmaworld/templates/partial/filepicker.html @@ -47,6 +47,7 @@