Includes model, form, view, JS, and HTML changes across the board. Also includes a lot of debug print statements.
class RawDocumentForm(ModelForm):
class Meta:
model = RawDocument
- fields = ('name', 'tags', 'course', 'fp_file')
+ fields = ('name', 'tags', 'course', 'fp_file', 'mimetype')
def convert_to_note(self):
""" polymorph this object into a note.models.Note object """
+ print "begin convert_to_note"
note = Note(
course=self.course,
name=self.name,
note.save()
for tag in self.tags.all():
note.tags.add(tag)
+ print "finish convert_to_note"
return note
def save(self, *args, **kwargs):
- if not is_processed:
- tasks.process_raw_document(self)
+ print "`RawDocument.save()`"
super(RawDocument, self).save(*args, **kwargs)
+ if not self.is_processed:
+ print "\t document not processed yet, doing that now"
+ tasks.process_raw_document.delay(self)
+ print "\t this arrow should point to the word now ^"
# -*- coding:utf8 -*-
# Copyright (C) 2013 FinalsClub Foundation
-from celery.task import task
-from karmaworld.apps.notes.gdrive import convert_with_google_drive
+from celery import Celery
+from celery import task
+from karmaworld.apps.notes.gdrive import convert_raw_document
-@task
+#@Celery.task()
+@task()
def process_raw_document(raw_document):
""" Process a RawDocument instance in to a Note instance """
+ print "="*80
+ print "this line should be deferred and only printed by celery"
convert_raw_document(raw_document)
from karmaworld.apps.document_upload.forms import RawDocumentForm
def save_fp_upload(request):
+ """ ajax endpoint for saving a FilePicker uploaded file form
+ """
r_d_f = RawDocumentForm(request.POST)
if r_d_f.is_valid():
- model_instance = r_d_f.save(commit=False)
- model_instance.uploaded_at = datetime.datetime.utcnow()
- model_instance.save()
+ raw_document = r_d_f.save(commit=False)
+
+ print request.POST.keys()
+ time_a = datetime.datetime.now()
+ raw_document.fp_file = request.POST['fp_file']
+
+ time_b = datetime.datetime.now()
+ delta = time_b - time_a
+ raw_document.ip = request.META['REMOTE_ADDR']
+ raw_document.uploaded_at = datetime.datetime.utcnow()
+ time_c = datetime.datetime.now()
+ # note that .save() has the side-effect of kicking off a celery processing task
+ raw_document.save()
+ time_d = datetime.datetime.now()
+ delta = time_d - time_c
+ print "d\t%s" % delta
return HttpResponse({'success'})
else:
return HttpResponse(r_d_f.errors, status=400)
import httplib2
from apiclient.discovery import build
from apiclient.http import MediaFileUpload
+from apiclient.http import MediaInMemoryUpload
from django.conf import settings
from django.core.files import File
from oauth2client.client import flow_from_clientsecrets
EXT_TO_MIME = {'.docx': 'application/msword'}
+PPT_MIMETYPES = ['application/vnd.ms-powerpoint', 'application/vnd.openxmlformats-officedocument.presentationml.presentation']
+
def build_flow():
""" Create an oauth2 authentication object with our preferred details """
scopes = [
auth.save()
return creds, auth
-def download_from_gdrive(file_dict, http, extension):
+def download_from_gdrive(file_dict, http, extension=None, mimetype=None):
""" get urls from file_dict and download contextual files from google """
download_urls = {}
download_urls['text'] = file_dict[u'exportLinks']['text/plain']
- if extension.lower() in ['.ppt', 'pptx']:
+ if extension:
+ extension = extension.lower()
+
+ if extension in ['.ppt', 'pptx'] \
+ or mimetype in PPT_MIMETYPES:
download_urls['pdf'] = file_dict[u'exportLinks']['application/pdf']
else:
download_urls['html'] = file_dict[u'exportLinks']['text/html']
return content_dict
-def upload_to_gdrive(service, media, filename, extension):
+def upload_to_gdrive(service, media, filename, extension=None, mimetype=None):
""" take a gdrive service object, and a media wrapper and upload to gdrive
- returns a file_dict """
+ returns a file_dict
+ You must provide an `extension` or `mimetype`
+ """
_resource = {'title': filename}
- if extension.lower() in ['.pdf', '.jpeg', '.jpg', '.png']:
+
+ # clean up extensions for type checking
+ if extension:
+ extension = extension.lower()
+
+ if extension in ['.pdf', '.jpeg', '.jpg', '.png'] \
+ or mimetype in ['application/pdf']:
# include OCR on ocr-able files
file_dict = service.files().insert(body=_resource, media_body=media, convert=True, ocr=True).execute()
def convert_raw_document(raw_document):
""" Upload a raw document to google drive and get a Note back """
+ fp_file = raw_document.get_file()
+ # download the file to memory
# get the file's mimetype
- file_type, _ = mimetypes=guess_type(raw_document.fp_file.path)
+ #file_type, _ = mimetypes.guess_type(raw_document.fp_file.path)
# get the file extension
- filename, extension = os.path.splitext(raw_document.fp_file.path)
-
- if file_type == None:
- media = MediaFileUpload(note.note_file.path,
+ #filename, extension = os.path.splitext(raw_document.fp_file.path)
+ filename = raw_document.name
+ print "this is the mimetype of the document to check:"
+ print raw_document.mimetype
+ print ""
+
+ if raw_document.mimetype == None:
+ media = MediaInMemoryUpload(fp_file.read(),
chunksize=1024*1024, resumable=True)
else:
- media = MediaFileUpload(note.note_file.path, mimetype=file_type,
+ media = MediaInMemoryUpload(fp_file.read(), mimetype=raw_document.mimetype,
chunksize=1024*1024, resumable=True)
auth = DriveAuth.objects.filter(email=GOOGLE_USER).all()[0]
service, http = build_api_service(creds)
# prepare the upload
- file_dict = upload_to_gdrive(service, media, filename, extension)
- content_dict = download_from_gdrive(file_dict, http, extension)
+ file_dict = upload_to_gdrive(service, media, filename, mimetype=raw_document.mimetype)
+ content_dict = download_from_gdrive(file_dict, http, mimetype=raw_document.mimetype)
+
+ # this should have already happened, let's see why it hasn't
+ raw_document.save()
note = raw_document.convert_to_note()
- if extension.lower() == '.pdf':
+
+ if raw_document.mimetype == 'application/pdf':
note.file_type = 'pdf'
- elif extension.lower() in ['.ppt', '.pptx']:
+ elif raw_document.mimetype in PPT_MIMETYPES:
note.file_type = 'ppt'
now = datetime.datetime.utcnow()
# create a folder path to store the ppt > pdf file with year and month folders
Contains only the minimum for handling files and their representation
"""
import datetime
+import os
+import urllib
from django.conf import settings
+from django.core.files import File
from django.core.files.storage import FileSystemStorage
from django.db import models
from django.template import defaultfilters
fp_file = django_filepicker.models.FPFileField(
upload_to=_choose_upload_to,
+ storage=fs, \
null=True, blank=True,
help_text=u"An uploaded file reference from Filepicker.io")
+ mimetype = models.CharField(max_length=255, blank=True, null=True)
class Meta:
abstract = True
self.uploaded_at.day, self.uploaded_at.microsecond)
self.slug = _slug
+ def get_file(self):
+ """ Downloads the file from filepicker.io and returns a
+ Django File wrapper object """
+ # clean up any old downloads that are still hanging around
+ if hasattr(self, 'tempfile'):
+ self.tempfile.close()
+ delattr(self, 'tempfile')
+
+ if hasattr(self, 'filename'):
+ # the file might have been moved in the meantime so
+ # check first
+ if os.path.exists(self.filename):
+ os.remove(self.filename)
+ delattr(self, 'filename')
+
+ # The temporary file will be created in a directory set by the
+ # environment (TMPDIR, TEMP or TMP)
+ self.filename, header = urllib.urlretrieve(self.fp_file.name)
+ name = os.path.basename(self.filename)
+ disposition = header.get('Content-Disposition')
+ if disposition:
+ name = disposition.rpartition("filename=")[2].strip('" ')
+ filename = header.get('X-File-Name')
+ if filename:
+ name = filename
+
+ self.tempfile = open(self.filename, 'r')
+ return File(self.tempfile, name=name)
+
def save(self, *args, **kwargs):
if self.name and not self.slug:
self._generate_unique_slug()
</div>
<div class="hidden-fields" style="display:none;">
<input type="text" id="id_fpfile" name="fpfile" class="fpurl">
+ <input type="text" id="id_mimetype" name="mimetype" class="mimetype">
<input type="text" id="id_course" name="course" value="{{ course.id }}"
class="course_id">
{% csrf_token %}
*/
makeFileForm = function(upFile) {
var _form = document.getElementById('form-template').cloneNode(deep=true);
- // FIXME: VVVVVV
+ // save the Filename to the form name field
$(_form.children[0].children[0].children[1]).val(upFile.filename); // replace with upFile name
_form.style.display = "inline";
_form.id = null; // clear the unique id
+ // save the FP url to the form
$(_form.children[0].children[2].children[0]).val(upFile.url);
+ console.log(upFile);
+ // save the mimetype to the form
+ $(_form.children[0].children[2].children[1]).val(upFile.mimetype);
+
document.getElementById('forms_container').appendChild(_form);
};
}
});
$('#save-btn').on('click', function(e){
- $('.inline-form').each(function(i,el){
+ e.stopPropagation();
+ $('#forms_container .inline-form').each(function(i,el){
+ console.log("inline form " + i + "el: " + el);
var name, tags, fpurl, course;
name = $(el).find('.intext').val();
fp_file = $(el).find('.fpurl').val();
tags = $(el).find('.taggit-tags').val();
course = $(el).find('.course_id').val();
csrf = $(el).find('.csrf').val();
+ mimetype = $(el).find('.mimetype').val();
$.post('{% url 'upload_post' %}', {
'name': name,
'tags': tags,
'course': course,
'csrfmiddlewaretoken': csrf,
+ 'mimetype': mimetype
});
});
});