karmaworld/apps/notes/gdrive.py

   1 #!/usr/bin/env python
   2 # -*- coding:utf8 -*-
   3 # Copyright (C) 2012  FinalsClub Foundation
   4
   5 import datetime
   6 from io import FileIO, BufferedWriter
   7 import mimetypes
   8 import os
   9 import time
  10
  11 import httplib2
  12 from apiclient.discovery import build
  13 from apiclient.http import MediaFileUpload
  14 from django.conf import settings
  15 from django.core.files import File
  16 from oauth2client.client import flow_from_clientsecrets
  17
  18 from karmaworld.apps.notes.models import DriveAuth, Note
  19
  20 CLIENT_SECRET = os.path.join(settings.DJANGO_ROOT, \
  21                     'secret/client_secrets.json')
  22 #from credentials import GOOGLE_USER # FIXME
  23 try:
  24     from secrets.drive import GOOGLE_USER
  25 except:
  26     GOOGLE_USER = 'admin@karmanotes.org' # FIXME
  27
  28 EXT_TO_MIME = {'.docx': 'application/msword'}
  29
  30 def build_flow():
  31     """ Create an oauth2 autentication object with our preferred details """
  32     scopes = [
  33         'https://www.googleapis.com/auth/drive',
  34         'https://www.googleapis.com/auth/drive.file',
  35         'https://www.googleapis.com/auth/userinfo.email',
  36         'https://www.googleapis.com/auth/userinfo.profile',
  37     ]
  38
  39     flow = flow_from_clientsecrets(CLIENT_SECRET, ' '.join(scopes), \
  40             redirect_uri='http://localhost:8000/oauth2callback')
  41     flow.params['access_type'] = 'offline'
  42     flow.params['approval_prompt'] = 'force'
  43     flow.params['user_id'] = GOOGLE_USER
  44     return flow
  45
  46
  47 def authorize():
  48     """ Use an oauth2client flow object to generate the web url to create a new
  49         auth that can be then stored """
  50     flow = build_flow()
  51     print flow.step1_get_authorize_url()
  52
  53
  54 def accept_auth(code):
  55     """ Callback endpoint for accepting the post `authorize()` google drive
  56         response, and generate a credentials object
  57         :code:  An authentication token from a WEB oauth dialog
  58         returns a oauth2client credentials object """
  59     flow = build_flow()
  60     creds = flow.step2_exchange(code)
  61     return creds
  62
  63
  64 def build_api_service(creds):
  65     http = httplib2.Http()
  66     http = creds.authorize(http)
  67     return build('drive', 'v2', http=http), http
  68
  69
  70 def check_and_refresh(creds, auth):
  71     """ Check a Credentials object's expiration token
  72         if it is out of date, refresh the token and save
  73         :creds: a Credentials object
  74         :auth:  a DriveAuth that backs the cred object
  75         :returns: updated creds and auth objects
  76     """
  77     if creds.token_expiry < datetime.datetime.utcnow():
  78         # if we are passed the token expiry,
  79         # refresh the creds and store them
  80         http = httplib2.Http()
  81         http = creds.authorize(http)
  82         creds.refresh(http)
  83         auth.credentials = creds.to_json()
  84         auth.save()
  85     return creds, auth
  86
  87 def download_from_gdrive(file_dict, http, extension):
  88     """ get urls from file_dict and download contextual files from google """
  89     download_urls = {}
  90     download_urls['text'] = file_dict[u'exportLinks']['text/plain']
  91
  92     if extension.lower() in ['.ppt', 'pptx']:
  93         download_urls['pdf'] = file_dict[u'exportLinks']['application/pdf']
  94     else:
  95         download_urls['html'] = file_dict[u'exportLinks']['text/html']
  96
  97
  98     content_dict = {}
  99     for download_type, download_url in download_urls.items():
 100         print "\n%s -- %s" % (download_type, download_urls)
 101         resp, content = http.request(download_url, "GET")
 102
 103         if resp.status in [200]:
 104             print "\t downloaded!"
 105             # save to the File.property resulting field
 106             content_dict[download_type] = content
 107         else:
 108             print "\t Download failed: %s" % resp.status
 109
 110     return content_dict
 111
 112 def upload_to_gdrive(service, media, filename, extension):
 113     """ take a gdrive service object, and a media wrapper and upload to gdrive
 114         returns a file_dict """
 115     _resource = {'title': filename}
 116     if extension.lower() in ['.pdf', '.jpeg', '.jpg', '.png']:
 117         # include OCR on ocr-able files
 118         file_dict = service.files().insert(body=_resource, media_body=media, convert=True, ocr=True).execute()
 119
 120     else:
 121         file_dict = service.files().insert(body=_resource, media_body=media, convert=True).execute()
 122
 123     if u'exportLinks' not in file_dict:
 124         # wait some seconds
 125         # get the doc from gdrive
 126         time.sleep(30)
 127         file_dict = service.files().get(fileId=file_dict[u'id']).execute()
 128
 129     return file_dict
 130
 131 def convert_with_google_drive(note):
 132     """ Upload a local note and download HTML
 133         using Google Drive
 134         :note: a File model instance # FIXME
 135     """
 136     # TODO: set the permission of the file to permissive so we can use the
 137     #       gdrive_url to serve files directly to users
 138
 139     # Get file_type and encoding of uploaded file
 140     # i.e: file_type = 'text/plain', encoding = None
 141     (file_type, encoding) = mimetypes.guess_type(note.note_file.path)
 142
 143
 144
 145     if file_type != None:
 146         media = MediaFileUpload(note.note_file.path, mimetype=file_type,
 147                     chunksize=1024*1024, resumable=True)
 148
 149     else:
 150         media = MediaFileUpload(note.note_file.path,
 151                     chunksize=1024*1024, resumable=True)
 152
 153     auth = DriveAuth.objects.filter(email=GOOGLE_USER).all()[0]
 154     creds = auth.transform_to_cred()
 155
 156
 157     creds, auth = check_and_refresh(creds, auth)
 158
 159     service, http = build_api_service(creds)
 160
 161     # get the file extension
 162     filename, extension = os.path.splitext(note.note_file.path)
 163
 164     file_dict = upload_to_gdrive(service, media, filename, extension)
 165
 166     content_dict = download_from_gdrive(file_dict, http, extension)
 167
 168     # Get a new copy of the file from the database with the new metadata from filemeta
 169     new_note = Note.objects.get(id=note.id)
 170
 171     if extension.lower() == '.pdf':
 172         new_note.file_type = 'pdf'
 173
 174     elif extension.lower() in ['.ppt', '.pptx']:
 175         new_note.file_type = 'ppt'
 176         now = datetime.datetime.utcnow()
 177         # create a folder path to store the ppt > pdf file with year and month folders
 178         nonce_path = '/ppt_pdf/%s/%s/' % (now.year, now.month)
 179
 180         _path = filename + '.pdf'
 181         try:
 182             # If those folders don't exist, create them
 183             os.makedirs(os.path.realpath(os.path.dirname(_path)))
 184         except:
 185             print "we failed to create those directories"
 186
 187         _writer = BufferedWriter(FileIO(_path, "w"))
 188         _writer.write(content_dict['pdf'])
 189         _writer.close()
 190
 191         new_note.pdf_file = _path
 192
 193     else:
 194         # PPT files do not have this export ability
 195         new_note.gdrive_url = file_dict[u'exportLinks']['application/vnd.oasis.opendocument.text']
 196         new_note.html = content_dict['html']
 197
 198     new_note.text = content_dict['text']
 199
 200     # before we save new html, sanitize a tags in note.html
 201     #new_note.sanitize_html(save=False)
 202     #FIXME: ^^^ disabled until we can get html out of an Etree html element
 203
 204     # Finally, save whatever data we got back from google
 205     new_note.save()