karmaworld/apps/notes/gdrive.py

   1 #!/usr/bin/env python
   2 # -*- coding:utf8 -*-
   3 # Copyright (C) 2012  FinalsClub Foundation
   4
   5 import datetime
   6 import mimetypes
   7 import os
   8 import time
   9
  10 import httplib2
  11 from apiclient.discovery import build
  12 from apiclient.http import MediaFileUpload
  13 from django.conf import settings
  14 from django.core.files import File
  15 from oauth2client.client import flow_from_clientsecrets
  16
  17 from karmaworld.apps.notes.models import DriveAuth, Note
  18
  19 CLIENT_SECRET = os.path.join(settings.DJANGO_ROOT, \
  20                     'secret/client_secrets.json')
  21 #from credentials import GOOGLE_USER # FIXME
  22 try:
  23     from secrets.drive import GOOGLE_USER
  24 except:
  25     GOOGLE_USER = 'admin@karmanotes.org' # FIXME
  26
  27 EXT_TO_MIME = {'.docx': 'application/msword'}
  28
  29 def build_flow():
  30     """ Create an oauth2 autentication object with our preferred details """
  31     scopes = [
  32         'https://www.googleapis.com/auth/drive',
  33         'https://www.googleapis.com/auth/drive.file',
  34         'https://www.googleapis.com/auth/userinfo.email',
  35         'https://www.googleapis.com/auth/userinfo.profile',
  36     ]
  37
  38     flow = flow_from_clientsecrets(CLIENT_SECRET, ' '.join(scopes), \
  39             redirect_uri='http://localhost:8000/oauth2callback')
  40     flow.params['access_type'] = 'offline'
  41     flow.params['approval_prompt'] = 'force'
  42     flow.params['user_id'] = GOOGLE_USER
  43     return flow
  44
  45
  46 def authorize():
  47     """ Use an oauth2client flow object to generate the web url to create a new
  48         auth that can be then stored """
  49     flow = build_flow()
  50     print flow.step1_get_authorize_url()
  51
  52
  53 def accept_auth(code):
  54     """ Callback endpoint for accepting the post `authorize()` google drive
  55         response, and generate a credentials object
  56         :code:  An authentication token from a WEB oauth dialog
  57         returns a oauth2client credentials object """
  58     flow = build_flow()
  59     creds = flow.step2_exchange(code)
  60     return creds
  61
  62
  63 def build_api_service(creds):
  64     http = httplib2.Http()
  65     http = creds.authorize(http)
  66     return build('drive', 'v2', http=http), http
  67
  68
  69 def check_and_refresh(creds, auth):
  70     """ Check a Credentials object's expiration token
  71         if it is out of date, refresh the token and save
  72         :creds: a Credentials object
  73         :auth:  a DriveAuth that backs the cred object
  74         :returns: updated creds and auth objects
  75     """
  76     if creds.token_expiry < datetime.datetime.utcnow():
  77         # if we are passed the token expiry,
  78         # refresh the creds and store them
  79         http = httplib2.Http()
  80         http = creds.authorize(http)
  81         creds.refresh(http)
  82         auth.credentials = creds.to_json()
  83         auth.save()
  84     return creds, auth
  85
  86 def download_from_gdrive(file_dict, http, extension):
  87     """ get urls from file_dict and download contextual files from google """
  88     download_urls = {}
  89     download_urls['text'] = file_dict[u'exportLinks']['text/plain']
  90     if extension.lower() in ['.ppt', 'pptx']:
  91         download_urls['pdf'] = file_dict[u'exportLinks']['application/pdf']
  92     else:
  93         download_urls['html'] = file_dict[u'exportLinks']['text/html']
  94
  95
  96     content_dict = {}
  97     for download_type, download_url in download_urls.items():
  98         print "\n%s -- %s" % (download_type, download_urls)
  99         resp, content = http.request(download_url, "GET")
 100
 101         if resp.status in [200]:
 102             print "\t downloaded!"
 103             # save to the File.property resulting field
 104             content_dict[download_type] = content
 105         else:
 106             print "\t Download failed: %s" % resp.status
 107
 108     return content_dict
 109
 110 def upload_to_gdrive(service, media, filename, extension):
 111     """ take a gdrive service object, and a media wrapper and upload to gdrive
 112         returns a file_dict """
 113     _resource = {'title': filename}
 114     if extension.lower() in ['.pdf', '.jpeg', '.jpg', '.png']:
 115         # include OCR on ocr-able files
 116         file_dict = service.files().insert(body=_resource, media_body=media, convert=True, ocr=True).execute()
 117
 118     else:
 119         file_dict = service.files().insert(body=_resource, media_body=media, convert=True).execute()
 120
 121     if u'exportLinks' not in file_dict:
 122         # wait some seconds
 123         # get the doc from gdrive
 124         time.sleep(30)
 125         file_dict = service.files().get(fileId=file_dict[u'id']).execute()
 126
 127     return file_dict
 128
 129 def convert_with_google_drive(note):
 130     """ Upload a local note and download HTML
 131         using Google Drive
 132         :note: a File model instance # FIXME
 133     """
 134     # TODO: set the permission of the file to permissive so we can use the
 135     #       gdrive_url to serve files directly to users
 136
 137     # Get file_type and encoding of uploaded file
 138     # i.e: file_type = 'text/plain', encoding = None
 139     (file_type, encoding) = mimetypes.guess_type(note.note_file.path)
 140
 141
 142
 143     if file_type != None:
 144         media = MediaFileUpload(note.note_file.path, mimetype=file_type,
 145                     chunksize=1024*1024, resumable=True)
 146
 147     else:
 148         media = MediaFileUpload(note.note_file.path,
 149                     chunksize=1024*1024, resumable=True)
 150
 151     auth = DriveAuth.objects.filter(email=GOOGLE_USER).all()[0]
 152     creds = auth.transform_to_cred()
 153
 154
 155     creds, auth = check_and_refresh(creds, auth)
 156
 157     service, http = build_api_service(creds)
 158
 159     # get the file extension
 160     filename, extension = os.path.splitext(note.note_file.path)
 161
 162     file_dict = upload_to_gdrive(service, media, filename, extension)
 163
 164     content_dict = download_from_gdrive(file_dict, http, extension)
 165
 166
 167     # Get a new copy of the file from the database with the new metadata from filemeta
 168     new_note = Note.objects.get(id=note.id)
 169     if extension.lower() == '.pdf':
 170         new_note.file_type = 'pdf'
 171
 172
 173     # set the .odt as the download from google link
 174     if extension.lower() in ['.ppt', '.pptx']:
 175         print "is ppt"
 176         new_note.pdf_file = File(content_dict['pdf'])
 177     else:
 178         # PPT files do not have this export ability
 179         new_note.gdrive_url = file_dict[u'exportLinks']['application/vnd.oasis.opendocument.text']
 180         new_note.html = content_dict['html']
 181
 182     new_note.text = content_dict['text']
 183
 184     # before we save new html, sanitize a tags in note.html
 185     new_note.sanitize_html(save=False)
 186
 187     # Finally, save whatever data we got back from google
 188     new_note.save()