3 # Copyright (C) 2012 FinalsClub Foundation
11 from apiclient.discovery import build
12 from apiclient.http import MediaFileUpload
13 from apiclient.http import MediaInMemoryUpload
14 from django.conf import settings
15 from django.core.files.base import ContentFile
16 from oauth2client.client import flow_from_clientsecrets
18 from karmaworld.apps.notes.models import DriveAuth, Note
# Path to the OAuth2 client-secrets JSON file handed to
# `flow_from_clientsecrets`, resolved relative to the Django root.
CLIENT_SECRET = os.path.join(settings.DJANGO_ROOT,
                             'secret/client_secrets.json')

#from credentials import GOOGLE_USER # FIXME

# Google account used for the OAuth flow and for looking up the stored
# DriveAuth record.  A deployment-specific value from `secret.drive` wins when
# that module is importable; otherwise fall back to the project default
# instead of unconditionally overwriting the imported value.
try:
    from secret.drive import GOOGLE_USER
except ImportError:
    GOOGLE_USER = 'admin@karmanotes.org' # FIXME

# Extension -> MIME type overrides.
# NOTE(review): not referenced by the code visible in this file -- confirm it
# is still needed.
EXT_TO_MIME = {'.docx': 'application/msword'}

# MIME types handled as PowerPoint presentations (exported as PDF, not HTML).
PPT_MIMETYPES = [
    'application/vnd.ms-powerpoint',
    'application/vnd.openxmlformats-officedocument.presentationml.presentation',
]
    """ Create an oauth2 authentication object with our preferred details """
35 'https://www.googleapis.com/auth/drive',
36 'https://www.googleapis.com/auth/drive.file',
37 'https://www.googleapis.com/auth/userinfo.email',
38 'https://www.googleapis.com/auth/userinfo.profile',
41 flow = flow_from_clientsecrets(CLIENT_SECRET, ' '.join(scopes), \
42 redirect_uri='http://localhost:8000/oauth2callback')
43 flow.params['access_type'] = 'offline'
44 flow.params['approval_prompt'] = 'force'
45 flow.params['user_id'] = GOOGLE_USER
    """ Use an oauth2client flow object to generate the web url used to create
        a new auth that can then be stored """
53 print flow.step1_get_authorize_url()
def accept_auth(code):
    """ Callback endpoint for accepting the post `authorize()` google drive
        response, and generate a credentials object

        :code: An authentication token from a WEB oauth dialog
        :returns: an oauth2client credentials object
    """
    # NOTE(review): `flow` is not bound by any line visible in this function;
    # presumably it is the flow object built from CLIENT_SECRET earlier in
    # this file -- confirm where it comes from.
    creds = flow.step2_exchange(code)
    # Hand the credentials back, as the docstring promises.
    return creds
def build_api_service(creds):
    """ Build a Drive v2 API client on top of an http object authorized by
        the given credentials.

        :creds: a credentials object exposing `authorize(http)`
        :returns: tuple of (drive service, authorized http object)
    """
    authorized_http = creds.authorize(httplib2.Http())
    drive_service = build('drive', 'v2', http=authorized_http)
    return drive_service, authorized_http
def check_and_refresh(creds, auth):
    """ Check a Credentials object's expiration token
        if it is out of date, refresh the token and save

        :creds: a Credentials object
        :auth: a DriveAuth that backs the cred object
        :returns: updated creds and auth objects, as a (creds, auth) tuple;
                  the same objects are handed back whether or not a refresh
                  was needed
    """
    # token_expiry is compared against the current naive UTC time.
    if creds.token_expiry < datetime.datetime.utcnow():
        # if we are past the token expiry,
        # refresh the creds and store them
        http = httplib2.Http()
        http = creds.authorize(http)
        creds.refresh(http)
        # Persist the refreshed credentials on the backing DriveAuth row.
        auth.credentials = creds.to_json()
        auth.save()
    return creds, auth
def download_from_gdrive(file_dict, http, extension=None, mimetype=None):
    """ get urls from file_dict and download contextual files from google

        The plain-text export is always requested.  PowerPoint files
        (matched by extension or by mimetype) additionally get the PDF
        export; everything else gets the HTML export.

        :file_dict: Drive file resource containing an `exportLinks` mapping
        :http: object whose `request(url, "GET")` returns (response, content)
        :extension: optional file extension including the dot, any case
        :mimetype: optional mimetype of the uploaded document
        :returns: dict mapping 'text' / 'pdf' / 'html' to downloaded content;
                  a key is absent when its download did not return HTTP 200
    """
    export_links = file_dict[u'exportLinks']
    download_urls = {'text': export_links['text/plain']}

    # extension is optional: callers may identify the file by mimetype only.
    if extension:
        extension = extension.lower()

    if extension in ['.ppt', '.pptx'] \
            or mimetype in PPT_MIMETYPES:
        download_urls['pdf'] = export_links['application/pdf']
    else:
        download_urls['html'] = export_links['text/html']

    content_dict = {}
    for download_type, download_url in download_urls.items():
        # Single-argument print() behaves the same on Python 2 and 3.
        print("\n%s -- %s" % (download_type, download_url))
        resp, content = http.request(download_url, "GET")

        if resp.status in [200]:
            print("\t downloaded!")
            # save to the File.property resulting field
            content_dict[download_type] = content
        else:
            print("\t Download failed: %s" % resp.status)

    return content_dict
def upload_to_gdrive(service, media, filename, extension=None, mimetype=None):
    """ take a gdrive service object, and a media wrapper and upload to gdrive

        You must provide an `extension` or `mimetype`

        The file is inserted with `convert=True`; OCR is additionally
        requested for .pdf/.jpeg/.jpg/.png extensions or the
        'application/pdf' mimetype.

        :service: Drive API service object
        :media: media upload wrapper passed as `media_body`
        :filename: used as the title of the uploaded file
        :extension: optional file extension including the dot, any case
        :mimetype: optional mimetype of the document
        :returns: the Drive file resource dict for the uploaded file
    """
    _resource = {'title': filename}

    # clean up extensions for type checking
    # (extension is optional: callers may pass only a mimetype)
    if extension:
        extension = extension.lower()

    insert_kwargs = {'body': _resource, 'media_body': media, 'convert': True}
    if extension in ['.pdf', '.jpeg', '.jpg', '.png'] \
            or mimetype in ['application/pdf']:
        # include OCR on ocr-able files
        insert_kwargs['ocr'] = True

    file_dict = service.files().insert(**insert_kwargs).execute()

    if u'exportLinks' not in file_dict:
        # The insert response did not carry export links:
        # get the doc from gdrive
        # NOTE(review): only a single re-fetch is attempted and no wait is
        # visible here -- confirm that is sufficient.
        file_dict = service.files().get(fileId=file_dict[u'id']).execute()

    return file_dict
def convert_with_google_drive(note):
    """ Upload a local note and download HTML

        The note's file is uploaded to Google Drive, the exported text (plus
        PDF for PowerPoint files, HTML otherwise) is downloaded, and the
        results are stored on a fresh copy of the Note.

        :note: a File model instance # FIXME
    """
    # TODO: set the permission of the file to permissive so we can use the
    #       gdrive_url to serve files directly to users

    note_path = note.note_file.path

    # Get file_type and encoding of uploaded file
    # i.e: file_type = 'text/plain', encoding = None
    (file_type, encoding) = mimetypes.guess_type(note_path)

    # Only pass a mimetype when one could be guessed.
    upload_kwargs = {'chunksize': 1024*1024, 'resumable': True}
    if file_type is not None:
        upload_kwargs['mimetype'] = file_type
    media = MediaFileUpload(note_path, **upload_kwargs)

    # NOTE(review): raises IndexError when no DriveAuth exists for GOOGLE_USER.
    auth = DriveAuth.objects.filter(email=GOOGLE_USER).all()[0]
    creds = auth.transform_to_cred()

    creds, auth = check_and_refresh(creds, auth)

    service, http = build_api_service(creds)

    # get the file extension
    filename, extension = os.path.splitext(note_path)

    file_dict = upload_to_gdrive(service, media, filename, extension)

    content_dict = download_from_gdrive(file_dict, http, extension)

    # Get a new copy of the file from the database with the new metadata from filemeta
    new_note = Note.objects.get(id=note.id)

    extension = extension.lower()
    if extension == '.pdf':
        new_note.file_type = 'pdf'

    elif extension in ['.ppt', '.pptx']:
        new_note.file_type = 'ppt'
        new_note.pdf_file.save(filename + '.pdf', ContentFile(content_dict['pdf']))

    else:
        # PPT files do not have this export ability
        new_note.gdrive_url = file_dict[u'exportLinks']['application/vnd.oasis.opendocument.text']
        new_note.html = content_dict['html']

    new_note.text = content_dict['text']

    # before we save new html, sanitize a tags in note.html
    #new_note.sanitize_html(save=False)
    #FIXME: ^^^ disabled until we can get html out of an Etree html element

    # Finally, save whatever data we got back from google
    new_note.save()
206 def convert_raw_document(raw_document):
207 """ Upload a raw document to google drive and get a Note back """
208 fp_file = raw_document.get_file()
210 # download the file to memory
211 # get the file's mimetype
212 #file_type, _ = mimetypes.guess_type(raw_document.fp_file.path)
213 # get the file extension
214 #filename, extension = os.path.splitext(raw_document.fp_file.path)
215 filename = raw_document.name
216 print "this is the mimetype of the document to check:"
217 print raw_document.mimetype
220 if raw_document.mimetype == None:
221 media = MediaInMemoryUpload(fp_file.read(),
222 chunksize=1024*1024, resumable=True)
224 media = MediaInMemoryUpload(fp_file.read(), mimetype=raw_document.mimetype,
225 chunksize=1024*1024, resumable=True)
227 auth = DriveAuth.objects.filter(email=GOOGLE_USER).all()[0]
228 creds = auth.transform_to_cred()
230 creds, auth = check_and_refresh(creds, auth)
231 service, http = build_api_service(creds)
234 file_dict = upload_to_gdrive(service, media, filename, mimetype=raw_document.mimetype)
235 content_dict = download_from_gdrive(file_dict, http, mimetype=raw_document.mimetype)
237 # this should have already happened, lets see why it hasn't
238 raw_document.is_processed = True
241 note = raw_document.convert_to_note()
243 if raw_document.mimetype == 'application/pdf':
244 note.file_type = 'pdf'
246 elif raw_document.mimetype in PPT_MIMETYPES:
247 note.file_type = 'ppt'
248 note.pdf_file.save(filename + '.pdf', ContentFile(content_dict['pdf']))
251 # PPT files do not have this export ability
252 note.gdrive_url = file_dict[u'exportLinks']['application/vnd.oasis.opendocument.text']
253 note.html = content_dict['html']
255 note.text = content_dict['text']
257 # before we save new html, sanitize a tags in note.html
258 #note.sanitize_html(save=False)
259 #FIXME: ^^^ disabled until we can get html out of an Etree html element
261 # Finally, save whatever data we got back from google