Migrate secrets to environment variables
[oweals/karmaworld.git] / karmaworld / apps / notes / models.py
1 #!/usr/bin/env python
2 # -*- coding:utf8 -*-
3 # Copyright (C) 2012  FinalsClub Foundation
4
5 """
6     Models for the notes django app.
7     Contains only the minimum for handling files and their representation
8 """
9 import datetime
10 import traceback
11 import logging
12 from allauth.account.signals import user_logged_in
13 from django.contrib.auth.models import User
14 from django.contrib.sites.models import Site
15 from django.core.urlresolvers import reverse
16 from django.utils.safestring import mark_safe
17 from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned
18 from django.core.files.storage import default_storage
19 from django.db.models import SET_NULL
20 from django.db.models.signals import post_save, post_delete, pre_save
21 from django.dispatch import receiver
22 from karmaworld.apps.users.models import NoteKarmaEvent, GenericKarmaEvent
23 from karmaworld.utils.filepicker import encode_fp_policy, sign_fp_policy
24 import os
25 import time
26 import urllib
27
28 from django.conf import settings
29 from django.core.files import File
30 from django.core.files.storage import FileSystemStorage
31 from django.db import models
32 from django.utils.text import slugify
33 import django_filepicker
34 from bs4 import BeautifulSoup as BS
35 from taggit.managers import TaggableManager
36
37 from karmaworld.apps.courses.models import Course
38 from karmaworld.apps.licenses.models import License
39 from karmaworld.apps.notes.search import SearchIndex
40 from karmaworld.settings.manual_unique_together import auto_add_check_unique_together
41
# Filepicker API key, read from the environment at import time. A missing
# variable raises KeyError here, so this module cannot be imported without it.
FILEPICKER_API_KEY = os.environ['FILEPICKER_API_KEY']

# Session key read by find_orphan_notes() on login to look up the Filepicker
# URLs recorded in the session.
ANONYMOUS_UPLOAD_URLS = 'anonymous_upload_urls'
# Baseline that Note.remaining_thanks_for_mturk() subtracts Note.thanks from.
KEYWORD_MTURK_THRESHOLD = 3

logger = logging.getLogger(__name__)
# Local filesystem storage rooted at settings.MEDIA_ROOT.
fs = FileSystemStorage(location=settings.MEDIA_ROOT)

# Dictionary for S3 upload headers
s3_upload_headers = {
    'Content-Type': 'text/html',
}

# This is a bit hacky, but nothing else works. Grabbed this from a proper
# file configured via S3 management console.
# https://github.com/FinalsClub/karmaworld/issues/273#issuecomment-32572169
# The policy grants READ, WRITE, READ_ACP and WRITE_ACP to one canonical user
# and READ to the global AllUsers group.
all_read_xml_acl = '<?xml version="1.0" encoding="UTF-8"?>\n<AccessControlPolicy xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Owner><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Owner><AccessControlList><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>READ</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>WRITE</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>READ_ACP</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>WRITE_ACP</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="Group"><URI>http://acs.amazonaws.com/groups/global/AllUsers</URI></Grantee><Permission>READ</Permission></Grant></AccessControlList></AccessControlPolicy>'
59
60
class Document(models.Model):
    """
    An Abstract Base Class representing a document intended to be subclassed.

    Holds the course relation, naming/slug fields, upload metadata and the
    Filepicker file field shared by concrete document models.
    """
    course          = models.ForeignKey(Course)
    tags            = TaggableManager(blank=True)
    name            = models.CharField(max_length=255, blank=True, null=True)
    slug            = models.SlugField(max_length=255, unique=True)

    LECTURE_NOTES = 'LECTURE_NOTES'
    STUDY_GUIDE = 'STUDY_GUIDE'
    SYLLABUS = 'SYLLABUS'
    ASSIGNMENT = 'ASSIGNMENT'
    OTHER = 'OTHER'
    NOTE_CATEGORIES = (
        (LECTURE_NOTES, 'Lecture Notes'),
        (STUDY_GUIDE, 'Study Guide'),
        (SYLLABUS, 'Syllabus'),
        (ASSIGNMENT, 'Assignment'),
        (OTHER, 'Other'),
    )
    category = models.CharField(max_length=50, choices=NOTE_CATEGORIES, blank=True, null=True)

    # license if different from default
    license         = models.ForeignKey(License, blank=True, null=True)

    # provide an upstream file link
    upstream_link   = models.URLField(max_length=1024, blank=True, null=True, unique=True)

    # metadata relevant to the Upload process
    user            = models.ForeignKey(User, blank=True, null=True, on_delete=SET_NULL)
    ip              = models.GenericIPAddressField(blank=True, null=True,
                        help_text=u"IP address of the uploader")
    uploaded_at     = models.DateTimeField(null=True, default=datetime.datetime.utcnow)


    # if True, NEVER show this file
    # WARNING: This may throw an error on migration
    is_hidden       = models.BooleanField(default=False)

    ###
    # Everything Filepicker, now in one small area

    # Allow pick (choose files), store (upload to S3), read (from FP repo),
    # stat (status of FP repo files) for 1 year (current time + 365 * 24 * 3600
    # seconds). Generated one time, at class definition upon import. So the
    # server will need to be rebooted at least one time each year or this will
    # go stale.
    fp_policy_json = '{{"expiry": {0}, "call": ["pick","store","read","stat"]}}'
    fp_policy_json = fp_policy_json.format(int(time.time() + 31536000))
    fp_policy      = encode_fp_policy(fp_policy_json)
    fp_signature   = sign_fp_policy(fp_policy)

    # Hack because mimetypes conflict with extensions, but there is no way to
    # disable mimetypes.
    # https://github.com/Ink/django-filepicker/issues/22
    django_filepicker.forms.FPFieldMixin.default_mimetypes = ''
    # Now let django-filepicker do the heavy lifting. Sort of. Look at all those
    # parameters!
    fp_file = django_filepicker.models.FPFileField(
                # FPFileField settings
                apikey=FILEPICKER_API_KEY,
                services='COMPUTER,DROPBOX,URL,GOOGLE_DRIVE,EVERNOTE,GMAIL,BOX,FACEBOOK,FLICKR,PICASA,IMAGE_SEARCH,WEBCAM,FTP',
                additional_params={
                    'data-fp-multiple': 'true',
                    'data-fp-folders': 'true',
                    'data-fp-button-class':
                      'inline-button important add-note-btn',
                    'data-fp-button-text': 'Add Notes',
                    'data-fp-extensions':
                      '.pdf,.doc,.docx,.txt,.html,.rtf,.odt,.png,.jpg,.jpeg,.ppt,.pptx',
                    'data-fp-store-location': 'S3',
                    'data-fp-policy': fp_policy,
                    'data-fp-signature': fp_signature,
                    'type': 'filepicker',
                    'onchange': "got_file(event)",
                },
                # FileField settings
                null=True, blank=True,
                upload_to='nil', # field ignored because S3, but required.
                verbose_name='', # prevent a label from showing up
                )
    mimetype = models.CharField(max_length=255, blank=True, null=True)

    class Meta:
        abstract = True
        ordering = ['-uploaded_at']

    def _generate_unique_slug(self):
        """
        Set self.slug from the slugified name.

        If another row of the same concrete model already uses that slug,
        the month, day and microsecond of uploaded_at are appended to
        disambiguate it.
        """
        _slug = slugify(unicode(self.name))
        klass = self.__class__
        # exists() asks the database a yes/no question instead of loading
        # every colliding row just to test truthiness.
        if klass.objects.filter(slug=_slug).exists():
            # uploaded_at is nullable; fall back to the current time so a
            # missing timestamp cannot crash slug generation.
            stamp = self.uploaded_at or datetime.datetime.utcnow()
            _slug = u"{0}-{1}-{2}-{3}".format(
                    _slug, stamp.month, stamp.day, stamp.microsecond)
        self.slug = _slug

    def _get_fpf(self):
        """
        Memoized FilepickerFile getter. Returns FilepickerFile.

        The object is built once from the fp_file name and the field's
        additional_params, then cached on the instance as cached_fpf.
        """
        if not hasattr(self, 'cached_fpf'):
            # Fetch additional_params containing signature, etc
            aps = self.fp_file.field.additional_params
            self.cached_fpf = django_filepicker.utils.FilepickerFile(self.fp_file.name, aps)
        return self.cached_fpf

    def get_fp_url(self):
        """
        Returns the Filepicker URL for reading the upstream document.
        """
        return self._get_fpf().get_url()

    def get_file(self):
        """
        Downloads the file from filepicker.io and returns a Django File wrapper
        object.
        """
        return self._get_fpf().get_file()

    def save(self, *args, **kwargs):
        """
        Save the document, generating a slug first when a name is set but
        no slug has been assigned yet.
        """
        if self.name and not self.slug:
            self._generate_unique_slug()
        super(Document, self).save(*args, **kwargs)
192
193
class NoteManager(models.Manager):
    """ Manager that resolves Notes from their natural key. """
    def get_by_natural_key(self, fp_file, upstream_link):
        """
        Look up the single Note matching both the Filepicker file and the
        upstream link, i.e. the pair produced by Note.natural_key().
        """
        natural_key = {
            'fp_file': fp_file,
            'upstream_link': upstream_link,
        }
        return self.get(**natural_key)
201
202
class Note(Document):
    """
    A django model representing an uploaded file and associated metadata.
    """
    objects = NoteManager()

    # Mimetypes for which is_pdf() reports True.
    PDF_MIMETYPES = (
      'application/pdf',
      'application/vnd.ms-powerpoint',
      'application/vnd.openxmlformats-officedocument.presentationml.presentation'
    )

    # Cache the Google drive file link
    gdrive_url      = models.URLField(max_length=1024, blank=True, null=True, unique=True)

    # Generated by Google Drive but saved locally
    text            = models.TextField(blank=True, null=True)

    # Number of times this note has been flagged as abusive/spam.
    flags           = models.IntegerField(default=0,null=False)

    # Social media tracking
    tweeted         = models.BooleanField(default=False)
    thanks          = models.PositiveIntegerField(default=0)

    class Meta:
        unique_together = ('fp_file', 'upstream_link')
        ordering = ['-uploaded_at']

    def __unicode__(self):
        return u"Note at {0} (from {1}) ({2})".format(self.fp_file, self.upstream_link, self.id)

    def natural_key(self):
        """
        A Note is uniquely defined by both the Filepicker link and the upstream
        link. The Filepicker link should be unique by itself, but it may be
        null in the database, so the upstream link component should resolve
        those cases.
        """
        # gdrive_url might also fit the bill?
        return (self.fp_file, self.upstream_link)

    def get_relative_s3_path(self):
        """
        returns s3 path relative to the appropriate bucket.
        """
        # Note.slug will be unique and brought in from RawDocument or created
        # upon save() inside RawDocument.convert_to_note(). It makes for a good
        # filename and its pretty well guaranteed to be there.
        return 'html/{0}.html'.format(self.slug)

    def send_to_s3(self, html, do_save=True):
        """
        Push the given HTML up to S3 for this Note.

        Nothing is uploaded when html is empty or when a file already exists
        at this Note's S3 path. do_save is accepted for backward
        compatibility only; this method never saves the Note.

        Raises LookupError if the key cannot be found after the upload.
        """
        # do nothing if HTML is empty.
        if not html:
            return
        # upload the HTML file to static host if it is not already there
        filepath = self.get_relative_s3_path()
        if default_storage.exists(filepath):
            return

        # This is a pretty ugly hackified answer to some s3boto shortcomings
        # and some decent default settings chosen by django-storages.

        # Create the new key (key == filename in S3 bucket)
        newkey = default_storage.bucket.new_key(filepath)
        # Upload data!
        newkey.set_contents_from_string(html, headers=s3_upload_headers)
        if not newkey.exists():
            raise LookupError('Unable to find uploaded S3 document {0}'.format(str(newkey)))

        # set the permissions for everyone to read.
        newkey.set_xml_acl(all_read_xml_acl)

    def update_note_on_s3(self, html):
        """
        Overwrite this Note's existing HTML file on S3 with the given HTML.

        Does nothing when html is empty; logs a warning and returns when no
        file exists yet at this Note's S3 path.
        """
        # do nothing if HTML is empty.
        if not html:
            return
        # if it's not already there then bail out
        filepath = self.get_relative_s3_path()
        if not default_storage.exists(filepath):
            # Logger.warn is a deprecated alias of Logger.warning.
            logger.warning("Cannot update note on S3, it does not exist already: " + unicode(self))
            return

        key = default_storage.bucket.get_key(filepath)
        key.set_contents_from_string(html, headers=s3_upload_headers)
        key.set_xml_acl(all_read_xml_acl)

    def remaining_thanks_for_mturk(self):
        """ Return KEYWORD_MTURK_THRESHOLD minus the current thanks count. """
        return KEYWORD_MTURK_THRESHOLD - self.thanks

    def total_thanks_for_mturk(self):
        """ Return KEYWORD_MTURK_THRESHOLD. """
        return KEYWORD_MTURK_THRESHOLD

    def _get_school(self):
        """
        Return the school of this note's course, falling back to the school
        of the course's department when the course has no school of its own.
        """
        course = self.course
        if course.school:
            return course.school
        return course.department.school

    def _reverse_note_route(self, route_name):
        """
        Reverse the named URL route with (school slug, course slug, note
        identifier), where the identifier is the slug if set, else the id.
        """
        identifier = self.slug if self.slug is not None else self.id
        return reverse(route_name,
                       args=[self._get_school().slug, self.course.slug, identifier])

    def get_absolute_url(self):
        """ Resolve note url, use 'note' route and slug if slug
            otherwise use note.id
        """
        return self._reverse_note_route('note_detail')

    def get_absolute_keywords_url(self):
        """ Resolve note keywords url, use slug if slug
            otherwise use note.id
        """
        return self._reverse_note_route('note_keywords')

    def get_absolute_quiz_url(self):
        """ Resolve note quiz url, use slug if slug
            otherwise use note.id
        """
        return self._reverse_note_route('note_quiz')

    def filter_html(self, html):
        """
        Apply all sanitizing filters to HTML.
        Takes in HTML string and outputs HTML string.
        """
        if not html:
            # if there was no HTML, return an empty string
            return ''

        soup = BS(html)
        # Iterate through filters, applying all to the soup object.
        for soupfilter in (
            self.sanitize_anchor_html,
            self.set_canonical_link,
        ):
            soup = soupfilter(soup)
        return str(soup)

    def sanitize_anchor_html(self, soup):
        """
        Filter the given BeautifulSoup obj by adding target=_blank to all
        anchor tags.
        Returns BeautifulSoup obj.
        """
        # An explicit loop rather than map(): map() is meant for building
        # results, and where it is lazy the side effect would never run.
        for anchor in soup.find_all('a'):
            anchor['target'] = '_blank'

        # return filtered soup
        return soup

    @staticmethod
    def canonical_link_predicate(tag):
        """ True for <link> tags whose rel attribute contains 'canonical'. """
        return tag.name == u'link' and \
            tag.has_attr('rel') and \
            u'canonical' in tag['rel']

    def set_canonical_link(self, soup):
        """
        Filter the given BeautifulSoup obj by adding
        <link rel="canonical" href="note.get_absolute_url" />
        to the document head.

        Existing canonical links have their href rewritten instead. When the
        document has no <head>, one is created so the link can be attached.
        Returns BeautifulSoup obj.
        """
        domain = Site.objects.all()[0].domain
        note_full_href = 'http://' + domain + self.get_absolute_url()
        canonical_tags = soup.find_all(self.canonical_link_predicate)
        if canonical_tags:
            for tag in canonical_tags:
                tag['href'] = note_full_href
        else:
            new_tag = soup.new_tag('link', rel='canonical', href=note_full_href)
            head = soup.find('head')
            if head is None:
                # No <head> in the parsed document: create one as the first
                # child of <html>, or of the document itself as a last resort.
                head = soup.new_tag('head')
                root = soup.find('html')
                if root is None:
                    root = soup
                root.insert(0, head)
            head.append(new_tag)

        # return filtered soup
        return soup

    def _update_parent_updated_at(self):
        """ update the parent Course.updated_at model
            with the latest uploaded_at """
        self.course.updated_at = self.uploaded_at
        self.course.save()

    def save(self, *args, **kwargs):
        """
        Save the note, first bumping the parent course's updated_at when this
        note's uploaded_at is more recent.
        """
        if self.uploaded_at and self.uploaded_at > self.course.updated_at:
            self._update_parent_updated_at()
        super(Note, self).save(*args, **kwargs)

    def has_markdown(self):
        """ True when this instance has a 'notemarkdown' attribute. """
        return hasattr(self, "notemarkdown")

    def is_pdf(self):
        """ True when the note's mimetype is listed in PDF_MIMETYPES. """
        return self.mimetype in Note.PDF_MIMETYPES
424
425
class NoteMarkdown(models.Model):
    """
    Markdown text attached one-to-one to a Note; the Note relation doubles
    as this model's primary key.
    """
    note     = models.OneToOneField(Note, primary_key=True)
    markdown = models.TextField(blank=True, null=True)
429
# NOTE(review): presumably installs a manual uniqueness check for Note's
# unique_together fields -- confirm against
# karmaworld.settings.manual_unique_together.
auto_add_check_unique_together(Note)
431
432
def update_note_counts(note_instance):
    """
    Refresh the cached thank/note counters affected by the given note.

    Updates the note's course, then the course's school, or the school of
    the course's department when the course has no school of its own.
    Does nothing when the course no longer exists (cascade delete).
    """
    try:
        # test if the course still exists, or if this is a cascade delete.
        course = note_instance.course
    except Course.DoesNotExist:
        # this is a cascade delete. there is no course to update
        return

    course.update_thank_count()
    course.update_note_count()

    school = course.school
    if not school:
        # Fall back to the department's school; a course with neither a
        # school nor a department must not crash the save/delete signal.
        department = course.department
        school = department.school if department else None
    if school:
        school.update_note_count()
448
@receiver(pre_save, sender=Note, weak=False)
def note_pre_save_receiver(sender, **kwargs):
    """Attach the currently stored version of the Note being saved to the
    instance as ``old_instance`` so the post_save handler can read it.
    Nothing is attached when no stored row matches the instance id."""
    if 'instance' in kwargs:
        incoming = kwargs['instance']
        try:
            stored = Note.objects.get(id=incoming.id)
        except ObjectDoesNotExist:
            # no stored row matches this id; nothing to remember
            return
        incoming.old_instance = stored
461
@receiver(post_save, sender=Note, weak=False)
def note_save_receiver(sender, **kwargs):
    """After a Note is saved, refresh the course/school counters and add the
    note to, or update it in, the search index. Search index failures are
    logged and swallowed so they do not break the save."""
    if 'instance' not in kwargs:
        return
    saved_note = kwargs['instance']

    update_note_counts(saved_note)

    try:
        search_index = SearchIndex()
        if not kwargs['created']:
            # old_instance is set by the pre_save receiver
            search_index.update_note(saved_note, saved_note.old_instance)
        else:
            search_index.add_note(saved_note)
    except Exception:
        logger.error("Error with IndexDen:\n" + traceback.format_exc())
477     except Exception:
478         logger.error("Error with IndexDen:\n" + traceback.format_exc())
479
480
@receiver(post_delete, sender=Note, weak=False)
def note_delete_receiver(sender, **kwargs):
    """After a Note is deleted, refresh the course/school counters, drop the
    note from the search index and, if the note had an uploader, record a
    NOTE_DELETED karma event for that user."""
    if 'instance' not in kwargs:
        return
    deleted_note = kwargs['instance']

    # Update course and school counts of how
    # many notes they have
    update_note_counts(deleted_note)

    # Remove document from search index; failures are logged, not raised
    try:
        SearchIndex().remove_note(deleted_note)
    except Exception:
        logger.error("Error with IndexDen:\n" + traceback.format_exc())

    uploader = deleted_note.user
    if uploader:
        GenericKarmaEvent.create_event(uploader, deleted_note.name, GenericKarmaEvent.NOTE_DELETED)
498     if note.user:
499         GenericKarmaEvent.create_event(note.user, note.name, GenericKarmaEvent.NOTE_DELETED)
500
501
class UserUploadMapping(models.Model):
    """
    Records that a user is associated with a Filepicker file reference.
    Rows are created by find_orphan_notes() when no single Note matches an
    uploaded URL at login time.
    """
    user = models.ForeignKey(User)
    fp_file = models.CharField(max_length=255)

    class Meta:
        # each (user, file) pair may be recorded only once
        unique_together = ('user', 'fp_file')
507         unique_together = ('user', 'fp_file')
508
509
@receiver(user_logged_in, weak=True)
def find_orphan_notes(sender, **kwargs):
    """
    On login, claim the uploads recorded in the session for the user.

    For every URL stored under ANONYMOUS_UPLOAD_URLS in the session, the
    matching Note is assigned to the user and an UPLOAD karma event is
    created. When no single Note matches, a UserUploadMapping row is
    recorded instead.
    """
    user = kwargs['user']
    session = kwargs['request'].session
    for uploaded_note_url in session.get(ANONYMOUS_UPLOAD_URLS, []):
        try:
            note = Note.objects.get(fp_file=uploaded_note_url)
            note.user = user
            note.save()
            NoteKarmaEvent.create_event(user, note, NoteKarmaEvent.UPLOAD)
        except (ObjectDoesNotExist, MultipleObjectsReturned):
            # get_or_create keeps a repeated URL from violating the
            # (user, fp_file) uniqueness constraint and aborting the login;
            # it also persists the row, so no extra save() is needed.
            UserUploadMapping.objects.get_or_create(fp_file=uploaded_note_url, user=user)
522             mapping = UserUploadMapping.objects.create(fp_file=uploaded_note_url, user=user)
523             mapping.save()
524