9aecbcc8ee734d431c347b3ea71369d45ba2d559
[oweals/karmaworld.git] / karmaworld / apps / notes / models.py
1 #!/usr/bin/env python
2 # -*- coding:utf8 -*-
3 # Copyright (C) 2012  FinalsClub Foundation
4
5 """
6     Models for the notes django app.
7     Contains only the minimum for handling files and their representation
8 """
9 import datetime
10 import traceback
11 import logging
12 from allauth.account.signals import user_logged_in
13 from django.contrib.auth.models import User
14 from django.contrib.sites.models import Site
15 from django.core.urlresolvers import reverse
16 from django.utils.safestring import mark_safe
17 from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned
18 from django.core.files.storage import default_storage
19 from django.db.models import SET_NULL
20 from django.db.models.signals import post_save, post_delete, pre_save
21 from django.dispatch import receiver
22 from karmaworld.apps.users.models import NoteKarmaEvent, GenericKarmaEvent
23 from karmaworld.secret.filepicker import FILEPICKER_API_KEY
24 from karmaworld.utils.filepicker import encode_fp_policy, sign_fp_policy
25 import os
26 import time
27 import urllib
28
29 from django.conf import settings
30 from django.core.files import File
31 from django.core.files.storage import FileSystemStorage
32 from django.db import models
33 from django.utils.text import slugify
34 import django_filepicker
35 from bs4 import BeautifulSoup as BS
36 from taggit.managers import TaggableManager
37
38 from karmaworld.apps.courses.models import Course
39 from karmaworld.apps.licenses.models import License
40 from karmaworld.apps.notes.search import SearchIndex
41 from karmaworld.settings.manual_unique_together import auto_add_check_unique_together
42
# Session key under which find_orphan_notes() looks up the list of Filepicker
# URLs to attach to a user at login time.
ANONYMOUS_UPLOAD_URLS = 'anonymous_upload_urls'
# Thanks threshold reported by Note.total_thanks_for_mturk() and used by
# Note.remaining_thanks_for_mturk().
KEYWORD_MTURK_THRESHOLD = 3

logger = logging.getLogger(__name__)
# Local filesystem storage rooted at MEDIA_ROOT.
fs = FileSystemStorage(location=settings.MEDIA_ROOT)

# Dictionary for S3 upload headers
# Passed as `headers` whenever note HTML is written to an S3 key.
s3_upload_headers = {
    'Content-Type': 'text/html',
}

# This is a bit hacky, but nothing else works. Grabbed this from a proper
# file configured via S3 management console.
# https://github.com/FinalsClub/karmaworld/issues/273#issuecomment-32572169
# The policy grants the owner READ, WRITE, READ_ACP and WRITE_ACP, and grants
# the AllUsers group READ.
all_read_xml_acl = '<?xml version="1.0" encoding="UTF-8"?>\n<AccessControlPolicy xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Owner><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Owner><AccessControlList><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>READ</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>WRITE</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>READ_ACP</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>WRITE_ACP</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="Group"><URI>http://acs.amazonaws.com/groups/global/AllUsers</URI></Grantee><Permission>READ</Permission></Grant></AccessControlList></AccessControlPolicy>'
58
59
class Document(models.Model):
    """
    An Abstract Base Class representing a document intended to be subclassed.

    Carries the metadata shared by every uploaded document (course, tags,
    name, slug, category, license, upstream link and uploader details) and
    the Filepicker field through which the file itself is received.
    """
    course          = models.ForeignKey(Course)
    tags            = TaggableManager(blank=True)
    name            = models.CharField(max_length=255, blank=True, null=True)
    slug            = models.SlugField(max_length=255, unique=True)

    LECTURE_NOTES = 'LECTURE_NOTES'
    STUDY_GUIDE = 'STUDY_GUIDE'
    SYLLABUS = 'SYLLABUS'
    ASSIGNMENT = 'ASSIGNMENT'
    OTHER = 'OTHER'
    NOTE_CATEGORIES = (
        (LECTURE_NOTES, 'Lecture Notes'),
        (STUDY_GUIDE, 'Study Guide'),
        (SYLLABUS, 'Syllabus'),
        (ASSIGNMENT, 'Assignment'),
        (OTHER, 'Other'),
    )
    category = models.CharField(max_length=50, choices=NOTE_CATEGORIES, blank=True, null=True)

    # license if different from default
    license         = models.ForeignKey(License, blank=True, null=True)

    # provide an upstream file link
    upstream_link   = models.URLField(max_length=1024, blank=True, null=True, unique=True)

    # metadata relevant to the Upload process
    user            = models.ForeignKey(User, blank=True, null=True, on_delete=SET_NULL)
    ip              = models.GenericIPAddressField(blank=True, null=True,
                        help_text=u"IP address of the uploader")
    uploaded_at     = models.DateTimeField(null=True, default=datetime.datetime.utcnow)


    # if True, NEVER show this file
    # WARNING: This may throw an error on migration
    is_hidden       = models.BooleanField(default=False)

    ###
    # Everything Filepicker, now in one small area

    # Allow pick (choose files), store (upload to S3), read (from FP repo),
    # stat (status of FP repo files) for 1 year (current time + 365 * 24 * 3600
    # seconds). Generated one time, at class definition upon import. So the
    # server will need to be rebooted at least one time each year or this will
    # go stale.
    fp_policy_json = '{{"expiry": {0}, "call": ["pick","store","read","stat"]}}'
    fp_policy_json = fp_policy_json.format(int(time.time() + 31536000))
    fp_policy      = encode_fp_policy(fp_policy_json)
    fp_signature   = sign_fp_policy(fp_policy)

    # Hack because mimetypes conflict with extensions, but there is no way to
    # disable mimetypes.
    # https://github.com/Ink/django-filepicker/issues/22
    django_filepicker.forms.FPFieldMixin.default_mimetypes = ''
    # Now let django-filepicker do the heavy lifting. Sort of. Look at all those
    # parameters!
    fp_file = django_filepicker.models.FPFileField(
                # FPFileField settings
                apikey=FILEPICKER_API_KEY,
                services='COMPUTER,DROPBOX,URL,GOOGLE_DRIVE,EVERNOTE,GMAIL,BOX,FACEBOOK,FLICKR,PICASA,IMAGE_SEARCH,WEBCAM,FTP',
                additional_params={
                    'data-fp-multiple': 'true',
                    'data-fp-folders': 'true',
                    'data-fp-button-class':
                      'inline-button important add-note-btn',
                    'data-fp-button-text': 'Add Notes',
                    'data-fp-extensions':
                      '.pdf,.doc,.docx,.txt,.html,.rtf,.odt,.png,.jpg,.jpeg,.ppt,.pptx',
                    'data-fp-store-location': 'S3',
                    'data-fp-policy': fp_policy,
                    'data-fp-signature': fp_signature,
                    'type': 'filepicker',
                    'onchange': "got_file(event)",
                },
                # FileField settings
                null=True, blank=True,
                upload_to='nil', # field ignored because S3, but required.
                verbose_name='', # prevent a label from showing up
                )
    mimetype = models.CharField(max_length=255, blank=True, null=True)

    class Meta:
        abstract = True
        ordering = ['-uploaded_at']

    def _generate_unique_slug(self):
        """
        Set self.slug from self.name, disambiguating on collision.

        The slug is the slugified name. When another row of the same concrete
        model already uses that slug, the month, day and microsecond of the
        upload time are appended. The suffixed slug is not re-checked against
        the database.
        """
        candidate = slugify(unicode(self.name))
        # exists() asks the database a yes/no question instead of loading
        # every colliding row just to test the queryset's truthiness.
        if self.__class__.objects.filter(slug=candidate).exists():
            # uploaded_at is nullable; fall back to the current time so a
            # missing timestamp cannot break slug generation.
            stamp = self.uploaded_at or datetime.datetime.utcnow()
            candidate = u"{0}-{1}-{2}-{3}".format(
                    candidate, stamp.month, stamp.day, stamp.microsecond)
        self.slug = candidate

    def _get_fpf(self):
        """
        Memoized FilepickerFile getter. Returns FilepickerFile.

        The FilepickerFile is built once per instance from the stored
        fp_file name and the field's additional_params, then cached on the
        instance as `cached_fpf`.
        """
        if not hasattr(self, 'cached_fpf'):
            # Fetch additional_params containing signature, etc
            aps = self.fp_file.field.additional_params
            self.cached_fpf = django_filepicker.utils.FilepickerFile(self.fp_file.name, aps)
        return self.cached_fpf

    def get_fp_url(self):
        """
        Returns the Filepicker URL for reading the upstream document.
        """
        return self._get_fpf().get_url()

    def get_file(self):
        """
        Downloads the file from filepicker.io and returns a Django File wrapper
        object.
        """
        return self._get_fpf().get_file()

    def save(self, *args, **kwargs):
        """
        Save the document, generating a slug first when a name is present
        and no slug has been set yet.
        """
        if self.name and not self.slug:
            self._generate_unique_slug()
        super(Document, self).save(*args, **kwargs)
191
192
class NoteManager(models.Manager):
    """ Manager that looks Notes up by their natural key. """
    def get_by_natural_key(self, fp_file, upstream_link):
        """
        Fetch the single Note matching both the Filepicker file and the
        upstream link, i.e. the pair returned by Note.natural_key().
        """
        lookup = {
            'fp_file': fp_file,
            'upstream_link': upstream_link,
        }
        return self.get(**lookup)
200
201
class Note(Document):
    """
    A django model representing an uploaded file and associated metadata.
    """
    objects = NoteManager()

    # Mimetypes for which is_pdf() answers True.
    PDF_MIMETYPES = (
      'application/pdf',
      'application/vnd.ms-powerpoint',
      'application/vnd.openxmlformats-officedocument.presentationml.presentation'
    )

    # Cache the Google drive file link
    gdrive_url      = models.URLField(max_length=1024, blank=True, null=True, unique=True)

    # Generated by Google Drive but saved locally
    text            = models.TextField(blank=True, null=True)

    # Number of times this note has been flagged as abusive/spam.
    flags           = models.IntegerField(default=0,null=False)

    # Social media tracking
    tweeted         = models.BooleanField(default=False)
    thanks          = models.PositiveIntegerField(default=0)

    class Meta:
        unique_together = ('fp_file', 'upstream_link')
        ordering = ['-uploaded_at']

    def __unicode__(self):
        return u"Note at {0} (from {1}) ({2})".format(self.fp_file, self.upstream_link, self.id)

    def natural_key(self):
        """
        A Note is uniquely defined by both the Filepicker link and the upstream
        link. The Filepicker link should be unique by itself, but it may be
        null in the database, so the upstream link component should resolve
        those cases.
        """
        # gdrive_url might also fit the bill?
        return (self.fp_file, self.upstream_link)

    def get_relative_s3_path(self):
        """
        returns s3 path relative to the appropriate bucket.
        """
        # Note.slug will be unique and brought in from RawDocument or created
        # upon save() inside RawDocument.convert_to_note(). It makes for a good
        # filename and its pretty well guaranteed to be there.
        return 'html/{0}.html'.format(self.slug)

    def send_to_s3(self, html, do_save=True):
        """
        Push the given HTML up to S3 for this Note.

        Nothing is uploaded when `html` is empty or when a file already
        exists at this note's S3 path. After a successful upload the key is
        made world-readable.

        `do_save` is accepted for backward compatibility only: this method
        never saves the note, whatever value is passed.

        Raises LookupError if the key cannot be found right after upload.
        """
        # do nothing if HTML is empty.
        if not html:
            return
        # upload the HTML file to static host if it is not already there
        filepath = self.get_relative_s3_path()
        if default_storage.exists(filepath):
            return

        # This is a pretty ugly hackified answer to some s3boto shortcomings
        # and some decent default settings chosen by django-storages.

        # Create the new key (key == filename in S3 bucket)
        newkey = default_storage.bucket.new_key(filepath)
        # Upload data!
        newkey.set_contents_from_string(html, headers=s3_upload_headers)
        if not newkey.exists():
            raise LookupError('Unable to find uploaded S3 document {0}'.format(str(newkey)))

        # set the permissions for everyone to read.
        newkey.set_xml_acl(all_read_xml_acl)

    def update_note_on_s3(self, html):
        """
        Overwrite this Note's existing HTML on S3 with the given HTML.

        Nothing happens when `html` is empty. When no file exists yet at this
        note's S3 path a warning is logged and nothing is written.
        """
        # do nothing if HTML is empty.
        if not html:
            return
        # if it's not already there then bail out
        filepath = self.get_relative_s3_path()
        if not default_storage.exists(filepath):
            logger.warning("Cannot update note on S3, it does not exist already: " + unicode(self))
            return

        key = default_storage.bucket.get_key(filepath)
        key.set_contents_from_string(html, headers=s3_upload_headers)
        key.set_xml_acl(all_read_xml_acl)

    def remaining_thanks_for_mturk(self):
        """ Return KEYWORD_MTURK_THRESHOLD minus this note's thanks count. """
        return KEYWORD_MTURK_THRESHOLD - self.thanks

    def total_thanks_for_mturk(self):
        """ Return KEYWORD_MTURK_THRESHOLD. """
        return KEYWORD_MTURK_THRESHOLD

    def _school_slug(self):
        """
        Return the slug of the school used in this note's URLs: the course's
        own school when set, otherwise the school of the course's department.
        """
        course = self.course
        if course.school:
            return course.school.slug
        return course.department.school.slug

    def _reverse_note_route(self, route_name):
        """
        Reverse the named URL route for this note.

        The URL arguments are the school slug, the course slug and the note's
        slug; the note's id stands in for the slug when the slug is None.
        """
        identifier = self.slug if self.slug is not None else self.id
        return reverse(route_name,
                       args=[self._school_slug(), self.course.slug, identifier])

    def get_absolute_url(self):
        """ Resolve note url, use 'note_detail' route and slug if slug
            otherwise use note.id
        """
        return self._reverse_note_route('note_detail')

    def get_absolute_keywords_url(self):
        """ Resolve note keywords url, use 'note_keywords' route and slug
            if slug otherwise use note.id
        """
        return self._reverse_note_route('note_keywords')

    def get_absolute_quiz_url(self):
        """ Resolve note quiz url, use 'note_quiz' route and slug if slug
            otherwise use note.id
        """
        return self._reverse_note_route('note_quiz')

    def filter_html(self, html):
        """
        Apply all sanitizing filters to HTML.
        Takes in HTML string and outputs HTML string.
        """
        if not html:
            # if there was no HTML, return an empty string
            return ''

        soup = BS(html)
        # Iterate through filters, applying all to the soup object.
        for soupfilter in (
            self.sanitize_anchor_html,
            self.set_canonical_link,
        ):
            soup = soupfilter(soup)
        return str(soup)

    def sanitize_anchor_html(self, soup):
        """
        Filter the given BeautifulSoup obj by adding target=_blank to all
        anchor tags.
        Returns BeautifulSoup obj.
        """
        # An explicit loop, not map(): the assignment is a side effect and
        # must run eagerly for every anchor.
        for anchor in soup.find_all('a'):
            anchor['target'] = '_blank'

        # return filtered soup
        return soup

    @staticmethod
    def canonical_link_predicate(tag):
        """ True for <link> tags whose rel attribute contains 'canonical'. """
        return tag.name == u'link' and \
            tag.has_attr('rel') and \
            u'canonical' in tag['rel']

    def set_canonical_link(self, soup):
        """
        Filter the given BeautifulSoup obj by adding
        <link rel="canonical" href="note.get_absolute_url" />
        to the document head.

        Existing canonical links have their href rewritten instead. When the
        document has no <head>, one is created (inside <html> if present,
        otherwise at the top of the document) to hold the new link.
        Returns BeautifulSoup obj.
        """
        domain = Site.objects.all()[0].domain
        note_full_href = 'http://' + domain + self.get_absolute_url()
        canonical_tags = soup.find_all(self.canonical_link_predicate)
        if canonical_tags:
            for tag in canonical_tags:
                tag['href'] = note_full_href
        else:
            new_tag = soup.new_tag('link', rel='canonical', href=note_full_href)
            head = soup.find('head')
            if head is None:
                # HTML fragments may have no <head>; make one rather than
                # failing on None.append().
                head = soup.new_tag('head')
                html_tag = soup.find('html')
                container = html_tag if html_tag is not None else soup
                container.insert(0, head)
            head.append(new_tag)

        # return filtered soup
        return soup

    def _update_parent_updated_at(self):
        """ update the parent Course.updated_at model
            with the latest uploaded_at """
        self.course.updated_at = self.uploaded_at
        self.course.save()

    def save(self, *args, **kwargs):
        """
        Save the note. Beforehand, when this note's uploaded_at is later than
        the course's updated_at, copy it onto the course and save the course.
        """
        if self.uploaded_at and self.uploaded_at > self.course.updated_at:
            self._update_parent_updated_at()
        super(Note, self).save(*args, **kwargs)

    def has_markdown(self):
        """ True when this note has a `notemarkdown` attribute available. """
        return hasattr(self, "notemarkdown")

    def is_pdf(self):
        """ True when the note's mimetype is listed in PDF_MIMETYPES. """
        return self.mimetype in Note.PDF_MIMETYPES
423
424
class NoteMarkdown(models.Model):
    """
    Markdown text attached one-to-one to a Note.

    The Note relation is also the primary key, so each Note has at most one
    NoteMarkdown row.
    """
    # The owning note; doubles as this table's primary key.
    note     = models.OneToOneField(Note, primary_key=True)
    # Markdown text; may be blank or NULL.
    markdown = models.TextField(blank=True, null=True)
428
# NOTE(review): presumably wires up a manual uniqueness check for Note's
# Meta.unique_together -- confirm against
# karmaworld.settings.manual_unique_together.
auto_add_check_unique_together(Note)
430
431
def update_note_counts(note_instance):
    """
    Refresh the cached counters affected by a change to `note_instance`.

    Updates the thank count and note count of the note's course, then the
    note count of the school: the course's own school when set, otherwise
    the school of the course's department. Does nothing when the course no
    longer exists, and skips the school update when no school can be found.
    """
    try:
        # test if the course still exists, or if this is a cascade delete.
        course = note_instance.course
    except Course.DoesNotExist:
        # this is a cascade delete. there is no course to update
        return

    # course exists
    course.update_thank_count()
    course.update_note_count()

    school = course.school
    if not school:
        # Fall back to the department's school; a course without a
        # department simply has no school to update.
        department = course.department
        school = department.school if department else None
    if school:
        school.update_note_count()
447
@receiver(pre_save, sender=Note, weak=False)
def note_pre_save_receiver(sender, **kwargs):
    """
    Before a Note is saved, fetch its currently stored version and attach
    it to the instance as `old_instance` for the post_save receiver to read.
    Nothing is attached when no stored version exists.
    """
    if 'instance' not in kwargs:
        return

    incoming = kwargs['instance']
    try:
        stored = Note.objects.get(id=incoming.id)
    except ObjectDoesNotExist:
        # Not in the database yet, so there is no previous state to keep.
        return
    incoming.old_instance = stored
460
@receiver(post_save, sender=Note, weak=False)
def note_save_receiver(sender, **kwargs):
    """
    After a Note is saved, refresh the course/school counters and add the
    note to the search index (or update its entry when it already existed).
    Search index failures are logged and otherwise ignored.
    """
    if 'instance' not in kwargs:
        return
    saved_note = kwargs['instance']

    update_note_counts(saved_note)

    try:
        search_index = SearchIndex()
        if not kwargs['created']:
            # Existing note: pass along the pre-save copy stored on the
            # instance by the pre_save receiver.
            search_index.update_note(saved_note, saved_note.old_instance)
        else:
            search_index.add_note(saved_note)
    except Exception:
        logger.error("Error with IndexDen:\n" + traceback.format_exc())
478
479
@receiver(post_delete, sender=Note, weak=False)
def note_delete_receiver(sender, **kwargs):
    """
    After a Note is deleted, refresh the course/school counters, drop the
    note from the search index and, when the note had an uploader, record a
    NOTE_DELETED karma event for that user.
    Search index failures are logged and otherwise ignored.
    """
    if 'instance' not in kwargs:
        return
    deleted_note = kwargs['instance']

    # Course and school keep counts of their notes; bring them up to date.
    update_note_counts(deleted_note)

    # Take the document out of the search index.
    try:
        SearchIndex().remove_note(deleted_note)
    except Exception:
        logger.error("Error with IndexDen:\n" + traceback.format_exc())

    uploader = deleted_note.user
    if uploader:
        GenericKarmaEvent.create_event(uploader, deleted_note.name, GenericKarmaEvent.NOTE_DELETED)
499
500
class UserUploadMapping(models.Model):
    """
    Associates a user with a Filepicker file identifier.

    find_orphan_notes() creates one of these at login for each uploaded URL
    in the session that does not resolve to exactly one Note.
    """
    user = models.ForeignKey(User)
    # Filepicker file value as stored in the session's upload list.
    fp_file = models.CharField(max_length=255)

    class Meta:
        # A given user/file pair may be recorded only once.
        unique_together = ('user', 'fp_file')
507
508
@receiver(user_logged_in, weak=True)
def find_orphan_notes(sender, **kwargs):
    """
    On login, claim the notes whose Filepicker URLs are listed in the session.

    For each URL stored under ANONYMOUS_UPLOAD_URLS in the request session:
    when exactly one Note has that fp_file, the logging-in user becomes its
    owner and an UPLOAD karma event is created. Otherwise (no Note, or more
    than one) a UserUploadMapping remembers the user/URL pair.

    Expects `user` and `request` in kwargs, as sent with user_logged_in.
    """
    user = kwargs['user']
    session = kwargs['request'].session
    for uploaded_note_url in session.get(ANONYMOUS_UPLOAD_URLS, []):
        try:
            note = Note.objects.get(fp_file=uploaded_note_url)
            note.user = user
            note.save()
            NoteKarmaEvent.create_event(user, note, NoteKarmaEvent.UPLOAD)
        except (ObjectDoesNotExist, MultipleObjectsReturned):
            # get_or_create writes the row once and tolerates a repeat login
            # with the same session URLs, which would otherwise violate the
            # (user, fp_file) uniqueness constraint.
            UserUploadMapping.objects.get_or_create(
                fp_file=uploaded_note_url, user=user)
523