karmaworld/apps/notes/models.py

   1 #!/usr/bin/env python
   2 # -*- coding:utf8 -*-
   3 # Copyright (C) 2012  FinalsClub Foundation
   4
   5 """
   6     Models for the notes django app.
   7     Contains only the minimum for handling files and their representation
   8 """
   9 import datetime
  10 import traceback
  11 import logging
  12 from allauth.account.signals import user_logged_in
  13 from django.contrib.auth.models import User
  14 from django.contrib.sites.models import Site
  15 from django.core.urlresolvers import reverse
  16 from django.utils.safestring import mark_safe
  17 from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned
  18 from django.core.files.storage import default_storage
  19 from django.db.models import SET_NULL
  20 from django.db.models.signals import post_save, post_delete, pre_save
  21 from django.dispatch import receiver
  22 from karmaworld.apps.users.models import NoteKarmaEvent, GenericKarmaEvent
  23 from karmaworld.secret.filepicker import FILEPICKER_API_KEY
  24 from karmaworld.utils.filepicker import encode_fp_policy, sign_fp_policy
  25 import os
  26 import time
  27 import urllib
  28
  29 from django.conf import settings
  30 from django.core.files import File
  31 from django.core.files.storage import FileSystemStorage
  32 from django.db import models
  33 from django.utils.text import slugify
  34 import django_filepicker
  35 from bs4 import BeautifulSoup as BS
  36 from taggit.managers import TaggableManager
  37
  38 from karmaworld.apps.courses.models import Course
  39 from karmaworld.apps.licenses.models import License
  40 from karmaworld.apps.notes.search import SearchIndex
  41 from karmaworld.settings.manual_unique_together import auto_add_check_unique_together
  42
# Session key under which the URLs of anonymously uploaded files are kept;
# read back by find_orphan_notes() when a user logs in.
ANONYMOUS_UPLOAD_URLS = 'anonymous_upload_urls'
# Thanks threshold used by Note.remaining_thanks_for_mturk() and
# Note.total_thanks_for_mturk().
KEYWORD_MTURK_THRESHOLD = 3

# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Local filesystem storage rooted at MEDIA_ROOT.
fs = FileSystemStorage(location=settings.MEDIA_ROOT)

# Dictionary for S3 upload headers
# (passed as `headers` whenever note HTML is written to an S3 key).
s3_upload_headers = {
    'Content-Type': 'text/html',
}

# This is a bit hacky, but nothing else works. Grabbed this from a proper
# file configured via S3 management console.
# https://github.com/FinalsClub/karmaworld/issues/273#issuecomment-32572169
# The policy grants READ, WRITE, READ_ACP and WRITE_ACP to the owner's
# canonical user and READ to the global AllUsers group.
all_read_xml_acl = '<?xml version="1.0" encoding="UTF-8"?>\n<AccessControlPolicy xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Owner><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Owner><AccessControlList><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>READ</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>WRITE</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>READ_ACP</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>WRITE_ACP</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="Group"><URI>http://acs.amazonaws.com/groups/global/AllUsers</URI></Grantee><Permission>READ</Permission></Grant></AccessControlList></AccessControlPolicy>'
  58
  59
  60 class Document(models.Model):
  61     """
  62     An Abstract Base Class representing a document intended to be subclassed.
  63     """
  64     course          = models.ForeignKey(Course)
  65     tags            = TaggableManager(blank=True)
  66     name            = models.CharField(max_length=255, blank=True, null=True)
  67     slug            = models.SlugField(max_length=255, unique=True)
  68
  69     LECTURE_NOTES = 'LECTURE_NOTES'
  70     STUDY_GUIDE = 'STUDY_GUIDE'
  71     SYLLABUS = 'SYLLABUS'
  72     ASSIGNMENT = 'ASSIGNMENT'
  73     OTHER = 'OTHER'
  74     NOTE_CATEGORIES = (
  75         (LECTURE_NOTES, 'Lecture Notes'),
  76         (STUDY_GUIDE, 'Study Guide'),
  77         (SYLLABUS, 'Syllabus'),
  78         (ASSIGNMENT, 'Assignment'),
  79         (OTHER, 'Other'),
  80     )
  81     category = models.CharField(max_length=50, choices=NOTE_CATEGORIES, blank=True, null=True)
  82
  83     # license if different from default
  84     license         = models.ForeignKey(License, blank=True, null=True)
  85
  86     # provide an upstream file link
  87     upstream_link   = models.URLField(max_length=1024, blank=True, null=True, unique=True)
  88
  89     # metadata relevant to the Upload process
  90     user            = models.ForeignKey(User, blank=True, null=True, on_delete=SET_NULL)
  91     ip              = models.GenericIPAddressField(blank=True, null=True,
  92                         help_text=u"IP address of the uploader")
  93     uploaded_at     = models.DateTimeField(null=True, default=datetime.datetime.utcnow)
  94
  95
  96     # if True, NEVER show this file
  97     # WARNING: This may throw an error on migration
  98     is_hidden       = models.BooleanField(default=False)
  99
 100     ###
 101     # Everything Filepicker, now in one small area
 102
 103     # Allow pick (choose files), store (upload to S3), read (from FP repo),
 104     # stat (status of FP repo files) for 1 year (current time + 365 * 24 * 3600
 105     # seconds). Generated one time, at class definition upon import. So the
 106     # server will need to be rebooted at least one time each year or this will
 107     # go stale.
 108     fp_policy_json = '{{"expiry": {0}, "call": ["pick","store","read","stat"]}}'
 109     fp_policy_json = fp_policy_json.format(int(time.time() + 31536000))
 110     fp_policy      = encode_fp_policy(fp_policy_json)
 111     fp_signature   = sign_fp_policy(fp_policy)
 112
 113     # Hack because mimetypes conflict with extensions, but there is no way to
 114     # disable mimetypes.
 115     # https://github.com/Ink/django-filepicker/issues/22
 116     django_filepicker.forms.FPFieldMixin.default_mimetypes = ''
 117     # Now let django-filepicker do the heavy lifting. Sort of. Look at all those
 118     # parameters!
 119     fp_file = django_filepicker.models.FPFileField(
 120                 # FPFileField settings
 121                 apikey=FILEPICKER_API_KEY,
 122                 services='COMPUTER,DROPBOX,URL,GOOGLE_DRIVE,EVERNOTE,GMAIL,BOX,FACEBOOK,FLICKR,PICASA,IMAGE_SEARCH,WEBCAM,FTP',
 123                 additional_params={
 124                     'data-fp-multiple': 'true',
 125                     'data-fp-folders': 'true',
 126                     'data-fp-button-class':
 127                       'inline-button important add-note-btn',
 128                     'data-fp-button-text': 'Add Notes',
 129                     'data-fp-extensions':
 130                       '.pdf,.doc,.docx,.txt,.html,.rtf,.odt,.png,.jpg,.jpeg,.ppt,.pptx',
 131                     'data-fp-store-location': 'S3',
 132                     'data-fp-policy': fp_policy,
 133                     'data-fp-signature': fp_signature,
 134                     'type': 'filepicker',
 135                     'onchange': "got_file(event)",
 136                 },
 137                 # FileField settings
 138                 null=True, blank=True,
 139                 upload_to='nil', # field ignored because S3, but required.
 140                 verbose_name='', # prevent a label from showing up
 141                 )
 142     mimetype = models.CharField(max_length=255, blank=True, null=True)
 143
 144     class Meta:
 145         abstract = True
 146         ordering = ['-uploaded_at']
 147
 148     def _generate_unique_slug(self):
 149         """ generate a unique slug based on name and uploaded_at  """
 150         _slug = slugify(unicode(self.name))
 151         klass = self.__class__
 152         collision = klass.objects.filter(slug=_slug)
 153         if collision:
 154             _slug = u"{0}-{1}-{2}-{3}".format(
 155                     _slug, self.uploaded_at.month,
 156                     self.uploaded_at.day, self.uploaded_at.microsecond)
 157         self.slug = _slug
 158
 159     def _get_fpf(self):
 160         """
 161         Memoized FilepickerFile getter. Returns FilepickerFile.
 162         """
 163         if not hasattr(self, 'cached_fpf'):
 164             # Fetch additional_params containing signature, etc
 165             aps = self.fp_file.field.additional_params
 166             self.cached_fpf = django_filepicker.utils.FilepickerFile(self.fp_file.name, aps)
 167         return self.cached_fpf
 168
 169     def get_fp_url(self):
 170         """
 171         Returns the Filepicker URL for reading the upstream document.
 172         """
 173         fpf = self._get_fpf()
 174         # Return proper URL for reading
 175         return fpf.get_url()
 176
 177     def get_file(self):
 178         """
 179         Downloads the file from filepicker.io and returns a Django File wrapper
 180         object.
 181         """
 182         # Fetch FilepickerFile
 183         fpf = self._get_fpf()
 184         # Return Django File
 185         return fpf.get_file()
 186
 187     def save(self, *args, **kwargs):
 188         if self.name and not self.slug:
 189             self._generate_unique_slug()
 190         super(Document, self).save(*args, **kwargs)
 191
 192
 193 class NoteManager(models.Manager):
 194     """ Handle restoring data. """
 195     def get_by_natural_key(self, fp_file, upstream_link):
 196         """
 197         Return a Note defined by its Filepicker and upstream URLs.
 198         """
 199         return self.get(fp_file=fp_file,upstream_link=upstream_link)
 200
 201
 202 class Note(Document):
 203     """
 204     A django model representing an uploaded file and associated metadata.
 205     """
 206     objects = NoteManager()
 207
 208     PDF_MIMETYPES = (
 209       'application/pdf',
 210       'application/vnd.ms-powerpoint',
 211       'application/vnd.openxmlformats-officedocument.presentationml.presentation'
 212     )
 213
 214     # Cache the Google drive file link
 215     gdrive_url      = models.URLField(max_length=1024, blank=True, null=True, unique=True)
 216
 217     # Generated by Google Drive but saved locally
 218     text            = models.TextField(blank=True, null=True)
 219
 220     # Number of times this note has been flagged as abusive/spam.
 221     flags           = models.IntegerField(default=0,null=False)
 222
 223     # Social media tracking
 224     tweeted         = models.BooleanField(default=False)
 225     thanks          = models.PositiveIntegerField(default=0)
 226
 227     class Meta:
 228         unique_together = ('fp_file', 'upstream_link')
 229         ordering = ['-uploaded_at']
 230
 231     def __unicode__(self):
 232         return u"Note at {0} (from {1}) ({2})".format(self.fp_file, self.upstream_link, self.id)
 233
 234     def natural_key(self):
 235         """
 236         A Note is uniquely defined by both the Filepicker link and the upstream
 237         link. The Filepicker link should be unique by itself, but it may be
 238         null in the database, so the upstream link component should resolve
 239         those cases.
 240         """
 241         # gdrive_url might also fit the bill?
 242         return (self.fp_file, self.upstream_link)
 243
 244     def get_relative_s3_path(self):
 245         """
 246         returns s3 path relative to the appropriate bucket.
 247         """
 248         # Note.slug will be unique and brought in from RawDocument or created
 249         # upon save() inside RawDocument.convert_to_note(). It makes for a good
 250         # filename and its pretty well guaranteed to be there.
 251         return 'html/{0}.html'.format(self.slug)
 252
 253     def send_to_s3(self, html, do_save=True):
 254         """
 255         Push the given HTML up to S3 for this Note.
 256         Set do_save to False if the note will be saved outside this call.
 257         """
 258         # do nothing if HTML is empty.
 259         if not html or not len(html):
 260             return
 261         # upload the HTML file to static host if it is not already there
 262         filepath = self.get_relative_s3_path()
 263         if not default_storage.exists(filepath):
 264             # This is a pretty ugly hackified answer to some s3boto shortcomings
 265             # and some decent default settings chosen by django-storages.
 266
 267             # Create the new key (key == filename in S3 bucket)
 268             newkey = default_storage.bucket.new_key(filepath)
 269             # Upload data!
 270             newkey.set_contents_from_string(html, headers=s3_upload_headers)
 271             if not newkey.exists():
 272                 raise LookupError('Unable to find uploaded S3 document {0}'.format(str(newkey)))
 273
 274             # set the permissions for everyone to read.
 275             newkey.set_xml_acl(all_read_xml_acl)
 276
 277     def update_note_on_s3(self, html):
 278         # do nothing if HTML is empty.
 279         if not html or not len(html):
 280             return
 281         # if it's not already there then bail out
 282         filepath = self.get_relative_s3_path()
 283         if not default_storage.exists(filepath):
 284             logger.warn("Cannot update note on S3, it does not exist already: " + unicode(self))
 285             return
 286
 287         key = default_storage.bucket.get_key(filepath)
 288         key.set_contents_from_string(html, headers=s3_upload_headers)
 289         key.set_xml_acl(all_read_xml_acl)
 290
 291     def remaining_thanks_for_mturk(self):
 292         return KEYWORD_MTURK_THRESHOLD - self.thanks
 293
 294     def total_thanks_for_mturk(self):
 295         return KEYWORD_MTURK_THRESHOLD
 296
 297     def get_absolute_url(self):
 298         """ Resolve note url, use 'note' route and slug if slug
 299             otherwise use note.id
 300         """
 301         if self.slug is not None:
 302             # return a url ending in slug
 303             if self.course.school:
 304                 return reverse('note_detail', args=[self.course.school.slug, self.course.slug, self.slug])
 305             else:
 306                 return reverse('note_detail', args=[self.course.department.school.slug, self.course.slug, self.slug])
 307         else:
 308             # return a url ending in id
 309             return reverse('note_detail', args=[self.course.school.slug, self.course.slug, self.id])
 310
 311     def get_absolute_keywords_url(self):
 312         """ Resolve note url, use 'note' route and slug if slug
 313             otherwise use note.id
 314         """
 315         if self.slug is not None:
 316             # return a url ending in slug
 317             if self.course.school:
 318                 return reverse('note_keywords', args=[self.course.school.slug, self.course.slug, self.slug])
 319             else:
 320                 return reverse('note_keywords', args=[self.course.department.school.slug, self.course.slug, self.slug])
 321         else:
 322             # return a url ending in id
 323             return reverse('note_keywords', args=[self.course.school.slug, self.course.slug, self.id])
 324
 325     def get_absolute_quiz_url(self):
 326         """ Resolve note url, use 'note' route and slug if slug
 327             otherwise use note.id
 328         """
 329         if self.slug is not None:
 330             # return a url ending in slug
 331             if self.course.school:
 332                 return reverse('note_quiz', args=[self.course.school.slug, self.course.slug, self.slug])
 333             else:
 334                 return reverse('note_quiz', args=[self.course.department.school.slug, self.course.slug, self.slug])
 335         else:
 336             # return a url ending in id
 337             return reverse('note_quiz', args=[self.course.school.slug, self.course.slug, self.id])
 338
 339     def filter_html(self, html):
 340         """
 341         Apply all sanitizing filters to HTML.
 342         Takes in HTML string and outputs HTML string.
 343         """
 344         # Fun fact: This could be made into a static method.
 345         if not html or not len(html):
 346             # if there was no HTML, return an empty string
 347             return ''
 348
 349         soup = BS(html)
 350         # Iterate through filters, applying all to the soup object.
 351         for soupfilter in (
 352             self.sanitize_anchor_html,
 353             self.set_canonical_link,
 354         ):
 355             soup = soupfilter(soup)
 356         return str(soup)
 357
 358     def sanitize_anchor_html(self, soup):
 359         """
 360         Filter the given BeautifulSoup obj by adding target=_blank to all
 361         anchor tags.
 362         Returns BeautifulSoup obj.
 363         """
 364         # Fun fact: This could be made into a static method.
 365         # Find all a tags in the HTML
 366         a_tags = soup.find_all('a')
 367         if not a_tags or not len(a_tags):
 368             # nothing to process.
 369             return soup
 370
 371         # build a tag sanitizer
 372         def set_attribute_target(tag):
 373             tag['target'] = '_blank'
 374         # set all anchors to have target="_blank"
 375         map(set_attribute_target, a_tags)
 376
 377         # return filtered soup
 378         return soup
 379
 380     @staticmethod
 381     def canonical_link_predicate(tag):
 382         return tag.name == u'link' and \
 383             tag.has_attr('rel') and \
 384             u'canonical' in tag['rel']
 385
 386     def set_canonical_link(self, soup):
 387         """
 388         Filter the given BeautifulSoup obj by adding
 389         <link rel="canonical" href="note.get_absolute_url" />
 390         to the document head.
 391         Returns BeautifulSoup obj.
 392         """
 393         domain = Site.objects.all()[0].domain
 394         note_full_href = 'http://' + domain + self.get_absolute_url()
 395         canonical_tags = soup.find_all(self.canonical_link_predicate)
 396         if canonical_tags:
 397             for tag in canonical_tags:
 398                 tag['href'] = note_full_href
 399         else:
 400             new_tag = soup.new_tag('link', rel='canonical', href=note_full_href)
 401             head = soup.find('head')
 402             head.append(new_tag)
 403
 404         # return filtered soup
 405         return soup
 406
 407     def _update_parent_updated_at(self):
 408         """ update the parent Course.updated_at model
 409             with the latest uploaded_at """
 410         self.course.updated_at = self.uploaded_at
 411         self.course.save()
 412
 413     def save(self, *args, **kwargs):
 414         if self.uploaded_at and self.uploaded_at > self.course.updated_at:
 415             self._update_parent_updated_at()
 416         super(Note, self).save(*args, **kwargs)
 417
 418     def has_markdown(self):
 419         return hasattr(self, "notemarkdown")
 420
 421     def is_pdf(self):
 422         return self.mimetype in Note.PDF_MIMETYPES
 423
 424
 425 class NoteMarkdown(models.Model):
 426     note     = models.OneToOneField(Note, primary_key=True)
 427     markdown = models.TextField(blank=True, null=True)
 428
# NOTE(review): presumably wires up a manual check of Note's
# Meta.unique_together ('fp_file', 'upstream_link'); the helper lives in
# karmaworld.settings.manual_unique_together -- confirm there.
auto_add_check_unique_together(Note)
 430
 431
 432 def update_note_counts(note_instance):
 433     try:
 434         # test if the course still exists, or if this is a cascade delete.
 435         note_instance.course
 436     except Course.DoesNotExist:
 437         # this is a cascade delete. there is no course to update
 438         pass
 439     else:
 440         # course exists
 441         note_instance.course.update_thank_count()
 442         note_instance.course.update_note_count()
 443         if note_instance.course.school:
 444             note_instance.course.school.update_note_count()
 445         elif note_instance.course.department.school:
 446             note_instance.course.department.school.update_note_count()
 447
 448 @receiver(pre_save, sender=Note, weak=False)
 449 def note_pre_save_receiver(sender, **kwargs):
 450     """Stick an instance of the pre-save value of
 451     the given Note instance in the instances itself.
 452     This will be looked at in post_save."""
 453     if not 'instance' in kwargs:
 454         return
 455
 456     try:
 457         kwargs['instance'].old_instance = Note.objects.get(id=kwargs['instance'].id)
 458     except ObjectDoesNotExist:
 459         pass
 460
 461 @receiver(post_save, sender=Note, weak=False)
 462 def note_save_receiver(sender, **kwargs):
 463     if not 'instance' in kwargs:
 464         return
 465     note = kwargs['instance']
 466
 467
 468     update_note_counts(note)
 469
 470     try:
 471         index = SearchIndex()
 472         if kwargs['created']:
 473             index.add_note(note)
 474         else:
 475             index.update_note(note, note.old_instance)
 476     except Exception:
 477         logger.error("Error with IndexDen:\n" + traceback.format_exc())
 478
 479
 480 @receiver(post_delete, sender=Note, weak=False)
 481 def note_delete_receiver(sender, **kwargs):
 482     if not 'instance' in kwargs:
 483         return
 484     note = kwargs['instance']
 485
 486     # Update course and school counts of how
 487     # many notes they have
 488     update_note_counts(kwargs['instance'])
 489
 490     # Remove document from search index
 491     try:
 492         index = SearchIndex()
 493         index.remove_note(note)
 494     except Exception:
 495         logger.error("Error with IndexDen:\n" + traceback.format_exc())
 496
 497     if note.user:
 498         GenericKarmaEvent.create_event(note.user, note.name, GenericKarmaEvent.NOTE_DELETED)
 499
 500
 501 class UserUploadMapping(models.Model):
 502     user = models.ForeignKey(User)
 503     fp_file = models.CharField(max_length=255)
 504
 505     class Meta:
 506         unique_together = ('user', 'fp_file')
 507
 508
 509 @receiver(user_logged_in, weak=True)
 510 def find_orphan_notes(sender, **kwargs):
 511     user = kwargs['user']
 512     s = kwargs['request'].session
 513     uploaded_note_urls = s.get(ANONYMOUS_UPLOAD_URLS, [])
 514     for uploaded_note_url in uploaded_note_urls:
 515         try:
 516             note = Note.objects.get(fp_file=uploaded_note_url)
 517             note.user = user
 518             note.save()
 519             NoteKarmaEvent.create_event(user, note, NoteKarmaEvent.UPLOAD)
 520         except (ObjectDoesNotExist, MultipleObjectsReturned):
 521             mapping = UserUploadMapping.objects.create(fp_file=uploaded_note_url, user=user)
 522             mapping.save()
 523