karmaworld/apps/notes/models.py

   1 #!/usr/bin/env python
   2 # -*- coding:utf8 -*-
   3 # Copyright (C) 2012  FinalsClub Foundation
   4
   5 """
   6     Models for the notes django app.
   7     Contains only the minimum for handling files and their representation
   8 """
   9 import datetime
  10 import traceback
  11 import logging
  12 from allauth.account.signals import user_logged_in
  13 from django.contrib.auth.models import User
  14 from django.contrib.sites.models import Site
  15 from django.core.urlresolvers import reverse
  16 from django.utils.safestring import mark_safe
  17 from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned
  18 from django.core.files.storage import default_storage
  19 from django.db.models import SET_NULL
  20 from django.db.models.signals import post_save, post_delete, pre_save
  21 from django.dispatch import receiver
  22 from karmaworld.apps.users.models import NoteKarmaEvent, GenericKarmaEvent
  23 from karmaworld.utils.filepicker import encode_fp_policy, sign_fp_policy
  24 import os
  25 import time
  26 import urllib
  27
  28 from django.conf import settings
  29 from django.core.files import File
  30 from django.core.files.storage import FileSystemStorage
  31 from django.db import models
  32 from django.utils.text import slugify
  33 import django_filepicker
  34 from bs4 import BeautifulSoup as BS
  35 from taggit.managers import TaggableManager
  36
  37 from karmaworld.apps.courses.models import Course
  38 from karmaworld.apps.licenses.models import License
  39 from karmaworld.apps.notes.search import SearchIndex
  40 from karmaworld.settings.manual_unique_together import auto_add_check_unique_together
  41
  42 FILEPICKER_API_KEY = os.environ['FILEPICKER_API_KEY']
  43
  44 ANONYMOUS_UPLOAD_URLS = 'anonymous_upload_urls'
  45 KEYWORD_MTURK_THRESHOLD = 3
  46
  47 logger = logging.getLogger(__name__)
  48 fs = FileSystemStorage(location=settings.MEDIA_ROOT)
  49
  50 # Dictionary for S3 upload headers
  51 s3_upload_headers = {
  52     'Content-Type': 'text/html',
  53 }
  54
  55 # This is a bit hacky, but nothing else works. Grabbed this from a proper
  56 # file configured via S3 management console.
  57 # https://github.com/FinalsClub/karmaworld/issues/273#issuecomment-32572169
  58 all_read_xml_acl = '<?xml version="1.0" encoding="UTF-8"?>\n<AccessControlPolicy xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Owner><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Owner><AccessControlList><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>READ</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>WRITE</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>READ_ACP</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>WRITE_ACP</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="Group"><URI>http://acs.amazonaws.com/groups/global/AllUsers</URI></Grantee><Permission>READ</Permission></Grant></AccessControlList></AccessControlPolicy>'
  59
  60
  61 class Document(models.Model):
  62     """
  63     An Abstract Base Class representing a document intended to be subclassed.
  64     """
  65     course          = models.ForeignKey(Course)
  66     tags            = TaggableManager(blank=True)
  67     name            = models.CharField(max_length=255, blank=True, null=True)
  68     slug            = models.SlugField(max_length=255, unique=True)
  69
  70     LECTURE_NOTES = 'LECTURE_NOTES'
  71     STUDY_GUIDE = 'STUDY_GUIDE'
  72     SYLLABUS = 'SYLLABUS'
  73     ASSIGNMENT = 'ASSIGNMENT'
  74     OTHER = 'OTHER'
  75     NOTE_CATEGORIES = (
  76         (LECTURE_NOTES, 'Lecture Notes'),
  77         (STUDY_GUIDE, 'Study Guide'),
  78         (SYLLABUS, 'Syllabus'),
  79         (ASSIGNMENT, 'Assignment'),
  80         (OTHER, 'Other'),
  81     )
  82     category = models.CharField(max_length=50, choices=NOTE_CATEGORIES, blank=True, null=True)
  83
  84     # license if different from default
  85     license         = models.ForeignKey(License, blank=True, null=True)
  86
  87     # provide an upstream file link
  88     upstream_link   = models.URLField(max_length=1024, blank=True, null=True, unique=True)
  89
  90     # metadata relevant to the Upload process
  91     user            = models.ForeignKey(User, blank=True, null=True, on_delete=SET_NULL)
  92     ip              = models.GenericIPAddressField(blank=True, null=True,
  93                         help_text=u"IP address of the uploader")
  94     uploaded_at     = models.DateTimeField(null=True, default=datetime.datetime.utcnow)
  95
  96
  97     # if True, NEVER show this file
  98     # WARNING: This may throw an error on migration
  99     is_hidden       = models.BooleanField(default=False)
 100
 101     ###
 102     # Everything Filepicker, now in one small area
 103
 104     # Allow pick (choose files), store (upload to S3), read (from FP repo),
 105     # stat (status of FP repo files) for 1 year (current time + 365 * 24 * 3600
 106     # seconds). Generated one time, at class definition upon import. So the
 107     # server will need to be rebooted at least one time each year or this will
 108     # go stale.
 109     fp_policy_json = '{{"expiry": {0}, "call": ["pick","store","read","stat"]}}'
 110     fp_policy_json = fp_policy_json.format(int(time.time() + 31536000))
 111     fp_policy      = encode_fp_policy(fp_policy_json)
 112     fp_signature   = sign_fp_policy(fp_policy)
 113
 114     # Hack because mimetypes conflict with extensions, but there is no way to
 115     # disable mimetypes.
 116     # https://github.com/Ink/django-filepicker/issues/22
 117     django_filepicker.forms.FPFieldMixin.default_mimetypes = ''
 118     # Now let django-filepicker do the heavy lifting. Sort of. Look at all those
 119     # parameters!
 120     fp_file = django_filepicker.models.FPFileField(
 121                 # FPFileField settings
 122                 apikey=FILEPICKER_API_KEY,
 123                 services='COMPUTER,DROPBOX,URL,GOOGLE_DRIVE,EVERNOTE,GMAIL,BOX,FACEBOOK,FLICKR,PICASA,IMAGE_SEARCH,WEBCAM,FTP',
 124                 additional_params={
 125                     'data-fp-multiple': 'true',
 126                     'data-fp-folders': 'true',
 127                     'data-fp-button-class':
 128                       'inline-button important add-note-btn',
 129                     'data-fp-button-text': 'Add Notes',
 130                     'data-fp-extensions':
 131                       '.pdf,.doc,.docx,.txt,.html,.rtf,.odt,.png,.jpg,.jpeg,.ppt,.pptx',
 132                     'data-fp-store-location': 'S3',
 133                     'data-fp-policy': fp_policy,
 134                     'data-fp-signature': fp_signature,
 135                     'type': 'filepicker',
 136                     'onchange': "got_file(event)",
 137                 },
 138                 # FileField settings
 139                 null=True, blank=True,
 140                 upload_to='nil', # field ignored because S3, but required.
 141                 verbose_name='', # prevent a label from showing up
 142                 )
 143     mimetype = models.CharField(max_length=255, blank=True, null=True)
 144
 145     class Meta:
 146         abstract = True
 147         ordering = ['-uploaded_at']
 148
 149     def _generate_unique_slug(self):
 150         """ generate a unique slug based on name and uploaded_at  """
 151         _slug = slugify(unicode(self.name))
 152         klass = self.__class__
 153         collision = klass.objects.filter(slug=_slug)
 154         if collision:
 155             _slug = u"{0}-{1}-{2}-{3}".format(
 156                     _slug, self.uploaded_at.month,
 157                     self.uploaded_at.day, self.uploaded_at.microsecond)
 158         self.slug = _slug
 159
 160     def _get_fpf(self):
 161         """
 162         Memoized FilepickerFile getter. Returns FilepickerFile.
 163         """
 164         if not hasattr(self, 'cached_fpf'):
 165             # Fetch additional_params containing signature, etc
 166             aps = self.fp_file.field.additional_params
 167             self.cached_fpf = django_filepicker.utils.FilepickerFile(self.fp_file.name, aps)
 168         return self.cached_fpf
 169
 170     def get_fp_url(self):
 171         """
 172         Returns the Filepicker URL for reading the upstream document.
 173         """
 174         fpf = self._get_fpf()
 175         # Return proper URL for reading
 176         return fpf.get_url()
 177
 178     def get_file(self):
 179         """
 180         Downloads the file from filepicker.io and returns a Django File wrapper
 181         object.
 182         """
 183         # Fetch FilepickerFile
 184         fpf = self._get_fpf()
 185         # Return Django File
 186         return fpf.get_file()
 187
 188     def save(self, *args, **kwargs):
 189         if self.name and not self.slug:
 190             self._generate_unique_slug()
 191         super(Document, self).save(*args, **kwargs)
 192
 193
 194 class NoteManager(models.Manager):
 195     """ Handle restoring data. """
 196     def get_by_natural_key(self, fp_file, upstream_link):
 197         """
 198         Return a Note defined by its Filepicker and upstream URLs.
 199         """
 200         return self.get(fp_file=fp_file,upstream_link=upstream_link)
 201
 202
 203 class Note(Document):
 204     """
 205     A django model representing an uploaded file and associated metadata.
 206     """
 207     objects = NoteManager()
 208
 209     PDF_MIMETYPES = (
 210       'application/pdf',
 211       'application/vnd.ms-powerpoint',
 212       'application/vnd.openxmlformats-officedocument.presentationml.presentation'
 213     )
 214
 215     # Cache the Google drive file link
 216     gdrive_url      = models.URLField(max_length=1024, blank=True, null=True, unique=True)
 217
 218     # Generated by Google Drive but saved locally
 219     text            = models.TextField(blank=True, null=True)
 220
 221     # Number of times this note has been flagged as abusive/spam.
 222     flags           = models.IntegerField(default=0,null=False)
 223
 224     # Social media tracking
 225     tweeted         = models.BooleanField(default=False)
 226     thanks          = models.PositiveIntegerField(default=0)
 227
 228     class Meta:
 229         unique_together = ('fp_file', 'upstream_link')
 230         ordering = ['-uploaded_at']
 231
 232     def __unicode__(self):
 233         return u"Note at {0} (from {1}) ({2})".format(self.fp_file, self.upstream_link, self.id)
 234
 235     def natural_key(self):
 236         """
 237         A Note is uniquely defined by both the Filepicker link and the upstream
 238         link. The Filepicker link should be unique by itself, but it may be
 239         null in the database, so the upstream link component should resolve
 240         those cases.
 241         """
 242         # gdrive_url might also fit the bill?
 243         return (self.fp_file, self.upstream_link)
 244
 245     def get_relative_s3_path(self):
 246         """
 247         returns s3 path relative to the appropriate bucket.
 248         """
 249         # Note.slug will be unique and brought in from RawDocument or created
 250         # upon save() inside RawDocument.convert_to_note(). It makes for a good
 251         # filename and its pretty well guaranteed to be there.
 252         return 'html/{0}.html'.format(self.slug)
 253
 254     def send_to_s3(self, html, do_save=True):
 255         """
 256         Push the given HTML up to S3 for this Note.
 257         Set do_save to False if the note will be saved outside this call.
 258         """
 259         # do nothing if HTML is empty.
 260         if not html or not len(html):
 261             return
 262         # upload the HTML file to static host if it is not already there
 263         filepath = self.get_relative_s3_path()
 264         if not default_storage.exists(filepath):
 265             # This is a pretty ugly hackified answer to some s3boto shortcomings
 266             # and some decent default settings chosen by django-storages.
 267
 268             # Create the new key (key == filename in S3 bucket)
 269             newkey = default_storage.bucket.new_key(filepath)
 270             # Upload data!
 271             newkey.set_contents_from_string(html, headers=s3_upload_headers)
 272             if not newkey.exists():
 273                 raise LookupError('Unable to find uploaded S3 document {0}'.format(str(newkey)))
 274
 275             # set the permissions for everyone to read.
 276             newkey.set_xml_acl(all_read_xml_acl)
 277
 278     def update_note_on_s3(self, html):
 279         # do nothing if HTML is empty.
 280         if not html or not len(html):
 281             return
 282         # if it's not already there then bail out
 283         filepath = self.get_relative_s3_path()
 284         if not default_storage.exists(filepath):
 285             logger.warn("Cannot update note on S3, it does not exist already: " + unicode(self))
 286             return
 287
 288         key = default_storage.bucket.get_key(filepath)
 289         key.set_contents_from_string(html, headers=s3_upload_headers)
 290         key.set_xml_acl(all_read_xml_acl)
 291
 292     def remaining_thanks_for_mturk(self):
 293         return KEYWORD_MTURK_THRESHOLD - self.thanks
 294
 295     def total_thanks_for_mturk(self):
 296         return KEYWORD_MTURK_THRESHOLD
 297
 298     def get_absolute_url(self):
 299         """ Resolve note url, use 'note' route and slug if slug
 300             otherwise use note.id
 301         """
 302         if self.slug is not None:
 303             # return a url ending in slug
 304             if self.course.school:
 305                 return reverse('note_detail', args=[self.course.school.slug, self.course.slug, self.slug])
 306             else:
 307                 return reverse('note_detail', args=[self.course.department.school.slug, self.course.slug, self.slug])
 308         else:
 309             # return a url ending in id
 310             return reverse('note_detail', args=[self.course.school.slug, self.course.slug, self.id])
 311
 312     def get_absolute_keywords_url(self):
 313         """ Resolve note url, use 'note' route and slug if slug
 314             otherwise use note.id
 315         """
 316         if self.slug is not None:
 317             # return a url ending in slug
 318             if self.course.school:
 319                 return reverse('note_keywords', args=[self.course.school.slug, self.course.slug, self.slug])
 320             else:
 321                 return reverse('note_keywords', args=[self.course.department.school.slug, self.course.slug, self.slug])
 322         else:
 323             # return a url ending in id
 324             return reverse('note_keywords', args=[self.course.school.slug, self.course.slug, self.id])
 325
 326     def get_absolute_quiz_url(self):
 327         """ Resolve note url, use 'note' route and slug if slug
 328             otherwise use note.id
 329         """
 330         if self.slug is not None:
 331             # return a url ending in slug
 332             if self.course.school:
 333                 return reverse('note_quiz', args=[self.course.school.slug, self.course.slug, self.slug])
 334             else:
 335                 return reverse('note_quiz', args=[self.course.department.school.slug, self.course.slug, self.slug])
 336         else:
 337             # return a url ending in id
 338             return reverse('note_quiz', args=[self.course.school.slug, self.course.slug, self.id])
 339
 340     def filter_html(self, html):
 341         """
 342         Apply all sanitizing filters to HTML.
 343         Takes in HTML string and outputs HTML string.
 344         """
 345         # Fun fact: This could be made into a static method.
 346         if not html or not len(html):
 347             # if there was no HTML, return an empty string
 348             return ''
 349
 350         soup = BS(html)
 351         # Iterate through filters, applying all to the soup object.
 352         for soupfilter in (
 353             self.sanitize_anchor_html,
 354             self.set_canonical_link,
 355         ):
 356             soup = soupfilter(soup)
 357         return str(soup)
 358
 359     def sanitize_anchor_html(self, soup):
 360         """
 361         Filter the given BeautifulSoup obj by adding target=_blank to all
 362         anchor tags.
 363         Returns BeautifulSoup obj.
 364         """
 365         # Fun fact: This could be made into a static method.
 366         # Find all a tags in the HTML
 367         a_tags = soup.find_all('a')
 368         if not a_tags or not len(a_tags):
 369             # nothing to process.
 370             return soup
 371
 372         # build a tag sanitizer
 373         def set_attribute_target(tag):
 374             tag['target'] = '_blank'
 375         # set all anchors to have target="_blank"
 376         map(set_attribute_target, a_tags)
 377
 378         # return filtered soup
 379         return soup
 380
 381     @staticmethod
 382     def canonical_link_predicate(tag):
 383         return tag.name == u'link' and \
 384             tag.has_attr('rel') and \
 385             u'canonical' in tag['rel']
 386
 387     def set_canonical_link(self, soup):
 388         """
 389         Filter the given BeautifulSoup obj by adding
 390         <link rel="canonical" href="note.get_absolute_url" />
 391         to the document head.
 392         Returns BeautifulSoup obj.
 393         """
 394         domain = Site.objects.all()[0].domain
 395         note_full_href = 'http://' + domain + self.get_absolute_url()
 396         canonical_tags = soup.find_all(self.canonical_link_predicate)
 397         if canonical_tags:
 398             for tag in canonical_tags:
 399                 tag['href'] = note_full_href
 400         else:
 401             new_tag = soup.new_tag('link', rel='canonical', href=note_full_href)
 402             head = soup.find('head')
 403             head.append(new_tag)
 404
 405         # return filtered soup
 406         return soup
 407
 408     def _update_parent_updated_at(self):
 409         """ update the parent Course.updated_at model
 410             with the latest uploaded_at """
 411         self.course.updated_at = self.uploaded_at
 412         self.course.save()
 413
 414     def save(self, *args, **kwargs):
 415         if self.uploaded_at and self.uploaded_at > self.course.updated_at:
 416             self._update_parent_updated_at()
 417         super(Note, self).save(*args, **kwargs)
 418
 419     def has_markdown(self):
 420         return hasattr(self, "notemarkdown")
 421
 422     def is_pdf(self):
 423         return self.mimetype in Note.PDF_MIMETYPES
 424
 425
 426 class NoteMarkdown(models.Model):
 427     note     = models.OneToOneField(Note, primary_key=True)
 428     markdown = models.TextField(blank=True, null=True)
 429
 430 auto_add_check_unique_together(Note)
 431
 432
 433 def update_note_counts(note_instance):
 434     try:
 435         # test if the course still exists, or if this is a cascade delete.
 436         note_instance.course
 437     except Course.DoesNotExist:
 438         # this is a cascade delete. there is no course to update
 439         pass
 440     else:
 441         # course exists
 442         note_instance.course.update_thank_count()
 443         note_instance.course.update_note_count()
 444         if note_instance.course.school:
 445             note_instance.course.school.update_note_count()
 446         elif note_instance.course.department.school:
 447             note_instance.course.department.school.update_note_count()
 448
 449 @receiver(pre_save, sender=Note, weak=False)
 450 def note_pre_save_receiver(sender, **kwargs):
 451     """Stick an instance of the pre-save value of
 452     the given Note instance in the instances itself.
 453     This will be looked at in post_save."""
 454     if not 'instance' in kwargs:
 455         return
 456
 457     try:
 458         kwargs['instance'].old_instance = Note.objects.get(id=kwargs['instance'].id)
 459     except ObjectDoesNotExist:
 460         pass
 461
 462 @receiver(post_save, sender=Note, weak=False)
 463 def note_save_receiver(sender, **kwargs):
 464     if not 'instance' in kwargs:
 465         return
 466     note = kwargs['instance']
 467
 468
 469     update_note_counts(note)
 470
 471     try:
 472         index = SearchIndex()
 473         if kwargs['created']:
 474             index.add_note(note)
 475         else:
 476             index.update_note(note, note.old_instance)
 477     except Exception:
 478         logger.error("Error with IndexDen:\n" + traceback.format_exc())
 479
 480
 481 @receiver(post_delete, sender=Note, weak=False)
 482 def note_delete_receiver(sender, **kwargs):
 483     if not 'instance' in kwargs:
 484         return
 485     note = kwargs['instance']
 486
 487     # Update course and school counts of how
 488     # many notes they have
 489     update_note_counts(kwargs['instance'])
 490
 491     # Remove document from search index
 492     try:
 493         index = SearchIndex()
 494         index.remove_note(note)
 495     except Exception:
 496         logger.error("Error with IndexDen:\n" + traceback.format_exc())
 497
 498     if note.user:
 499         GenericKarmaEvent.create_event(note.user, note.name, GenericKarmaEvent.NOTE_DELETED)
 500
 501
 502 class UserUploadMapping(models.Model):
 503     user = models.ForeignKey(User)
 504     fp_file = models.CharField(max_length=255)
 505
 506     class Meta:
 507         unique_together = ('user', 'fp_file')
 508
 509
 510 @receiver(user_logged_in, weak=True)
 511 def find_orphan_notes(sender, **kwargs):
 512     user = kwargs['user']
 513     s = kwargs['request'].session
 514     uploaded_note_urls = s.get(ANONYMOUS_UPLOAD_URLS, [])
 515     for uploaded_note_url in uploaded_note_urls:
 516         try:
 517             note = Note.objects.get(fp_file=uploaded_note_url)
 518             note.user = user
 519             note.save()
 520             NoteKarmaEvent.create_event(user, note, NoteKarmaEvent.UPLOAD)
 521         except (ObjectDoesNotExist, MultipleObjectsReturned):
 522             mapping = UserUploadMapping.objects.create(fp_file=uploaded_note_url, user=user)
 523             mapping.save()
 524