Remove a bunch of unused fields from Note
[oweals/karmaworld.git] / karmaworld / apps / notes / models.py
1 #!/usr/bin/env python
2 # -*- coding:utf8 -*-
3 # Copyright (C) 2012  FinalsClub Foundation
4
5 """
6     Models for the notes django app.
7     Contains only the minimum for handling files and their representation
8 """
9 import datetime
10 import traceback
11 import logging
12 from allauth.account.signals import user_logged_in
13 from django.contrib.auth.models import User
14 from django.contrib.sites.models import Site
15 from django.core.urlresolvers import reverse
16 from django.utils.safestring import mark_safe
17 from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned
18 from django.core.files.storage import default_storage
19 from django.db.models import SET_NULL
20 from django.db.models.signals import post_save, post_delete, pre_save
21 from django.dispatch import receiver
22 from karmaworld.apps.users.models import NoteKarmaEvent, GenericKarmaEvent
23 from karmaworld.secret.filepicker import FILEPICKER_API_KEY
24 from karmaworld.utils.filepicker import encode_fp_policy, sign_fp_policy
25 import os
26 import time
27 import urllib
28
29 from django.conf import settings
30 from django.core.files import File
31 from django.core.files.storage import FileSystemStorage
32 from django.db import models
33 from django.utils.text import slugify
34 import django_filepicker
35 from bs4 import BeautifulSoup as BS
36 from taggit.managers import TaggableManager
37
38 from karmaworld.apps.courses.models import Course
39 from karmaworld.apps.licenses.models import License
40 from karmaworld.apps.notes.search import SearchIndex
41 from karmaworld.settings.manual_unique_together import auto_add_check_unique_together
42
# Session key under which the Filepicker URLs of anonymous uploads are
# stored until the uploader logs in (consumed by find_orphan_notes).
ANONYMOUS_UPLOAD_URLS = 'anonymous_upload_urls'

logger = logging.getLogger(__name__)
# Local filesystem storage rooted at MEDIA_ROOT.
fs = FileSystemStorage(location=settings.MEDIA_ROOT)

# Dictionary for S3 upload headers
s3_upload_headers = {
    'Content-Type': 'text/html',
}

# This is a bit hacky, but nothing else works. Grabbed this from a proper
# file configured via S3 management console.
# https://github.com/FinalsClub/karmaworld/issues/273#issuecomment-32572169
# Canned ACL XML granting the bucket owner full control and AllUsers READ.
all_read_xml_acl = '<?xml version="1.0" encoding="UTF-8"?>\n<AccessControlPolicy xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Owner><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Owner><AccessControlList><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>READ</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>WRITE</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>READ_ACP</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>WRITE_ACP</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="Group"><URI>http://acs.amazonaws.com/groups/global/AllUsers</URI></Grantee><Permission>READ</Permission></Grant></AccessControlList></AccessControlPolicy>'
57
58
class Document(models.Model):
    """
    An Abstract Base Class representing a document intended to be subclassed.

    Holds the uploaded file (via Filepicker), its course, naming/slug data,
    uploader metadata, and license. Subclassed by Note (below).
    """
    # Course this document was uploaded for.
    course          = models.ForeignKey(Course)
    # Free-form tags.
    tags            = TaggableManager(blank=True)
    # Human-readable title; the slug is derived from it on first save.
    name            = models.CharField(max_length=255, blank=True, null=True)
    # URL-safe unique identifier (see _generate_unique_slug / save).
    slug            = models.SlugField(max_length=255, unique=True)

    # license if different from default
    license         = models.ForeignKey(License, blank=True, null=True)

    # provide an upstream file link
    upstream_link   = models.URLField(max_length=1024, blank=True, null=True, unique=True)

    # metadata relevant to the Upload process
    user            = models.ForeignKey(User, blank=True, null=True, on_delete=SET_NULL)
    ip              = models.GenericIPAddressField(blank=True, null=True,
                        help_text=u"IP address of the uploader")
    # NOTE(review): naive UTC default (datetime.datetime.utcnow), not
    # timezone-aware — confirm against project USE_TZ setting.
    uploaded_at     = models.DateTimeField(null=True, default=datetime.datetime.utcnow)


    # if True, NEVER show this file
    # WARNING: This may throw an error on migration
    is_hidden       = models.BooleanField(default=False)

    ###
    # Everything Filepicker, now in one small area

    # Allow pick (choose files), store (upload to S3), read (from FP repo),
    # stat (status of FP repo files) for 1 year (current time + 365 * 24 * 3600
    # seconds). Generated one time, at class definition upon import. So the
    # server will need to be rebooted at least one time each year or this will
    # go stale.
    fp_policy_json = '{{"expiry": {0}, "call": ["pick","store","read","stat"]}}'
    fp_policy_json = fp_policy_json.format(int(time.time() + 31536000))
    fp_policy      = encode_fp_policy(fp_policy_json)
    fp_signature   = sign_fp_policy(fp_policy)

    # Hack because mimetypes conflict with extensions, but there is no way to
    # disable mimetypes.
    # https://github.com/Ink/django-filepicker/issues/22
    # NOTE: this mutates the django_filepicker module globally at import time.
    django_filepicker.forms.FPFieldMixin.default_mimetypes = ''
    # Now let django-filepicker do the heavy lifting. Sort of. Look at all those
    # parameters!
    fp_file = django_filepicker.models.FPFileField(
                # FPFileField settings
                apikey=FILEPICKER_API_KEY,
                services='COMPUTER,DROPBOX,URL,GOOGLE_DRIVE,EVERNOTE,GMAIL,BOX,FACEBOOK,FLICKR,PICASA,IMAGE_SEARCH,WEBCAM,FTP',
                additional_params={
                    'data-fp-multiple': 'true', 
                    'data-fp-folders': 'true',
                    'data-fp-button-class':
                      'inline-button important add-note-btn',
                    'data-fp-button-text': 'Add Notes',
                    'data-fp-extensions':
                      '.pdf,.doc,.docx,.txt,.html,.rtf,.odt,.png,.jpg,.jpeg,.ppt,.pptx',
                    'data-fp-store-location': 'S3',
                    'data-fp-policy': fp_policy,
                    'data-fp-signature': fp_signature,
                    'type': 'filepicker',
                    'onchange': "got_file(event)",
                },
                # FileField settings
                null=True, blank=True,
                upload_to='nil', # field ignored because S3, but required.
                verbose_name='', # prevent a label from showing up
                )
    # Mimetype reported for the uploaded file (e.g. 'application/pdf').
    mimetype = models.CharField(max_length=255, blank=True, null=True)

    class Meta:
        abstract = True
        ordering = ['-uploaded_at']

    def _generate_unique_slug(self):
        """ generate a unique slug based on name and uploaded_at  """
        _slug = slugify(unicode(self.name))
        klass = self.__class__
        # Any existing row with the same slug forces disambiguation.
        collision = klass.objects.filter(slug=_slug)
        if collision:
            # NOTE(review): the month/day/microsecond suffix could itself
            # collide in rare cases; database-level uniqueness is ultimately
            # enforced by unique=True on the slug field.
            _slug = u"{0}-{1}-{2}-{3}".format(
                    _slug, self.uploaded_at.month,
                    self.uploaded_at.day, self.uploaded_at.microsecond)
        self.slug = _slug

    def _get_fpf(self):
        """
        Memoized FilepickerFile getter. Returns FilepickerFile.
        Caches the wrapper on the instance as `cached_fpf`.
        """
        if not hasattr(self, 'cached_fpf'):
            # Fetch additional_params containing signature, etc
            aps = self.fp_file.field.additional_params
            self.cached_fpf = django_filepicker.utils.FilepickerFile(self.fp_file.name, aps)
        return self.cached_fpf

    def get_fp_url(self):
        """
        Returns the Filepicker URL for reading the upstream document.
        """
        fpf = self._get_fpf()
        # Return proper URL for reading
        return fpf.get_url()

    def get_file(self):
        """
        Downloads the file from filepicker.io and returns a Django File wrapper
        object.
        """
        # Fetch FilepickerFile
        fpf = self._get_fpf()
        # Return Django File
        return fpf.get_file()

    def save(self, *args, **kwargs):
        """Derive the slug from name (only if not already set) before saving."""
        if self.name and not self.slug:
            self._generate_unique_slug()
        super(Document, self).save(*args, **kwargs)
177
178
class NoteManager(models.Manager):
    """ Handle restoring data. """
    def get_by_natural_key(self, fp_file, upstream_link):
        """
        Return a Note defined by its Filepicker and upstream URLs.
        """
        lookup = {'fp_file': fp_file, 'upstream_link': upstream_link}
        return self.get(**lookup)
186
187
class Note(Document):
    """
    A django model representing an uploaded file and associated metadata.
    """
    objects = NoteManager()

    # Mimetypes treated as PDF-style documents (see is_pdf()).
    PDF_MIMETYPES = (
      'application/pdf',
      'application/vnd.ms-powerpoint',
      'application/vnd.openxmlformats-officedocument.presentationml.presentation'
    )

    # Cache the Google drive file link
    gdrive_url      = models.URLField(max_length=1024, blank=True, null=True, unique=True)

    # Generated by Google Drive but saved locally
    text            = models.TextField(blank=True, null=True)

    # Number of times this note has been flagged as abusive/spam.
    flags           = models.IntegerField(default=0,null=False)

    # Social media tracking
    tweeted         = models.BooleanField(default=False)
    thanks          = models.PositiveIntegerField(default=0)

    class Meta:
        unique_together = ('fp_file', 'upstream_link')
        ordering = ['-uploaded_at']

    def __unicode__(self):
        return u"Note at {0} (from {1}) ({2})".format(self.fp_file, self.upstream_link, self.id)

    def natural_key(self):
        """
        A Note is uniquely defined by both the Filepicker link and the upstream
        link. The Filepicker link should be unique by itself, but it may be
        null in the database, so the upstream link component should resolve
        those cases.
        """
        # gdrive_url might also fit the bill?
        return (self.fp_file, self.upstream_link)

    def get_relative_s3_path(self):
        """
        returns s3 path relative to the appropriate bucket.
        """
        # Note.slug will be unique and brought in from RawDocument or created
        # upon save() inside RawDocument.convert_to_note(). It makes for a good
        # filename and its pretty well guaranteed to be there.
        return 'html/{0}.html'.format(self.slug)

    def send_to_s3(self, html, do_save=True):
        """
        Push the given HTML up to S3 for this Note.
        Set do_save to False if the note will be saved outside this call.
        Does nothing if html is empty or the document already exists on S3.
        """
        # do nothing if HTML is empty.
        if not html or not len(html):
            return
        # upload the HTML file to static host if it is not already there
        filepath = self.get_relative_s3_path()
        if not default_storage.exists(filepath):
            # This is a pretty ugly hackified answer to some s3boto shortcomings
            # and some decent default settings chosen by django-storages.

            # Create the new key (key == filename in S3 bucket)
            newkey = default_storage.bucket.new_key(filepath)
            # Upload data!
            newkey.set_contents_from_string(html, headers=s3_upload_headers)
            if not newkey.exists():
                raise LookupError('Unable to find uploaded S3 document {0}'.format(str(newkey)))

            # set the permissions for everyone to read.
            newkey.set_xml_acl(all_read_xml_acl)

    def update_note_on_s3(self, html):
        """
        Overwrite the already-uploaded S3 HTML document for this Note.
        Does nothing if html is empty or the document was never uploaded.
        """
        # do nothing if HTML is empty.
        if not html or not len(html):
            return
        # if it's not already there then bail out
        filepath = self.get_relative_s3_path()
        if not default_storage.exists(filepath):
            logger.warn("Cannot update note on S3, it does not exist already: " + unicode(self))
            return

        key = default_storage.bucket.get_key(filepath)
        key.set_contents_from_string(html, headers=s3_upload_headers)
        key.set_xml_acl(all_read_xml_acl)

    def _resolve_url(self, url_name):
        """
        Shared URL resolution for this note (used by get_absolute_url and
        get_absolute_keywords_url, which previously duplicated this logic).
        Prefers the slug over the numeric id, and falls back to the course's
        department to find the school when the course has no school directly.
        """
        if self.slug is not None:
            # return a url ending in slug
            if self.course.school:
                return reverse(url_name, args=[self.course.school.slug, self.course.slug, self.slug])
            else:
                return reverse(url_name, args=[self.course.department.school.slug, self.course.slug, self.slug])
        else:
            # return a url ending in id
            return reverse(url_name, args=[self.course.school.slug, self.course.slug, self.id])

    def get_absolute_url(self):
        """ Resolve note url, use 'note' route and slug if slug
            otherwise use note.id
        """
        return self._resolve_url('note_detail')

    def get_absolute_keywords_url(self):
        """ Resolve note keywords url, use 'note' route and slug if slug
            otherwise use note.id
        """
        return self._resolve_url('note_keywords')

    def filter_html(self, html):
        """
        Apply all sanitizing filters to HTML.
        Takes in HTML string and outputs HTML string.
        """
        # Fun fact: This could be made into a static method.
        if not html or not len(html):
            # if there was no HTML, return an empty string
            return ''

        soup = BS(html)
        # Iterate through filters, applying all to the soup object.
        for soupfilter in (
            self.sanitize_anchor_html,
            self.set_canonical_link,
        ):
            soup = soupfilter(soup)
        return str(soup)

    def sanitize_anchor_html(self, soup):
        """
        Filter the given BeautifulSoup obj by adding target=_blank to all
        anchor tags.
        Returns BeautifulSoup obj.
        """
        # Fun fact: This could be made into a static method.
        # Find all a tags in the HTML
        a_tags = soup.find_all('a')
        if not a_tags or not len(a_tags):
            # nothing to process.
            return soup

        # build a tag sanitizer
        def set_attribute_target(tag):
            tag['target'] = '_blank'
        # set all anchors to have target="_blank"
        map(set_attribute_target, a_tags)

        # return filtered soup
        return soup

    @staticmethod
    def canonical_link_predicate(tag):
        # True for <link rel="canonical" ...> tags.
        return tag.name == u'link' and \
            tag.has_attr('rel') and \
            u'canonical' in tag['rel']

    def set_canonical_link(self, soup):
        """
        Filter the given BeautifulSoup obj by adding
        <link rel="canonical" href="note.get_absolute_url" />
        to the document head.
        Returns BeautifulSoup obj.
        """
        domain = Site.objects.all()[0].domain
        note_full_href = 'http://' + domain + self.get_absolute_url()
        canonical_tags = soup.find_all(self.canonical_link_predicate)
        if canonical_tags:
            # Rewrite any existing canonical links to point at this note.
            for tag in canonical_tags:
                tag['href'] = note_full_href
        else:
            # No canonical link present; add one to <head>.
            new_tag = soup.new_tag('link', rel='canonical', href=note_full_href)
            head = soup.find('head')
            head.append(new_tag)

        # return filtered soup
        return soup

    def _update_parent_updated_at(self):
        """ update the parent Course.updated_at model
            with the latest uploaded_at """
        self.course.updated_at = self.uploaded_at
        self.course.save()

    def save(self, *args, **kwargs):
        """Propagate a newer uploaded_at to the parent course before saving."""
        if self.uploaded_at and self.uploaded_at > self.course.updated_at:
            self._update_parent_updated_at()
        super(Note, self).save(*args, **kwargs)

    def has_markdown(self):
        # True when a NoteMarkdown row exists for this note (OneToOne reverse).
        return hasattr(self, "notemarkdown")

    def is_pdf(self):
        # True when the uploaded file's mimetype is one we render as PDF.
        return self.mimetype in Note.PDF_MIMETYPES
390
391
class NoteMarkdown(models.Model):
    """Optional markdown rendition of a Note, stored one-to-one."""
    # The Note this markdown belongs to; doubles as the primary key.
    note     = models.OneToOneField(Note, primary_key=True)
    # Markdown text content.
    markdown = models.TextField(blank=True, null=True)
395
# Register Note's unique_together constraint with the project's manual
# uniqueness-check machinery (see karmaworld.settings.manual_unique_together).
auto_add_check_unique_together(Note)
397
398
def update_note_counts(note_instance):
    """
    Refresh the cached note counts on the note's course and on its school
    (looked up directly or via the course's department). Silently does
    nothing during a cascade delete, when the course no longer exists.
    """
    try:
        # Accessing the relation raises if this is a cascade delete.
        note_instance.course
    except Course.DoesNotExist:
        # No course left to update.
        return

    course = note_instance.course
    course.update_note_count()
    if course.school:
        course.school.update_note_count()
    elif course.department.school:
        course.department.school.update_note_count()
413
@receiver(pre_save, sender=Note, weak=False)
def note_pre_save_receiver(sender, **kwargs):
    """Stick an instance of the pre-save value of
    the given Note instance in the instances itself.
    This will be looked at in post_save."""
    if 'instance' not in kwargs:
        return

    instance = kwargs['instance']
    try:
        instance.old_instance = Note.objects.get(id=instance.id)
    except ObjectDoesNotExist:
        # Brand-new note: nothing to snapshot.
        pass
426
@receiver(post_save, sender=Note, weak=False)
def note_save_receiver(sender, **kwargs):
    """After a Note is saved: refresh note counts for new notes and
    push the note into the IndexDen search index (add or update)."""
    if 'instance' not in kwargs:
        return

    note = kwargs['instance']
    created = kwargs['created']

    if created:
        update_note_counts(note)

    try:
        search_index = SearchIndex()
        if created:
            search_index.add_note(note)
        else:
            # old_instance was stashed by note_pre_save_receiver.
            search_index.update_note(note, note.old_instance)
    except Exception:
        logger.error("Error with IndexDen:\n" + traceback.format_exc())
444
445
@receiver(post_delete, sender=Note, weak=False)
def note_delete_receiver(sender, **kwargs):
    """After a Note is deleted: refresh note counts, drop the note from
    the search index, and record a karma event for the uploader."""
    if 'instance' not in kwargs:
        return

    note = kwargs['instance']

    # Update course and school counts of how
    # many notes they have
    update_note_counts(note)

    # Remove document from search index
    try:
        SearchIndex().remove_note(note)
    except Exception:
        logger.error("Error with IndexDen:\n" + traceback.format_exc())

    if note.user:
        GenericKarmaEvent.create_event(note.user, note.name, GenericKarmaEvent.NOTE_DELETED)
465
466
class UserUploadMapping(models.Model):
    """Deferred ownership record: links a user to a Filepicker upload that
    could not be matched to a single Note at login time (see find_orphan_notes)."""
    # User who performed the anonymous upload.
    user = models.ForeignKey(User)
    # Filepicker URL of the uploaded file.
    fp_file = models.CharField(max_length=255)

    class Meta:
        unique_together = ('user', 'fp_file')
473
474
@receiver(user_logged_in, weak=True)
def find_orphan_notes(sender, **kwargs):
    """
    On login, claim notes the user uploaded anonymously in this session.

    Filepicker URLs of anonymous uploads are stashed in the session under
    ANONYMOUS_UPLOAD_URLS. For each one, attach the matching Note to the
    user and award upload karma. If zero or multiple notes match, record a
    UserUploadMapping so ownership can be resolved later.
    """
    user = kwargs['user']
    session = kwargs['request'].session
    uploaded_note_urls = session.get(ANONYMOUS_UPLOAD_URLS, [])
    for uploaded_note_url in uploaded_note_urls:
        try:
            note = Note.objects.get(fp_file=uploaded_note_url)
            note.user = user
            note.save()
            NoteKarmaEvent.create_event(user, note, NoteKarmaEvent.UPLOAD)
        except (ObjectDoesNotExist, MultipleObjectsReturned):
            # objects.create() already persists the row; the redundant
            # follow-up save() (a no-op extra UPDATE) has been removed.
            UserUploadMapping.objects.create(fp_file=uploaded_note_url, user=user)
489