3 # Copyright (C) 2012 FinalsClub Foundation
6 Models for the notes django app.
7 Contains only the minimum for handling files and their representation
12 from allauth.account.signals import user_logged_in
13 from django.contrib.auth.models import User
14 from django.contrib.sites.models import Site
15 from django.core.urlresolvers import reverse
16 from django.utils.safestring import mark_safe
17 from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned
18 from django.core.files.storage import default_storage
19 from django.db.models import SET_NULL
20 from django.db.models.signals import post_save, post_delete, pre_save
21 from django.dispatch import receiver
22 from karmaworld.apps.users.models import NoteKarmaEvent, GenericKarmaEvent
23 from karmaworld.utils.filepicker import encode_fp_policy, sign_fp_policy
28 from django.conf import settings
29 from django.core.files import File
30 from django.core.files.storage import FileSystemStorage
31 from django.db import models
32 from django.utils.text import slugify
33 import django_filepicker
34 from bs4 import BeautifulSoup as BS
35 from taggit.managers import TaggableManager
37 import bleach_whitelist
40 from karmaworld.apps.courses.models import Course
41 from karmaworld.apps.licenses.models import License
42 from karmaworld.apps.notes.search import SearchIndex
43 from karmaworld.settings.manual_unique_together import auto_add_check_unique_together
# Filepicker.io API key. Read with [] (not .get) so a missing key hard-fails
# at import time instead of at the first upload.
FILEPICKER_API_KEY = os.environ['FILEPICKER_API_KEY']

# Session key under which anonymous uploads are remembered (see find_orphan_notes).
ANONYMOUS_UPLOAD_URLS = 'anonymous_upload_urls'
# Number of "thanks" used as the Mechanical Turk keywording threshold.
KEYWORD_MTURK_THRESHOLD = 3

logger = logging.getLogger(__name__)
# Local filesystem storage rooted at MEDIA_ROOT.
fs = FileSystemStorage(location=settings.MEDIA_ROOT)

# Dictionary for S3 upload headers
# NOTE(review): the "s3_upload_headers = {" opening line and its closing "}"
# appear to be missing from this copy of the file — restore them; the name
# s3_upload_headers is used by Note.send_to_s3 / Note.update_note_on_s3.
'Content-Type': 'text/html',

# This is a bit hacky, but nothing else works. Grabbed this from a proper
# file configured via S3 management console.
# https://github.com/FinalsClub/karmaworld/issues/273#issuecomment-32572169
all_read_xml_acl = '<?xml version="1.0" encoding="UTF-8"?>\n<AccessControlPolicy xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Owner><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Owner><AccessControlList><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>READ</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>WRITE</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>READ_ACP</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>WRITE_ACP</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="Group"><URI>http://acs.amazonaws.com/groups/global/AllUsers</URI></Grantee><Permission>READ</Permission></Grant></AccessControlList></AccessControlPolicy>'
class Document(models.Model):
    """
    An Abstract Base Class representing a document intended to be subclassed.
    """
    course = models.ForeignKey(Course)
    tags = TaggableManager(blank=True)
    name = models.CharField(max_length=255, blank=True, null=True)
    # unique slug; generated from name + uploaded_at in _generate_unique_slug()
    slug = models.SlugField(max_length=255, unique=True)

    # Category constants stored in the `category` field below.
    LECTURE_NOTES = 'LECTURE_NOTES'
    STUDY_GUIDE = 'STUDY_GUIDE'
    # NOTE(review): a SYLLABUS = 'SYLLABUS' constant is referenced in the
    # choices below but its definition is missing from this copy of the file.
    ASSIGNMENT = 'ASSIGNMENT'
    # NOTE(review): the "NOTE_CATEGORIES = (" opening line and its closing
    # ")" are missing around the following choice pairs in this copy.
    (LECTURE_NOTES, 'Lecture Notes'),
    (STUDY_GUIDE, 'Study Guide'),
    (SYLLABUS, 'Syllabus'),
    (ASSIGNMENT, 'Assignment'),

    category = models.CharField(max_length=50, choices=NOTE_CATEGORIES, blank=True, null=True)

    # license if different from default
    license = models.ForeignKey(License, blank=True, null=True)

    # provide an upstream file link
    upstream_link = models.URLField(max_length=1024, blank=True, null=True, unique=True)

    # metadata relevant to the Upload process
    user = models.ForeignKey(User, blank=True, null=True, on_delete=SET_NULL)
    ip = models.GenericIPAddressField(blank=True, null=True,
        help_text=u"IP address of the uploader")
    # NOTE(review): utcnow is naive — presumably intentional for this legacy
    # codebase; confirm before switching to timezone-aware datetimes.
    uploaded_at = models.DateTimeField(null=True, default=datetime.datetime.utcnow)

    # if True, NEVER show this file
    # WARNING: This may throw an error on migration
    is_hidden = models.BooleanField(default=False)

    # Everything Filepicker, now in one small area

    # Allow pick (choose files), store (upload to S3), read (from FP repo),
    # stat (status of FP repo files) for 1 year (current time + 365 * 24 * 3600
    # seconds). Generated one time, at class definition upon import. So the
    # server will need to be rebooted at least one time each year or this will
    # NOTE(review): tail of this comment is missing in this copy (presumably
    # "expire" or similar).
    fp_policy_json = '{{"expiry": {0}, "call": ["pick","store","read","stat"]}}'
    fp_policy_json = fp_policy_json.format(int(time.time() + 31536000))
    fp_policy = encode_fp_policy(fp_policy_json)
    fp_signature = sign_fp_policy(fp_policy)

    # Hack because mimetypes conflict with extensions, but there is no way to
    # NOTE(review): middle line of this comment is missing in this copy.
    # https://github.com/Ink/django-filepicker/issues/22
    # Module-level monkeypatch: clears default mimetypes on the mixin itself.
    django_filepicker.forms.FPFieldMixin.default_mimetypes = ''
    # Now let django-filepicker do the heavy lifting. Sort of. Look at all those
    # NOTE(review): tail of this comment is missing in this copy.
    fp_file = django_filepicker.models.FPFileField(
        # FPFileField settings
        apikey=FILEPICKER_API_KEY,
        services='COMPUTER,DROPBOX,URL,GOOGLE_DRIVE,EVERNOTE,GMAIL,BOX,FACEBOOK,FLICKR,PICASA,IMAGE_SEARCH,WEBCAM,FTP',
        # NOTE(review): the "additional_params={" opening line is missing
        # from this copy; the widget data-* attributes below belong in it.
        'data-fp-multiple': 'true',
        'data-fp-folders': 'true',
        'data-fp-button-class':
            'inline-button important add-note-btn',
        'data-fp-button-text': 'Add Notes',
        'data-fp-extensions':
            '.pdf,.doc,.docx,.txt,.html,.rtf,.odt,.png,.jpg,.jpeg,.ppt,.pptx',
        'data-fp-store-location': 'S3',
        'data-fp-policy': fp_policy,
        'data-fp-signature': fp_signature,
        'type': 'filepicker',
        'onchange': "got_file(event)",
        # NOTE(review): the closing "}," of additional_params (and possibly a
        # "# FileField settings" comment) is missing from this copy.
        null=True, blank=True,
        upload_to='nil', # field ignored because S3, but required.
        verbose_name='', # prevent a label from showing up
    mimetype = models.CharField(max_length=255, blank=True, null=True)

    # NOTE(review): a "class Meta:" header (and likely "abstract = True",
    # given the docstring calls this an abstract base class) is missing from
    # this copy; the ordering line below belongs inside it.
        ordering = ['-uploaded_at']
    def _generate_unique_slug(self):
        """ generate a unique slug based on name and uploaded_at """
        _slug = slugify(unicode(self.name))
        klass = self.__class__
        # query for any existing document with the same base slug
        collision = klass.objects.filter(slug=_slug)
        # NOTE(review): the collision-check line (e.g. "if collision:" or a
        # while-loop header) is missing from this copy; the disambiguated
        # slug below is presumably only built when a collision exists.
        _slug = u"{0}-{1}-{2}-{3}".format(
            _slug, self.uploaded_at.month,
            self.uploaded_at.day, self.uploaded_at.microsecond)
        # NOTE(review): the trailing "self.slug = _slug" assignment appears
        # to be missing from this copy; without it the method has no effect.
        # NOTE(review): the "def _get_fpf(self):" header and the opening
        # docstring delimiter are missing from this copy of the file.
        """
        Memoized FilepickerFile getter. Returns FilepickerFile.
        """
        if not hasattr(self, 'cached_fpf'):
            # Fetch additional_params containing signature, etc
            aps = self.fp_file.field.additional_params
            # memoize on the instance so repeat callers share one object
            self.cached_fpf = django_filepicker.utils.FilepickerFile(self.fp_file.name, aps)
        return self.cached_fpf
    def get_fp_url(self):
        """
        Returns the Filepicker URL for reading the upstream document.
        """
        fpf = self._get_fpf()
        # Return proper URL for reading
        # NOTE(review): the actual return statement is missing from this
        # copy of the file (presumably returning the FilepickerFile's URL).
        # NOTE(review): the method header (presumably "def get_file(self):")
        # and the opening docstring delimiter are missing from this copy.
        """
        Downloads the file from filepicker.io and returns a Django File wrapper
        """
        # Fetch FilepickerFile
        fpf = self._get_fpf()
        # delegate the download; FilepickerFile wraps the result for Django
        return fpf.get_file()
191 def save(self, *args, **kwargs):
192 if self.name and not self.slug:
193 self._generate_unique_slug()
194 super(Document, self).save(*args, **kwargs)
class NoteManager(models.Manager):
    """ Handle restoring data. """
    def get_by_natural_key(self, fp_file, upstream_link):
        """
        Return a Note defined by its Filepicker and upstream URLs.

        Mirrors Note.natural_key() so serialized data can be reloaded.
        Raises Note.DoesNotExist if no such row exists.
        """
        return self.get(fp_file=fp_file, upstream_link=upstream_link)
class Note(Document):
    """
    A django model representing an uploaded file and associated metadata.
    """
    objects = NoteManager()

    # NOTE(review): the "PDF_MIMETYPES = (" opening line (and likely an
    # 'application/pdf' entry plus the closing ")") is missing from this
    # copy; is_pdf() below reads Note.PDF_MIMETYPES.
    'application/vnd.ms-powerpoint',
    'application/vnd.openxmlformats-officedocument.presentationml.presentation'

    # Cache the Google drive file link
    gdrive_url = models.URLField(max_length=1024, blank=True, null=True, unique=True)

    # Generated by Google Drive but saved locally
    text = models.TextField(blank=True, null=True)

    # Number of times this note has been flagged as abusive/spam.
    flags = models.IntegerField(default=0,null=False)

    # Social media tracking
    tweeted = models.BooleanField(default=False)
    thanks = models.PositiveIntegerField(default=0)

    # NOTE(review): the "class Meta:" header is missing from this copy;
    # the two options below belong inside it.
        unique_together = ('fp_file', 'upstream_link')
        ordering = ['-uploaded_at']
235 def __unicode__(self):
236 return u"Note at {0} (from {1}) ({2})".format(self.fp_file, self.upstream_link, self.id)
238 def natural_key(self):
240 A Note is uniquely defined by both the Filepicker link and the upstream
241 link. The Filepicker link should be unique by itself, but it may be
242 null in the database, so the upstream link component should resolve
245 # gdrive_url might also fit the bill?
246 return (self.fp_file, self.upstream_link)
248 def get_relative_s3_path(self):
250 returns s3 path relative to the appropriate bucket.
252 # Note.slug will be unique and brought in from RawDocument or created
253 # upon save() inside RawDocument.convert_to_note(). It makes for a good
254 # filename and its pretty well guaranteed to be there.
255 return 'html/{0}.html'.format(self.slug)
    def send_to_s3(self, html, do_save=True):
        """
        Push the given HTML up to S3 for this Note.
        Set do_save to False if the note will be saved outside this call.
        """
        # do nothing if HTML is empty.
        if not html or not len(html):
            # NOTE(review): the early "return" for the empty-HTML case is
            # missing from this copy of the file.
        # upload the HTML file to static host if it is not already there
        filepath = self.get_relative_s3_path()
        if not default_storage.exists(filepath):
            # This is a pretty ugly hackified answer to some s3boto shortcomings
            # and some decent default settings chosen by django-storages.

            # Create the new key (key == filename in S3 bucket)
            newkey = default_storage.bucket.new_key(filepath)
            # upload HTML under the module-level s3_upload_headers
            newkey.set_contents_from_string(html, headers=s3_upload_headers)
            if not newkey.exists():
                raise LookupError('Unable to find uploaded S3 document {0}'.format(str(newkey)))

            # set the permissions for everyone to read.
            newkey.set_xml_acl(all_read_xml_acl)
        # NOTE(review): no use of do_save is visible in this copy — trailing
        # lines that persist a field on the Note may be missing; confirm.
281 def update_note_on_s3(self, html):
282 # do nothing if HTML is empty.
283 if not html or not len(html):
285 # if it's not already there then bail out
286 filepath = self.get_relative_s3_path()
287 if not default_storage.exists(filepath):
288 logger.warn("Cannot update note on S3, it does not exist already: " + unicode(self))
291 key = default_storage.bucket.get_key(filepath)
292 key.set_contents_from_string(html, headers=s3_upload_headers)
293 key.set_xml_acl(all_read_xml_acl)
295 def remaining_thanks_for_mturk(self):
296 return KEYWORD_MTURK_THRESHOLD - self.thanks
298 def total_thanks_for_mturk(self):
299 return KEYWORD_MTURK_THRESHOLD
301 def get_absolute_url(self):
302 """ Resolve note url, use 'note' route and slug if slug
303 otherwise use note.id
306 # return a url ending in slug
307 if self.course.school:
308 return reverse('note_detail', args=[self.course.school.slug, self.course.slug, self.slug])
310 return reverse('note_detail', args=[self.course.department.school.slug, self.course.slug, self.slug])
312 # return a url ending in id
313 return reverse('note_detail', args=[self.course.school.slug, self.course.slug, self.id])
315 def get_absolute_keywords_url(self):
316 """ Resolve note url, use 'note' route and slug if slug
317 otherwise use note.id
319 if self.slug is not None:
320 # return a url ending in slug
321 if self.course.school:
322 return reverse('note_keywords', args=[self.course.school.slug, self.course.slug, self.slug])
324 return reverse('note_keywords', args=[self.course.department.school.slug, self.course.slug, self.slug])
326 # return a url ending in id
327 return reverse('note_keywords', args=[self.course.school.slug, self.course.slug, self.id])
329 def get_absolute_quiz_url(self):
330 """ Resolve note url, use 'note' route and slug if slug
331 otherwise use note.id
333 if self.slug is not None:
334 # return a url ending in slug
335 if self.course.school:
336 return reverse('note_quiz', args=[self.course.school.slug, self.course.slug, self.slug])
338 return reverse('note_quiz', args=[self.course.department.school.slug, self.course.slug, self.slug])
340 # return a url ending in id
341 return reverse('note_quiz', args=[self.course.school.slug, self.course.slug, self.id])
    def filter_html(self, html):
        """
        Apply all sanitizing filters to HTML.
        Takes in HTML string and outputs HTML string.
        """
        # Fun fact: This could be made into a static method.
        if not html or not len(html):
            # if there was no HTML, return an empty string
            # NOTE(review): the early return (presumably "return ''") and the
            # BeautifulSoup construction of `soup` from `html` are missing
            # from this copy of the file.

        # Iterate through filters, applying all to the soup object.
        # NOTE(review): the loop header (e.g. "for soupfilter in (") and its
        # closing "):" are missing around the filter list below.
            self.sanitize_anchor_html,
            self.set_canonical_link,
            soup = soupfilter(soup)
        # NOTE(review): the final return converting the soup back to an HTML
        # string is missing from this copy of the file.
362 def sanitize_anchor_html(self, soup):
364 Filter the given BeautifulSoup obj by adding target=_blank to all
366 Returns BeautifulSoup obj.
368 # Fun fact: This could be made into a static method.
369 # Find all a tags in the HTML
370 a_tags = soup.find_all('a')
371 if not a_tags or not len(a_tags):
372 # nothing to process.
375 # build a tag sanitizer
376 def set_attribute_target(tag):
377 tag['target'] = '_blank'
378 # set all anchors to have target="_blank"
379 map(set_attribute_target, a_tags)
381 # return filtered soup
385 def canonical_link_predicate(tag):
386 return tag.name == u'link' and \
387 tag.has_attr('rel') and \
388 u'canonical' in tag['rel']
    def set_canonical_link(self, soup):
        """
        Filter the given BeautifulSoup obj by adding
        <link rel="canonical" href="note.get_absolute_url" />
        to the document head.
        Returns BeautifulSoup obj.
        """
        # Sites framework supplies the deployed domain for an absolute URL.
        domain = Site.objects.all()[0].domain
        note_full_href = 'http://' + domain + self.get_absolute_url()
        canonical_tags = soup.find_all(self.canonical_link_predicate)
        # NOTE(review): a branch header (e.g. "if canonical_tags:") is
        # missing from this copy of the file before the loop below.
        for tag in canonical_tags:
            tag['href'] = note_full_href
        # NOTE(review): the "else:" header is missing here; the new tag below
        # is presumably only created when no canonical tag already exists.
        new_tag = soup.new_tag('link', rel='canonical', href=note_full_href)
        head = soup.find('head')
        # NOTE(review): the head.append(new_tag) call and the final
        # "return soup" are missing from this copy of the file.
        # return filtered soup
411 def _update_parent_updated_at(self):
412 """ update the parent Course.updated_at model
413 with the latest uploaded_at """
414 self.course.updated_at = self.uploaded_at
417 def save(self, *args, **kwargs):
418 if self.uploaded_at and self.uploaded_at > self.course.updated_at:
419 self._update_parent_updated_at()
420 super(Note, self).save(*args, **kwargs)
422 def has_markdown(self):
423 return hasattr(self, "notemarkdown")
426 return self.mimetype in Note.PDF_MIMETYPES
class NoteMarkdown(models.Model):
    """Markdown source and rendered, sanitized HTML attached to a Note."""
    # one-to-one: each Note has at most one NoteMarkdown row
    note = models.OneToOneField(Note, primary_key=True)
    markdown = models.TextField(blank=True, null=True)
    html = models.TextField(blank=True, null=True)

    # NOTE(review): a @classmethod decorator appears to be missing above
    # sanitize (its first parameter is cls), and the bleach.clean(...) call
    # below is missing its trailing argument(s) and closing parenthesis in
    # this copy. `bleach`, `bleach_whitelist` (partially) and `markdown`
    # are also absent from the visible imports — confirm at top of file.
    def sanitize(cls, html):
        return bleach.clean(html,
            bleach_whitelist.markdown_tags,
            bleach_whitelist.markdown_attrs,

    def save(self, *args, **kwargs):
        # render markdown only when html was not supplied directly
        if self.markdown and not self.html:
            self.html = markdown.markdown(self.markdown)
            # NOTE(review): indentation was lost in this copy — the sanitize
            # call may be intended to run unconditionally (outside the if);
            # confirm against upstream before relying on this nesting.
            self.html = NoteMarkdown.sanitize(self.html)
        super(NoteMarkdown, self).save(*args, **kwargs)
447 auto_add_check_unique_together(Note)
def update_note_counts(note_instance):
    """Refresh denormalized note/thank counts on the course and its school."""
    # test if the course still exists, or if this is a cascade delete.
    # NOTE(review): the "try:" header and the guarded course access it wraps
    # are missing from this copy of the file.
    except Course.DoesNotExist:
        # this is a cascade delete. there is no course to update
        # NOTE(review): the early "return" for this branch is missing here.

    note_instance.course.update_thank_count()
    note_instance.course.update_note_count()
    # school may hang directly off the course, or off its department
    if note_instance.course.school:
        note_instance.course.school.update_note_count()
    elif note_instance.course.department.school:
        note_instance.course.department.school.update_note_count()
@receiver(pre_save, sender=Note, weak=False)
def note_pre_save_receiver(sender, **kwargs):
    """Stick an instance of the pre-save value of
    the given Note instance in the instances itself.
    This will be looked at in post_save."""
    if not 'instance' in kwargs:
        return
    try:
        kwargs['instance'].old_instance = Note.objects.get(id=kwargs['instance'].id)
    except ObjectDoesNotExist:
        # brand-new Note: there is no previous value to remember
        pass
@receiver(post_save, sender=Note, weak=False)
def note_save_receiver(sender, **kwargs):
    """Post-save hook: refresh denormalized counts and sync the search index."""
    if not 'instance' in kwargs:
        # NOTE(review): the early "return" for this guard is missing here
        # (the sibling receivers use the same guard pattern).
    note = kwargs['instance']

    update_note_counts(note)

    # NOTE(review): a "try:" header presumably wraps the indexing below —
    # the logger.error with traceback implies an except handler whose
    # header is missing from this copy. `traceback` is also absent from the
    # visible imports; confirm at top of file.
    index = SearchIndex()
    if kwargs['created']:
        # NOTE(review): the created-branch body (likely an index add call)
        # and the "else:" header are missing from this copy of the file.
        index.update_note(note, note.old_instance)
    logger.error("Error with IndexDen:\n" + traceback.format_exc())
@receiver(post_delete, sender=Note, weak=False)
def note_delete_receiver(sender, **kwargs):
    """Post-delete hook: update counts, drop from the search index, log karma."""
    if not 'instance' in kwargs:
        # NOTE(review): the early "return" for this guard is missing here
        # (the sibling receivers use the same guard pattern).
    note = kwargs['instance']

    # Update course and school counts of how
    # many notes they have
    update_note_counts(kwargs['instance'])

    # Remove document from search index
    # NOTE(review): a "try:" header is missing here; the logger.error line
    # below implies an "except" whose header is also missing from this copy.
    index = SearchIndex()
    index.remove_note(note)
    logger.error("Error with IndexDen:\n" + traceback.format_exc())

    GenericKarmaEvent.create_event(note.user, note.name, GenericKarmaEvent.NOTE_DELETED)
class UserUploadMapping(models.Model):
    """Maps a Filepicker upload URL to the user who should own it.

    Written when a just-logged-in user has anonymous uploads that cannot be
    matched to a Note yet (see find_orphan_notes below).
    """
    user = models.ForeignKey(User)
    fp_file = models.CharField(max_length=255)

    class Meta:
        # each upload URL is claimed by a given user at most once
        unique_together = ('user', 'fp_file')
@receiver(user_logged_in, weak=True)
def find_orphan_notes(sender, **kwargs):
    """On login, claim notes that were uploaded anonymously in this session."""
    user = kwargs['user']
    s = kwargs['request'].session
    # URLs stashed by the anonymous-upload flow under ANONYMOUS_UPLOAD_URLS
    uploaded_note_urls = s.get(ANONYMOUS_UPLOAD_URLS, [])
    for uploaded_note_url in uploaded_note_urls:
        # NOTE(review): a "try:" header is missing here (the except below
        # implies one), along with the lines that presumably assign the
        # found note to the user and save it.
        note = Note.objects.get(fp_file=uploaded_note_url)
        NoteKarmaEvent.create_event(user, note, NoteKarmaEvent.UPLOAD)
        except (ObjectDoesNotExist, MultipleObjectsReturned):
            # no (or ambiguous) match yet: record the claim for later
            mapping = UserUploadMapping.objects.create(fp_file=uploaded_note_url, user=user)
            # NOTE(review): the function may continue beyond this visible
            # copy (e.g. saving the mapping or clearing the session key).