3 # Copyright (C) 2012 FinalsClub Foundation
6 Models for the notes django app.
7 Contains only the minimum for handling files and their representation
12 from allauth.account.signals import user_logged_in
13 from django.contrib.auth.models import User
14 from django.contrib.sites.models import Site
15 from django.core.urlresolvers import reverse
16 from django.utils.safestring import mark_safe
17 from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned
18 from django.core.files.storage import default_storage
19 from django.db.models import SET_NULL
20 from django.db.models.signals import post_save, post_delete, pre_save
21 from django.dispatch import receiver
22 from karmaworld.apps.users.models import NoteKarmaEvent, GenericKarmaEvent
23 from karmaworld.secret.filepicker import FILEPICKER_API_KEY
24 from karmaworld.utils.filepicker import encode_fp_policy, sign_fp_policy
29 from django.conf import settings
30 from django.core.files import File
31 from django.core.files.storage import FileSystemStorage
32 from django.db import models
33 from django.utils.text import slugify
34 import django_filepicker
35 from bs4 import BeautifulSoup as BS
36 from taggit.managers import TaggableManager
38 from karmaworld.apps.courses.models import Course
39 from karmaworld.apps.licenses.models import License
40 from karmaworld.apps.notes.search import SearchIndex
41 from karmaworld.settings.manual_unique_together import auto_add_check_unique_together
# Session key holding the Filepicker URLs of uploads not yet tied to a user;
# find_orphan_notes() reads it from the session when a user logs in.
ANONYMOUS_UPLOAD_URLS = 'anonymous_upload_urls'
# Thanks count that Note.remaining_thanks_for_mturk() subtracts from and
# Note.total_thanks_for_mturk() reports.
KEYWORD_MTURK_THRESHOLD = 3

# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Filesystem-backed storage rooted at settings.MEDIA_ROOT.
fs = FileSystemStorage(location=settings.MEDIA_ROOT)
49 # Dictionary for S3 upload headers
51 'Content-Type': 'text/html',
54 # This is a bit hacky, but nothing else works. Grabbed this from a proper
55 # file configured via S3 management console.
56 # https://github.com/FinalsClub/karmaworld/issues/273#issuecomment-32572169
57 all_read_xml_acl = '<?xml version="1.0" encoding="UTF-8"?>\n<AccessControlPolicy xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Owner><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Owner><AccessControlList><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>READ</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>WRITE</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>READ_ACP</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>WRITE_ACP</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="Group"><URI>http://acs.amazonaws.com/groups/global/AllUsers</URI></Grantee><Permission>READ</Permission></Grant></AccessControlList></AccessControlPolicy>'
60 class Document(models.Model):
62 An Abstract Base Class representing a document intended to be subclassed.
64 course = models.ForeignKey(Course)
65 tags = TaggableManager(blank=True)
66 name = models.CharField(max_length=255, blank=True, null=True)
67 slug = models.SlugField(max_length=255, unique=True)
69 LECTURE_NOTES = 'LECTURE_NOTES'
70 STUDY_GUIDE = 'STUDY_GUIDE'
72 ASSIGNMENT = 'ASSIGNMENT'
75 (LECTURE_NOTES, 'Lecture Notes'),
76 (STUDY_GUIDE, 'Study Guide'),
77 (SYLLABUS, 'Syllabus'),
78 (ASSIGNMENT, 'Assignment'),
81 category = models.CharField(max_length=50, choices=NOTE_CATEGORIES, blank=True, null=True)
83 # license if different from default
84 license = models.ForeignKey(License, blank=True, null=True)
86 # provide an upstream file link
87 upstream_link = models.URLField(max_length=1024, blank=True, null=True, unique=True)
89 # metadata relevant to the Upload process
90 user = models.ForeignKey(User, blank=True, null=True, on_delete=SET_NULL)
91 ip = models.GenericIPAddressField(blank=True, null=True,
92 help_text=u"IP address of the uploader")
93 uploaded_at = models.DateTimeField(null=True, default=datetime.datetime.utcnow)
96 # if True, NEVER show this file
97 # WARNING: This may throw an error on migration
98 is_hidden = models.BooleanField(default=False)
101 # Everything Filepicker, now in one small area
103 # Allow pick (choose files), store (upload to S3), read (from FP repo),
104 # stat (status of FP repo files) for 1 year (current time + 365 * 24 * 3600
105 # seconds). Generated one time, at class definition upon import. So the
106 # server will need to be rebooted at least one time each year or this will
108 fp_policy_json = '{{"expiry": {0}, "call": ["pick","store","read","stat"]}}'
109 fp_policy_json = fp_policy_json.format(int(time.time() + 31536000))
110 fp_policy = encode_fp_policy(fp_policy_json)
111 fp_signature = sign_fp_policy(fp_policy)
113 # Hack because mimetypes conflict with extensions, but there is no way to
115 # https://github.com/Ink/django-filepicker/issues/22
116 django_filepicker.forms.FPFieldMixin.default_mimetypes = ''
117 # Now let django-filepicker do the heavy lifting. Sort of. Look at all those
119 fp_file = django_filepicker.models.FPFileField(
120 # FPFileField settings
121 apikey=FILEPICKER_API_KEY,
122 services='COMPUTER,DROPBOX,URL,GOOGLE_DRIVE,EVERNOTE,GMAIL,BOX,FACEBOOK,FLICKR,PICASA,IMAGE_SEARCH,WEBCAM,FTP',
124 'data-fp-multiple': 'true',
125 'data-fp-folders': 'true',
126 'data-fp-button-class':
127 'inline-button important add-note-btn',
128 'data-fp-button-text': 'Add Notes',
129 'data-fp-extensions':
130 '.pdf,.doc,.docx,.txt,.html,.rtf,.odt,.png,.jpg,.jpeg,.ppt,.pptx',
131 'data-fp-store-location': 'S3',
132 'data-fp-policy': fp_policy,
133 'data-fp-signature': fp_signature,
134 'type': 'filepicker',
135 'onchange': "got_file(event)",
138 null=True, blank=True,
139 upload_to='nil', # field ignored because S3, but required.
140 verbose_name='', # prevent a label from showing up
142 mimetype = models.CharField(max_length=255, blank=True, null=True)
146 ordering = ['-uploaded_at']
148 def _generate_unique_slug(self):
149 """ generate a unique slug based on name and uploaded_at """
150 _slug = slugify(unicode(self.name))
151 klass = self.__class__
152 collision = klass.objects.filter(slug=_slug)
154 _slug = u"{0}-{1}-{2}-{3}".format(
155 _slug, self.uploaded_at.month,
156 self.uploaded_at.day, self.uploaded_at.microsecond)
161 Memoized FilepickerFile getter. Returns FilepickerFile.
163 if not hasattr(self, 'cached_fpf'):
164 # Fetch additional_params containing signature, etc
165 aps = self.fp_file.field.additional_params
166 self.cached_fpf = django_filepicker.utils.FilepickerFile(self.fp_file.name, aps)
167 return self.cached_fpf
169 def get_fp_url(self):
171 Returns the Filepicker URL for reading the upstream document.
173 fpf = self._get_fpf()
174 # Return proper URL for reading
179 Downloads the file from filepicker.io and returns a Django File wrapper
182 # Fetch FilepickerFile
183 fpf = self._get_fpf()
185 return fpf.get_file()
def save(self, *args, **kwargs):
    """
    Persist the document, generating a slug first when one is needed.

    A slug is only generated when the document has a name and no slug has
    been set yet. All arguments are passed through to the parent save().
    """
    needs_slug = bool(self.name) and not self.slug
    if needs_slug:
        self._generate_unique_slug()
    super(Document, self).save(*args, **kwargs)
class NoteManager(models.Manager):
    """ Manager used to find Notes again by natural key when restoring data. """

    def get_by_natural_key(self, fp_file, upstream_link):
        """
        Fetch the Note identified by its Filepicker URL together with its
        upstream URL. Uses the manager's get(), so it raises whatever get()
        raises when zero or several rows match.
        """
        lookup = {
            'fp_file': fp_file,
            'upstream_link': upstream_link,
        }
        return self.get(**lookup)
202 class Note(Document):
204 A django model representing an uploaded file and associated metadata.
206 objects = NoteManager()
210 'application/vnd.ms-powerpoint',
211 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
214 # Cache the Google drive file link
215 gdrive_url = models.URLField(max_length=1024, blank=True, null=True, unique=True)
217 # Generated by Google Drive but saved locally
218 text = models.TextField(blank=True, null=True)
220 # Number of times this note has been flagged as abusive/spam.
221 flags = models.IntegerField(default=0,null=False)
223 # Social media tracking
224 tweeted = models.BooleanField(default=False)
225 thanks = models.PositiveIntegerField(default=0)
228 unique_together = ('fp_file', 'upstream_link')
229 ordering = ['-uploaded_at']
def __unicode__(self):
    """
    Human-readable label built from the Filepicker file, the upstream link
    and the database id.
    """
    label = u"Note at {0} (from {1}) ({2})"
    return label.format(self.fp_file, self.upstream_link, self.id)
def natural_key(self):
    """
    Return the (fp_file, upstream_link) pair that identifies this Note.

    The Filepicker link should be unique on its own, but it may be null in
    the database, so the upstream link is included as the second component.
    """
    # gdrive_url might also fit the bill?
    key = (self.fp_file, self.upstream_link)
    return key
def get_relative_s3_path(self):
    """
    Build the bucket-relative S3 path of this Note's HTML file.
    """
    # The slug doubles as the file name: it is unique, and it is either
    # carried over from the RawDocument or created on save() inside
    # RawDocument.convert_to_note(), so it is expected to be present.
    path_template = 'html/{0}.html'
    return path_template.format(self.slug)
253 def send_to_s3(self, html, do_save=True):
255 Push the given HTML up to S3 for this Note.
256 Set do_save to False if the note will be saved outside this call.
258 # do nothing if HTML is empty.
259 if not html or not len(html):
261 # upload the HTML file to static host if it is not already there
262 filepath = self.get_relative_s3_path()
263 if not default_storage.exists(filepath):
264 # This is a pretty ugly hackified answer to some s3boto shortcomings
265 # and some decent default settings chosen by django-storages.
267 # Create the new key (key == filename in S3 bucket)
268 newkey = default_storage.bucket.new_key(filepath)
270 newkey.set_contents_from_string(html, headers=s3_upload_headers)
271 if not newkey.exists():
272 raise LookupError('Unable to find uploaded S3 document {0}'.format(str(newkey)))
274 # set the permissions for everyone to read.
275 newkey.set_xml_acl(all_read_xml_acl)
277 def update_note_on_s3(self, html):
278 # do nothing if HTML is empty.
279 if not html or not len(html):
281 # if it's not already there then bail out
282 filepath = self.get_relative_s3_path()
283 if not default_storage.exists(filepath):
284 logger.warn("Cannot update note on S3, it does not exist already: " + unicode(self))
287 key = default_storage.bucket.get_key(filepath)
288 key.set_contents_from_string(html, headers=s3_upload_headers)
289 key.set_xml_acl(all_read_xml_acl)
def remaining_thanks_for_mturk(self):
    """
    Number of thanks still missing to reach KEYWORD_MTURK_THRESHOLD.
    The result is negative once the note has more thanks than the threshold.
    """
    outstanding = KEYWORD_MTURK_THRESHOLD - self.thanks
    return outstanding
def total_thanks_for_mturk(self):
    """ Expose the module-wide KEYWORD_MTURK_THRESHOLD value. """
    threshold = KEYWORD_MTURK_THRESHOLD
    return threshold
def get_absolute_url(self):
    """
    Resolve the note detail URL through the 'note_detail' route.

    The URL is built from the school slug, the course slug and the note
    slug. If the note slug is None, the note id is used in its place.

    The school is taken from the course when the course has one, otherwise
    from the course's department. This fallback is applied to the id-based
    URL as well, which previously assumed the course always had a school.
    """
    course = self.course
    # A course may be attached to a school directly or via its department.
    school = course.school if course.school else course.department.school
    # Prefer the slug; only fall back to the id when no slug is set.
    identifier = self.slug if self.slug is not None else self.id
    return reverse('note_detail', args=[school.slug, course.slug, identifier])
def get_absolute_keywords_url(self):
    """
    Resolve the note keywords URL through the 'note_keywords' route.

    The URL is built from the school slug, the course slug and the note
    slug. If the note slug is None, the note id is used in its place.

    The school is taken from the course when the course has one, otherwise
    from the course's department. This fallback is applied to the id-based
    URL as well, which previously assumed the course always had a school.
    """
    course = self.course
    # A course may be attached to a school directly or via its department.
    school = course.school if course.school else course.department.school
    # Prefer the slug; only fall back to the id when no slug is set.
    identifier = self.slug if self.slug is not None else self.id
    return reverse('note_keywords', args=[school.slug, course.slug, identifier])
def get_absolute_quiz_url(self):
    """
    Resolve the note quiz URL through the 'note_quiz' route.

    The URL is built from the school slug, the course slug and the note
    slug. If the note slug is None, the note id is used in its place.

    The school is taken from the course when the course has one, otherwise
    from the course's department. This fallback is applied to the id-based
    URL as well, which previously assumed the course always had a school.
    """
    course = self.course
    # A course may be attached to a school directly or via its department.
    school = course.school if course.school else course.department.school
    # Prefer the slug; only fall back to the id when no slug is set.
    identifier = self.slug if self.slug is not None else self.id
    return reverse('note_quiz', args=[school.slug, course.slug, identifier])
339 def filter_html(self, html):
341 Apply all sanitizing filters to HTML.
342 Takes in HTML string and outputs HTML string.
344 # Fun fact: This could be made into a static method.
345 if not html or not len(html):
346 # if there was no HTML, return an empty string
350 # Iterate through filters, applying all to the soup object.
352 self.sanitize_anchor_html,
353 self.set_canonical_link,
355 soup = soupfilter(soup)
def sanitize_anchor_html(self, soup):
    """
    Filter the given BeautifulSoup obj by adding target=_blank to all
    anchor tags. The soup is modified in place.
    Returns BeautifulSoup obj.
    """
    # Fun fact: This could be made into a static method.
    # Find all a tags in the HTML; treat a missing result as "no anchors".
    a_tags = soup.find_all('a') or []

    # Use an explicit loop rather than map(): map() is lazy on Python 3, so
    # calling it only for its side effects would leave the tags untouched.
    for anchor in a_tags:
        anchor['target'] = '_blank'

    # return filtered soup
    return soup
def canonical_link_predicate(tag):
    """
    Tell whether the given tag is a <link> element whose rel attribute
    contains 'canonical'.
    """
    return (
        tag.name == u'link'
        and tag.has_attr('rel')
        and u'canonical' in tag['rel']
    )
386 def set_canonical_link(self, soup):
388 Filter the given BeautifulSoup obj by adding
389 <link rel="canonical" href="note.get_absolute_url" />
390 to the document head.
391 Returns BeautifulSoup obj.
393 domain = Site.objects.all()[0].domain
394 note_full_href = 'http://' + domain + self.get_absolute_url()
395 canonical_tags = soup.find_all(self.canonical_link_predicate)
397 for tag in canonical_tags:
398 tag['href'] = note_full_href
400 new_tag = soup.new_tag('link', rel='canonical', href=note_full_href)
401 head = soup.find('head')
404 # return filtered soup
407 def _update_parent_updated_at(self):
408 """ update the parent Course.updated_at model
409 with the latest uploaded_at """
410 self.course.updated_at = self.uploaded_at
def save(self, *args, **kwargs):
    """
    Save the note, first refreshing the parent course's timestamp when this
    note's uploaded_at is set and is later than the course's updated_at.
    All arguments are passed through to the parent save().
    """
    uploaded = self.uploaded_at
    if uploaded and uploaded > self.course.updated_at:
        self._update_parent_updated_at()
    super(Note, self).save(*args, **kwargs)
def has_markdown(self):
    """ Report whether a 'notemarkdown' attribute is reachable on this note. """
    related_name = "notemarkdown"
    return hasattr(self, related_name)
422 return self.mimetype in Note.PDF_MIMETYPES
class NoteMarkdown(models.Model):
    """ Markdown text attached one-to-one to a Note. """
    # The related Note; this link also serves as the primary key.
    note = models.OneToOneField(
        Note,
        primary_key=True,
    )
    # Markdown body; may be left blank or stored as NULL.
    markdown = models.TextField(
        blank=True,
        null=True,
    )
429 auto_add_check_unique_together(Note)
432 def update_note_counts(note_instance):
434 # test if the course still exists, or if this is a cascade delete.
436 except Course.DoesNotExist:
437 # this is a cascade delete. there is no course to update
441 note_instance.course.update_thank_count()
442 note_instance.course.update_note_count()
443 if note_instance.course.school:
444 note_instance.course.school.update_note_count()
445 elif note_instance.course.department.school:
446 note_instance.course.department.school.update_note_count()
448 @receiver(pre_save, sender=Note, weak=False)
449 def note_pre_save_receiver(sender, **kwargs):
450 """Stick an instance of the pre-save value of
451 the given Note instance in the instances itself.
452 This will be looked at in post_save."""
453 if not 'instance' in kwargs:
457 kwargs['instance'].old_instance = Note.objects.get(id=kwargs['instance'].id)
458 except ObjectDoesNotExist:
461 @receiver(post_save, sender=Note, weak=False)
462 def note_save_receiver(sender, **kwargs):
463 if not 'instance' in kwargs:
465 note = kwargs['instance']
468 update_note_counts(note)
471 index = SearchIndex()
472 if kwargs['created']:
475 index.update_note(note, note.old_instance)
477 logger.error("Error with IndexDen:\n" + traceback.format_exc())
480 @receiver(post_delete, sender=Note, weak=False)
481 def note_delete_receiver(sender, **kwargs):
482 if not 'instance' in kwargs:
484 note = kwargs['instance']
486 # Update course and school counts of how
487 # many notes they have
488 update_note_counts(kwargs['instance'])
490 # Remove document from search index
492 index = SearchIndex()
493 index.remove_note(note)
495 logger.error("Error with IndexDen:\n" + traceback.format_exc())
498 GenericKarmaEvent.create_event(note.user, note.name, GenericKarmaEvent.NOTE_DELETED)
501 class UserUploadMapping(models.Model):
502 user = models.ForeignKey(User)
503 fp_file = models.CharField(max_length=255)
506 unique_together = ('user', 'fp_file')
509 @receiver(user_logged_in, weak=True)
510 def find_orphan_notes(sender, **kwargs):
511 user = kwargs['user']
512 s = kwargs['request'].session
513 uploaded_note_urls = s.get(ANONYMOUS_UPLOAD_URLS, [])
514 for uploaded_note_url in uploaded_note_urls:
516 note = Note.objects.get(fp_file=uploaded_note_url)
519 NoteKarmaEvent.create_event(user, note, NoteKarmaEvent.UPLOAD)
520 except (ObjectDoesNotExist, MultipleObjectsReturned):
521 mapping = UserUploadMapping.objects.create(fp_file=uploaded_note_url, user=user)