3 # Copyright (C) 2012 FinalsClub Foundation
6 Models for the notes django app.
7 Contains only the minimum for handling files and their representation
12 from allauth.account.signals import user_logged_in
13 from django.contrib.auth.models import User
14 from django.contrib.sites.models import Site
15 from django.core.urlresolvers import reverse
16 from django.utils.safestring import mark_safe
17 from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned
18 from django.core.files.storage import default_storage
19 from django.db.models import SET_NULL
20 from django.db.models.signals import post_save, post_delete, pre_save
21 from django.dispatch import receiver
22 from karmaworld.apps.users.models import NoteKarmaEvent, GenericKarmaEvent
23 from karmaworld.utils.filepicker import encode_fp_policy, sign_fp_policy
28 from django.conf import settings
29 from django.core.files import File
30 from django.core.files.storage import FileSystemStorage
31 from django.db import models
32 from django.utils.text import slugify
33 import django_filepicker
34 from bs4 import BeautifulSoup as BS
35 from taggit.managers import TaggableManager
37 from karmaworld.apps.courses.models import Course
38 from karmaworld.apps.licenses.models import License
39 from karmaworld.apps.notes.search import SearchIndex
40 from karmaworld.settings.manual_unique_together import auto_add_check_unique_together
42 FILEPICKER_API_KEY = os.environ['FILEPICKER_API_KEY']
44 ANONYMOUS_UPLOAD_URLS = 'anonymous_upload_urls'
45 KEYWORD_MTURK_THRESHOLD = 3
47 logger = logging.getLogger(__name__)
48 fs = FileSystemStorage(location=settings.MEDIA_ROOT)
50 # Dictionary for S3 upload headers
52 'Content-Type': 'text/html',
55 # This is a bit hacky, but nothing else works. Grabbed this from a proper
56 # file configured via S3 management console.
57 # https://github.com/FinalsClub/karmaworld/issues/273#issuecomment-32572169
58 all_read_xml_acl = '<?xml version="1.0" encoding="UTF-8"?>\n<AccessControlPolicy xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Owner><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Owner><AccessControlList><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>READ</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>WRITE</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>READ_ACP</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>WRITE_ACP</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="Group"><URI>http://acs.amazonaws.com/groups/global/AllUsers</URI></Grantee><Permission>READ</Permission></Grant></AccessControlList></AccessControlPolicy>'
61 class Document(models.Model):
63 An Abstract Base Class representing a document intended to be subclassed.
65 course = models.ForeignKey(Course)
66 tags = TaggableManager(blank=True)
67 name = models.CharField(max_length=255, blank=True, null=True)
68 slug = models.SlugField(max_length=255, unique=True)
70 LECTURE_NOTES = 'LECTURE_NOTES'
71 STUDY_GUIDE = 'STUDY_GUIDE'
73 ASSIGNMENT = 'ASSIGNMENT'
76 (LECTURE_NOTES, 'Lecture Notes'),
77 (STUDY_GUIDE, 'Study Guide'),
78 (SYLLABUS, 'Syllabus'),
79 (ASSIGNMENT, 'Assignment'),
82 category = models.CharField(max_length=50, choices=NOTE_CATEGORIES, blank=True, null=True)
84 # license if different from default
85 license = models.ForeignKey(License, blank=True, null=True)
87 # provide an upstream file link
88 upstream_link = models.URLField(max_length=1024, blank=True, null=True, unique=True)
90 # metadata relevant to the Upload process
91 user = models.ForeignKey(User, blank=True, null=True, on_delete=SET_NULL)
92 ip = models.GenericIPAddressField(blank=True, null=True,
93 help_text=u"IP address of the uploader")
94 uploaded_at = models.DateTimeField(null=True, default=datetime.datetime.utcnow)
97 # if True, NEVER show this file
98 # WARNING: This may throw an error on migration
99 is_hidden = models.BooleanField(default=False)
102 # Everything Filepicker, now in one small area
# Allow pick (choose files), store (upload to S3), read (from FP repo),
# stat (status of FP repo files) for 1 year (current time + 365 * 24 * 3600
# seconds). The policy is generated one time, at class definition upon
# import, so the server will need to be restarted at least once each year or
# this policy will expire.
109 fp_policy_json = '{{"expiry": {0}, "call": ["pick","store","read","stat"]}}'
110 fp_policy_json = fp_policy_json.format(int(time.time() + 31536000))
111 fp_policy = encode_fp_policy(fp_policy_json)
112 fp_signature = sign_fp_policy(fp_policy)
# Hack because mimetypes conflict with extensions, but there is no way to
# switch the default mimetypes off other than overriding them on the mixin.
# https://github.com/Ink/django-filepicker/issues/22
117 django_filepicker.forms.FPFieldMixin.default_mimetypes = ''
118 # Now let django-filepicker do the heavy lifting. Sort of. Look at all those
120 fp_file = django_filepicker.models.FPFileField(
121 # FPFileField settings
122 apikey=FILEPICKER_API_KEY,
123 services='COMPUTER,DROPBOX,URL,GOOGLE_DRIVE,EVERNOTE,GMAIL,BOX,FACEBOOK,FLICKR,PICASA,IMAGE_SEARCH,WEBCAM,FTP',
125 'data-fp-multiple': 'true',
126 'data-fp-folders': 'true',
127 'data-fp-button-class':
128 'inline-button important add-note-btn',
129 'data-fp-button-text': 'Add Notes',
130 'data-fp-extensions':
131 '.pdf,.doc,.docx,.txt,.html,.rtf,.odt,.png,.jpg,.jpeg,.ppt,.pptx',
132 'data-fp-store-location': 'S3',
133 'data-fp-policy': fp_policy,
134 'data-fp-signature': fp_signature,
135 'type': 'filepicker',
136 'onchange': "got_file(event)",
139 null=True, blank=True,
140 upload_to='nil', # field ignored because S3, but required.
141 verbose_name='', # prevent a label from showing up
143 mimetype = models.CharField(max_length=255, blank=True, null=True)
147 ordering = ['-uploaded_at']
def _generate_unique_slug(self):
    """
    Derive a slug from ``self.name`` and store it on ``self.slug``.

    When another row of the same model class already uses the plain slug,
    the month, day and microsecond of ``self.uploaded_at`` are appended to
    tell the two apart.
    """
    candidate = slugify(unicode(self.name))
    # exists() only asks the database whether a match is there instead of
    # loading the matching rows.
    if self.__class__.objects.filter(slug=candidate).exists():
        stamp = self.uploaded_at
        candidate = u"{0}-{1}-{2}-{3}".format(
            candidate, stamp.month, stamp.day, stamp.microsecond)
    # save() relies on this method to populate the slug field.
    self.slug = candidate
162 Memoized FilepickerFile getter. Returns FilepickerFile.
164 if not hasattr(self, 'cached_fpf'):
165 # Fetch additional_params containing signature, etc
166 aps = self.fp_file.field.additional_params
167 self.cached_fpf = django_filepicker.utils.FilepickerFile(self.fp_file.name, aps)
168 return self.cached_fpf
170 def get_fp_url(self):
172 Returns the Filepicker URL for reading the upstream document.
174 fpf = self._get_fpf()
175 # Return proper URL for reading
180 Downloads the file from filepicker.io and returns a Django File wrapper
183 # Fetch FilepickerFile
184 fpf = self._get_fpf()
186 return fpf.get_file()
def save(self, *args, **kwargs):
    """
    Save the document, generating a slug first when the document has a
    name but no slug yet. All arguments are forwarded to the parent save().
    """
    slug_missing = not self.slug
    if self.name and slug_missing:
        self._generate_unique_slug()
    super(Document, self).save(*args, **kwargs)
class NoteManager(models.Manager):
    """ Manager used to look Notes up again when restoring data. """

    def get_by_natural_key(self, fp_file, upstream_link):
        """
        Fetch the single Note matching both the Filepicker URL and the
        upstream URL.
        """
        lookup = {'fp_file': fp_file, 'upstream_link': upstream_link}
        return self.get(**lookup)
203 class Note(Document):
205 A django model representing an uploaded file and associated metadata.
207 objects = NoteManager()
211 'application/vnd.ms-powerpoint',
212 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
215 # Cache the Google drive file link
216 gdrive_url = models.URLField(max_length=1024, blank=True, null=True, unique=True)
218 # Generated by Google Drive but saved locally
219 text = models.TextField(blank=True, null=True)
221 # Number of times this note has been flagged as abusive/spam.
222 flags = models.IntegerField(default=0,null=False)
224 # Social media tracking
225 tweeted = models.BooleanField(default=False)
226 thanks = models.PositiveIntegerField(default=0)
229 unique_together = ('fp_file', 'upstream_link')
230 ordering = ['-uploaded_at']
def __unicode__(self):
    """ Describe the note by Filepicker file, upstream link and id. """
    parts = (self.fp_file, self.upstream_link, self.id)
    return u"Note at {0} (from {1}) ({2})".format(*parts)
def natural_key(self):
    """
    Return the (Filepicker link, upstream link) pair identifying this Note.

    The Filepicker link should be unique by itself, but it may be null in
    the database, so the upstream link is included as a second component.
    """
    # gdrive_url might also fit the bill?
    key = (self.fp_file, self.upstream_link)
    return key
def get_relative_s3_path(self):
    """
    Build the path of this Note's HTML file, relative to its S3 bucket.
    """
    # The slug doubles as the filename: Note.slug is unique and is either
    # brought in from RawDocument or created upon save().
    template = 'html/{0}.html'
    return template.format(self.slug)
def send_to_s3(self, html, do_save=True):
    """
    Upload the given HTML to S3 as this Note's HTML file.

    Nothing happens when `html` is empty or when a file already exists at
    the Note's S3 path. Raises LookupError if the key cannot be found right
    after uploading. `do_save` is documented for callers that save the note
    themselves; it is not read by the code in this method.
    """
    # do nothing if HTML is empty.
    if not (html and len(html)):
        return

    # only upload when the file is not already on the static host
    s3_path = self.get_relative_s3_path()
    if default_storage.exists(s3_path):
        return

    # This is a pretty ugly hackified answer to some s3boto shortcomings
    # and some decent default settings chosen by django-storages.

    # Create the new key (key == filename in S3 bucket) and fill it.
    uploaded = default_storage.bucket.new_key(s3_path)
    uploaded.set_contents_from_string(html, headers=s3_upload_headers)
    if not uploaded.exists():
        raise LookupError('Unable to find uploaded S3 document {0}'.format(str(uploaded)))

    # set the permissions for everyone to read.
    uploaded.set_xml_acl(all_read_xml_acl)
def update_note_on_s3(self, html):
    """
    Overwrite this Note's existing HTML file on S3 with the given HTML.

    Does nothing when `html` is empty. When no file exists yet at the
    Note's S3 path, a warning is logged and nothing is uploaded.
    """
    # do nothing if HTML is empty.
    if not html or not len(html):
        return

    # if it's not already there then bail out
    filepath = self.get_relative_s3_path()
    if not default_storage.exists(filepath):
        # logger.warning() replaces the deprecated logger.warn() alias; the
        # note is passed as an argument so logging does the formatting.
        logger.warning("Cannot update note on S3, it does not exist already: %s", unicode(self))
        return

    key = default_storage.bucket.get_key(filepath)
    key.set_contents_from_string(html, headers=s3_upload_headers)
    # re-apply the public-read ACL after replacing the contents
    key.set_xml_acl(all_read_xml_acl)
def remaining_thanks_for_mturk(self):
    """ Difference between KEYWORD_MTURK_THRESHOLD and this note's thanks. """
    threshold = KEYWORD_MTURK_THRESHOLD
    return threshold - self.thanks
def total_thanks_for_mturk(self):
    """ The thanks threshold itself (KEYWORD_MTURK_THRESHOLD). """
    threshold = KEYWORD_MTURK_THRESHOLD
    return threshold
def get_absolute_url(self):
    """ Resolve the note detail url via the 'note_detail' route, using the
        slug if there is one, otherwise note.id
    """
    return reverse('note_detail', args=self._note_url_args())

def get_absolute_keywords_url(self):
    """ Resolve the note keywords url via the 'note_keywords' route, using
        the slug if there is one, otherwise note.id
    """
    return reverse('note_keywords', args=self._note_url_args())

def get_absolute_quiz_url(self):
    """ Resolve the note quiz url via the 'note_quiz' route, using the slug
        if there is one, otherwise note.id
    """
    return reverse('note_quiz', args=self._note_url_args())

def _note_url_args(self):
    """ Build the [school slug, course slug, note identifier] list that all
        three note routes take as positional args.
    """
    course = self.course
    # A course reaches its school directly or through its department. The
    # same fallback now applies whether the URL ends in a slug or an id.
    school = course.school if course.school else course.department.school
    # end the url in the slug when present, otherwise in the id
    identifier = self.slug if self.slug is not None else self.id
    return [school.slug, course.slug, identifier]
340 def filter_html(self, html):
342 Apply all sanitizing filters to HTML.
343 Takes in HTML string and outputs HTML string.
345 # Fun fact: This could be made into a static method.
346 if not html or not len(html):
347 # if there was no HTML, return an empty string
351 # Iterate through filters, applying all to the soup object.
353 self.sanitize_anchor_html,
354 self.set_canonical_link,
356 soup = soupfilter(soup)
def sanitize_anchor_html(self, soup):
    """
    Filter the given BeautifulSoup obj by adding target=_blank to all
    anchor tags.
    Returns the same BeautifulSoup obj, modified in place.
    """
    # Fun fact: This could be made into a static method.
    # An explicit loop is used because map() must not be relied on for side
    # effects: where map() is lazy the attributes would never be set.
    for anchor in soup.find_all('a') or []:
        anchor['target'] = '_blank'

    # return filtered soup
    return soup
def canonical_link_predicate(tag):
    """
    Tell whether `tag` is a <link> element whose rel attribute contains
    'canonical'.
    """
    if tag.name != u'link':
        return False
    if not tag.has_attr('rel'):
        return False
    return u'canonical' in tag['rel']
387 def set_canonical_link(self, soup):
389 Filter the given BeautifulSoup obj by adding
390 <link rel="canonical" href="note.get_absolute_url" />
391 to the document head.
392 Returns BeautifulSoup obj.
394 domain = Site.objects.all()[0].domain
395 note_full_href = 'http://' + domain + self.get_absolute_url()
396 canonical_tags = soup.find_all(self.canonical_link_predicate)
398 for tag in canonical_tags:
399 tag['href'] = note_full_href
401 new_tag = soup.new_tag('link', rel='canonical', href=note_full_href)
402 head = soup.find('head')
405 # return filtered soup
408 def _update_parent_updated_at(self):
409 """ update the parent Course.updated_at model
410 with the latest uploaded_at """
411 self.course.updated_at = self.uploaded_at
def save(self, *args, **kwargs):
    """
    Save the Note. When the Note has an upload time later than the parent
    Course's updated_at, the Course timestamp is refreshed first.
    """
    uploaded = self.uploaded_at
    if uploaded and uploaded > self.course.updated_at:
        self._update_parent_updated_at()
    super(Note, self).save(*args, **kwargs)
def has_markdown(self):
    """ True when this note exposes a `notemarkdown` attribute. """
    attribute = "notemarkdown"
    return hasattr(self, attribute)
423 return self.mimetype in Note.PDF_MIMETYPES
class NoteMarkdown(models.Model):
    """ Markdown text attached one-to-one to a Note. """
    # The one-to-one link to the Note is also this model's primary key.
    note = models.OneToOneField(Note, primary_key=True)
    # Markdown text; may be blank or NULL.
    markdown = models.TextField(blank=True, null=True)
430 auto_add_check_unique_together(Note)
def update_note_counts(note_instance):
    """
    Refresh the thank and note counts of the note's course, and the note
    count of the school that course belongs to.
    """
    try:
        # test if the course still exists, or if this is a cascade delete.
        course = note_instance.course
    except Course.DoesNotExist:
        # this is a cascade delete. there is no course to update
        return

    # course exists
    course.update_thank_count()
    course.update_note_count()
    # the school hangs off the course directly or off its department
    if course.school:
        course.school.update_note_count()
    elif course.department.school:
        course.department.school.update_note_count()
@receiver(pre_save, sender=Note, weak=False)
def note_pre_save_receiver(sender, **kwargs):
    """Attach the stored (pre-save) version of the Note being saved to the
    instance as `old_instance`, for the post_save receiver to look at."""
    if 'instance' not in kwargs:
        return

    instance = kwargs['instance']
    try:
        instance.old_instance = Note.objects.get(id=instance.id)
    except ObjectDoesNotExist:
        # no stored row with this id, so there is nothing to remember
        pass
462 @receiver(post_save, sender=Note, weak=False)
463 def note_save_receiver(sender, **kwargs):
464 if not 'instance' in kwargs:
466 note = kwargs['instance']
469 update_note_counts(note)
472 index = SearchIndex()
473 if kwargs['created']:
476 index.update_note(note, note.old_instance)
478 logger.error("Error with IndexDen:\n" + traceback.format_exc())
481 @receiver(post_delete, sender=Note, weak=False)
482 def note_delete_receiver(sender, **kwargs):
483 if not 'instance' in kwargs:
485 note = kwargs['instance']
487 # Update course and school counts of how
488 # many notes they have
489 update_note_counts(kwargs['instance'])
491 # Remove document from search index
493 index = SearchIndex()
494 index.remove_note(note)
496 logger.error("Error with IndexDen:\n" + traceback.format_exc())
499 GenericKarmaEvent.create_event(note.user, note.name, GenericKarmaEvent.NOTE_DELETED)
class UserUploadMapping(models.Model):
    """ Pairs a user with the fp_file value of an uploaded file. """
    user = models.ForeignKey(User)
    fp_file = models.CharField(max_length=255)

    class Meta:
        # Model options are only honored inside Meta: each (user, fp_file)
        # pair may be stored once.
        unique_together = ('user', 'fp_file')
510 @receiver(user_logged_in, weak=True)
511 def find_orphan_notes(sender, **kwargs):
512 user = kwargs['user']
513 s = kwargs['request'].session
514 uploaded_note_urls = s.get(ANONYMOUS_UPLOAD_URLS, [])
515 for uploaded_note_url in uploaded_note_urls:
517 note = Note.objects.get(fp_file=uploaded_note_url)
520 NoteKarmaEvent.create_event(user, note, NoteKarmaEvent.UPLOAD)
521 except (ObjectDoesNotExist, MultipleObjectsReturned):
522 mapping = UserUploadMapping.objects.create(fp_file=uploaded_note_url, user=user)