3 # Copyright (C) 2012 FinalsClub Foundation
6 Models for the notes django app.
7 Contains only the minimum for handling files and their representation
12 from allauth.account.signals import user_logged_in
13 from django.contrib.auth.models import User
14 from django.contrib.sites.models import Site
15 from django.core.urlresolvers import reverse
16 from django.utils.safestring import mark_safe
17 from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned
18 from django.core.files.storage import default_storage
19 from django.db.models import SET_NULL
20 from django.db.models.signals import post_save, post_delete, pre_save
21 from django.dispatch import receiver
22 from karmaworld.apps.users.models import NoteKarmaEvent, GenericKarmaEvent
23 from karmaworld.secret.filepicker import FILEPICKER_API_KEY
24 from karmaworld.utils.filepicker import encode_fp_policy, sign_fp_policy
29 from django.conf import settings
30 from django.core.files import File
31 from django.core.files.storage import FileSystemStorage
32 from django.db import models
33 from django.utils.text import slugify
34 import django_filepicker
35 from bs4 import BeautifulSoup as BS
36 from taggit.managers import TaggableManager
38 from karmaworld.apps.courses.models import Course
39 from karmaworld.apps.licenses.models import License
40 from karmaworld.apps.notes.search import SearchIndex
41 from karmaworld.settings.manual_unique_together import auto_add_check_unique_together
43 ANONYMOUS_UPLOAD_URLS = 'anonymous_upload_urls'
45 logger = logging.getLogger(__name__)
46 fs = FileSystemStorage(location=settings.MEDIA_ROOT)
48 # Dictionary for S3 upload headers
50 'Content-Type': 'text/html',
53 # This is a bit hacky, but nothing else works. Grabbed this from a proper
54 # file configured via S3 management console.
55 # https://github.com/FinalsClub/karmaworld/issues/273#issuecomment-32572169
56 all_read_xml_acl = '<?xml version="1.0" encoding="UTF-8"?>\n<AccessControlPolicy xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Owner><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Owner><AccessControlList><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>READ</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>WRITE</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>READ_ACP</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>WRITE_ACP</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="Group"><URI>http://acs.amazonaws.com/groups/global/AllUsers</URI></Grantee><Permission>READ</Permission></Grant></AccessControlList></AccessControlPolicy>'
59 class Document(models.Model):
61 An Abstract Base Class representing a document intended to be subclassed.
63 course = models.ForeignKey(Course)
64 tags = TaggableManager(blank=True)
65 name = models.CharField(max_length=255, blank=True, null=True)
66 slug = models.SlugField(max_length=255, unique=True)
68 # license if different from default
69 license = models.ForeignKey(License, blank=True, null=True)
71 # provide an upstream file link
72 upstream_link = models.URLField(max_length=1024, blank=True, null=True, unique=True)
74 # metadata relevant to the Upload process
75 user = models.ForeignKey(User, blank=True, null=True, on_delete=SET_NULL)
76 ip = models.GenericIPAddressField(blank=True, null=True,
77 help_text=u"IP address of the uploader")
78 uploaded_at = models.DateTimeField(null=True, default=datetime.datetime.utcnow)
81 # if True, NEVER show this file
82 # WARNING: This may throw an error on migration
83 is_hidden = models.BooleanField(default=False)
86 # Everything Filepicker, now in one small area
88 # Allow pick (choose files), store (upload to S3), read (from FP repo),
89 # stat (status of FP repo files) for 1 year (current time + 365 * 24 * 3600
90 # seconds). Generated one time, at class definition upon import. So the
91 # server will need to be rebooted at least one time each year or this will
93 fp_policy_json = '{{"expiry": {0}, "call": ["pick","store","read","stat"]}}'
94 fp_policy_json = fp_policy_json.format(int(time.time() + 31536000))
95 fp_policy = encode_fp_policy(fp_policy_json)
96 fp_signature = sign_fp_policy(fp_policy)
98 # Hack because mimetypes conflict with extensions, but there is no way to
100 # https://github.com/Ink/django-filepicker/issues/22
101 django_filepicker.forms.FPFieldMixin.default_mimetypes = ''
102 # Now let django-filepicker do the heavy lifting. Sort of. Look at all those
104 fp_file = django_filepicker.models.FPFileField(
105 # FPFileField settings
106 apikey=FILEPICKER_API_KEY,
107 services='COMPUTER,DROPBOX,URL,GOOGLE_DRIVE,EVERNOTE,GMAIL,BOX,FACEBOOK,FLICKR,PICASA,IMAGE_SEARCH,WEBCAM,FTP',
109 'data-fp-multiple': 'true',
110 'data-fp-folders': 'true',
111 'data-fp-button-class':
112 'inline-button important add-note-btn',
113 'data-fp-button-text': 'Add Notes',
114 'data-fp-extensions':
115 '.pdf,.doc,.docx,.txt,.html,.rtf,.odt,.png,.jpg,.jpeg,.ppt,.pptx',
116 'data-fp-store-location': 'S3',
117 'data-fp-policy': fp_policy,
118 'data-fp-signature': fp_signature,
119 'type': 'filepicker',
120 'onchange': "got_file(event)",
123 null=True, blank=True,
124 upload_to='nil', # field ignored because S3, but required.
125 verbose_name='', # prevent a label from showing up
127 mimetype = models.CharField(max_length=255, blank=True, null=True)
131 ordering = ['-uploaded_at']
133 def _generate_unique_slug(self):
134 """ generate a unique slug based on name and uploaded_at """
135 _slug = slugify(unicode(self.name))
136 klass = self.__class__
137 collision = klass.objects.filter(slug=_slug)
139 _slug = u"{0}-{1}-{2}-{3}".format(
140 _slug, self.uploaded_at.month,
141 self.uploaded_at.day, self.uploaded_at.microsecond)
146 Memoized FilepickerFile getter. Returns FilepickerFile.
148 if not hasattr(self, 'cached_fpf'):
149 # Fetch additional_params containing signature, etc
150 aps = self.fp_file.field.additional_params
151 self.cached_fpf = django_filepicker.utils.FilepickerFile(self.fp_file.name, aps)
152 return self.cached_fpf
154 def get_fp_url(self):
156 Returns the Filepicker URL for reading the upstream document.
158 fpf = self._get_fpf()
159 # Return proper URL for reading
164 Downloads the file from filepicker.io and returns a Django File wrapper
167 # Fetch FilepickerFile
168 fpf = self._get_fpf()
170 return fpf.get_file()
def save(self, *args, **kwargs):
    """
    Save the document, generating a slug first when one is needed.

    A slug is only generated when the document has a name and no slug has
    been set yet. All arguments are passed through to the parent save().
    """
    has_name = bool(self.name)
    if has_name and not self.slug:
        # fill in self.slug before the row is written
        self._generate_unique_slug()
    super(Document, self).save(*args, **kwargs)
class NoteManager(models.Manager):
    """ Handle restoring data. """

    def get_by_natural_key(self, fp_file, upstream_link):
        """
        Look up the single Note matching the given Filepicker URL and
        upstream URL, i.e. the pair returned by Note.natural_key().
        """
        lookup = {
            'fp_file': fp_file,
            'upstream_link': upstream_link,
        }
        return self.get(**lookup)
187 class Note(Document):
189 A django model representing an uploaded file and associated metadata.
191 objects = NoteManager()
193 # FIXME: refactor file choices after FP.io integration
195 FILE_TYPE_CHOICES = (
196 ('doc', 'MS Word compatible file (.doc, .docx, .rtf, .odf)'),
197 ('img', 'Scan or picture of notes'),
199 ('ppt', 'Powerpoint'),
201 (UNKNOWN_FILE, 'Unknown file'),
206 'application/vnd.ms-powerpoint',
207 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
210 file_type = models.CharField(max_length=15,
211 choices=FILE_TYPE_CHOICES,
212 default=UNKNOWN_FILE,
213 blank=True, null=True)
215 # Cache the Google drive file link
216 gdrive_url = models.URLField(max_length=1024, blank=True, null=True, unique=True)
218 # Upload files to MEDIA_ROOT/notes/YEAR/MONTH/DAY, 2012/10/30/filename
219 pdf_file = models.FileField(
221 upload_to="notes/%Y/%m/%d/",
222 blank=True, null=True)
224 # Generated by Google Drive but saved locally
225 text = models.TextField(blank=True, null=True)
226 static_html = models.BooleanField(default=False)
228 # html is deprecated. delete once data is all sorted.
229 html = models.TextField(blank=True, null=True)
231 # Academic year of course
232 year = models.IntegerField(blank=True, null=True,\
233 default=datetime.datetime.utcnow().year)
235 # Number of times this note has been flagged as abusive/spam.
236 flags = models.IntegerField(default=0,null=False)
238 # Social media tracking
239 tweeted = models.BooleanField(default=False)
240 thanks = models.PositiveIntegerField(default=0)
243 unique_together = ('fp_file', 'upstream_link')
244 ordering = ['-uploaded_at']
def __unicode__(self):
    """ Describe the note by its Filepicker file, upstream link and id. """
    template = u"Note at {0} (from {1}) ({2})"
    return template.format(self.fp_file, self.upstream_link, self.id)
def natural_key(self):
    """
    Return the (fp_file, upstream_link) pair identifying this Note.

    The Filepicker link should be unique by itself, but it may be null in
    the database, so the upstream link is included as a second component.
    """
    # gdrive_url might also fit the bill?
    filepicker_ref = self.fp_file
    upstream_ref = self.upstream_link
    return (filepicker_ref, upstream_ref)
def get_relative_s3_path(self):
    """
    Build this Note's S3 path, relative to the appropriate bucket.
    """
    # The slug is used as the filename: Note.slug will be unique and is
    # either brought in from RawDocument or created upon save() inside
    # RawDocument.convert_to_note(), so it is pretty well guaranteed to be
    # there.
    template = 'html/{0}.html'
    return template.format(self.slug)
268 def send_to_s3(self, html, do_save=True):
270 Push the given HTML up to S3 for this Note.
271 Set do_save to False if the note will be saved outside this call.
273 # do nothing if HTML is empty.
274 if not html or not len(html):
276 # do nothing if already uploaded.
277 # Maybe run checksums if possible to confirm its really done?
278 # (but then you gotta wonder was the original correct or is the new
282 # upload the HTML file to static host if it is not already there
283 filepath = self.get_relative_s3_path()
284 if not default_storage.exists(filepath):
285 # This is a pretty ugly hackified answer to some s3boto shortcomings
286 # and some decent default settings chosen by django-storages.
288 # Create the new key (key == filename in S3 bucket)
289 newkey = default_storage.bucket.new_key(filepath)
291 newkey.set_contents_from_string(html, headers=s3_upload_headers)
292 if not newkey.exists():
293 raise LookupError('Unable to find uploaded S3 document {0}'.format(str(newkey)))
295 # set the permissions for everyone to read.
296 newkey.set_xml_acl(all_read_xml_acl)
298 # If the code reaches here, either:
299 # filepath exists on S3 but static_html is not marked.
301 # file was just uploaded successfully to filepath
302 # Regardless, set note as uploaded.
303 self.static_html = True
def update_note_on_s3(self, html):
    """
    Overwrite the HTML already stored on S3 for this Note.

    Nothing happens when `html` is empty. When no file exists yet at this
    Note's S3 path, a warning is logged and nothing is uploaded: this method
    only updates existing files, it never creates them.
    """
    # do nothing if HTML is empty.
    if not html:
        return

    # if it's not already there then bail out
    filepath = self.get_relative_s3_path()
    key = None
    if default_storage.exists(filepath):
        key = default_storage.bucket.get_key(filepath)
    # get_key() can still come back empty if the file vanished between the
    # existence check and the fetch, so treat that like "does not exist".
    if key is None:
        logger.warning("Cannot update note on S3, it does not exist already: " + unicode(self))
        return

    # replace the contents, then re-apply the world-readable ACL
    key.set_contents_from_string(html, headers=s3_upload_headers)
    key.set_xml_acl(all_read_xml_acl)
def _reverse_note_route(self, route_name):
    """
    Reverse the named note route for this Note.

    The URL is built from the school slug, the course slug and the note
    slug; the note id is used in place of the slug when the slug is None.
    The school is taken from the course when it has one, otherwise from the
    course's department.
    """
    course = self.course
    school = course.school or course.department.school
    identifier = self.slug if self.slug is not None else self.id
    return reverse(route_name, args=[school.slug, course.slug, identifier])

def get_absolute_url(self):
    """ Resolve note url, use 'note_detail' route and slug if slug
        otherwise use note.id
    """
    return self._reverse_note_route('note_detail')

def get_absolute_keywords_url(self):
    """ Resolve note keywords url, use 'note_keywords' route and slug if
        slug otherwise use note.id
    """
    return self._reverse_note_route('note_keywords')
350 def filter_html(self, html):
352 Apply all sanitizing filters to HTML.
353 Takes in HTML string and outputs HTML string.
355 # Fun fact: This could be made into a static method.
356 if not html or not len(html):
357 # if there was no HTML, return an empty string
361 # Iterate through filters, applying all to the soup object.
363 self.sanitize_anchor_html,
364 self.set_canonical_link,
366 soup = soupfilter(soup)
def sanitize_anchor_html(self, soup):
    """
    Filter the given BeautifulSoup obj by adding target=_blank to all
    anchor (<a>) tags it contains. The soup is modified in place.
    Returns BeautifulSoup obj.
    """
    # Fun fact: This could be made into a static method.
    # An explicit loop is used instead of map(): map() is lazy on Python 3,
    # so calling it only for its side effects would leave the tags untouched.
    # With no anchors the loop simply does nothing.
    for a_tag in soup.find_all('a') or []:
        # set all anchors to have target="_blank"
        a_tag['target'] = '_blank'

    # return filtered soup
    return soup
def canonical_link_predicate(tag):
    """
    Tell whether the given tag is a <link> element that has a rel
    attribute containing 'canonical'.
    """
    is_link = tag.name == u'link'
    return is_link and tag.has_attr('rel') and u'canonical' in tag['rel']
397 def set_canonical_link(self, soup):
399 Filter the given BeautifulSoup obj by adding
400 <link rel="canonical" href="note.get_absolute_url" />
401 to the document head.
402 Returns BeautifulSoup obj.
404 domain = Site.objects.all()[0].domain
405 note_full_href = 'http://' + domain + self.get_absolute_url()
406 canonical_tags = soup.find_all(self.canonical_link_predicate)
408 for tag in canonical_tags:
409 tag['href'] = note_full_href
411 new_tag = soup.new_tag('link', rel='canonical', href=note_full_href)
412 head = soup.find('head')
415 # return filtered soup
418 def _update_parent_updated_at(self):
419 """ update the parent Course.updated_at model
420 with the latest uploaded_at """
421 self.course.updated_at = self.uploaded_at
def save(self, *args, **kwargs):
    """
    Save the note, first updating the parent course when this note's
    uploaded_at is later than the course's updated_at.
    """
    uploaded = self.uploaded_at
    if uploaded and uploaded > self.course.updated_at:
        self._update_parent_updated_at()
    super(Note, self).save(*args, **kwargs)
def has_markdown(self):
    """ Report whether this note has a `notemarkdown` attribute. """
    attribute_name = "notemarkdown"
    return hasattr(self, attribute_name)
433 return self.mimetype in Note.PDF_MIMETYPES
class NoteMarkdown(models.Model):
    """ Optional markdown text attached one-to-one to a Note. """
    # the related Note doubles as this model's primary key
    note = models.OneToOneField(
        Note,
        primary_key=True,
    )
    # markdown body; may be empty or NULL
    markdown = models.TextField(
        blank=True,
        null=True,
    )
440 auto_add_check_unique_together(Note)
443 def update_note_counts(note_instance):
445 # test if the course still exists, or if this is a cascade delete.
447 except Course.DoesNotExist:
448 # this is a cascade delete. there is no course to update
452 note_instance.course.update_note_count()
453 if note_instance.course.school:
454 note_instance.course.school.update_note_count()
455 elif note_instance.course.department.school:
456 note_instance.course.department.school.update_note_count()
458 @receiver(pre_save, sender=Note, weak=False)
459 def note_pre_save_receiver(sender, **kwargs):
460 """Stick an instance of the pre-save value of
461 the given Note instance in the instances itself.
462 This will be looked at in post_save."""
463 if not 'instance' in kwargs:
467 kwargs['instance'].old_instance = Note.objects.get(id=kwargs['instance'].id)
468 except ObjectDoesNotExist:
471 @receiver(post_save, sender=Note, weak=False)
472 def note_save_receiver(sender, **kwargs):
473 if not 'instance' in kwargs:
475 note = kwargs['instance']
477 if kwargs['created']:
478 update_note_counts(note)
481 index = SearchIndex()
482 if kwargs['created']:
485 index.update_note(note, note.old_instance)
487 logger.error("Error with IndexDen:\n" + traceback.format_exc())
490 @receiver(post_delete, sender=Note, weak=False)
491 def note_delete_receiver(sender, **kwargs):
492 if not 'instance' in kwargs:
494 note = kwargs['instance']
496 # Update course and school counts of how
497 # many notes they have
498 update_note_counts(kwargs['instance'])
500 # Remove document from search index
502 index = SearchIndex()
503 index.remove_note(note)
505 logger.error("Error with IndexDen:\n" + traceback.format_exc())
508 GenericKarmaEvent.create_event(note.user, note.name, GenericKarmaEvent.NOTE_DELETED)
class UserUploadMapping(models.Model):
    """ Pairs a user with an fp_file string. """
    user = models.ForeignKey(User)
    fp_file = models.CharField(max_length=255)

    class Meta:
        # each (user, fp_file) pair may be stored only once
        unique_together = ('user', 'fp_file')
519 @receiver(user_logged_in, weak=True)
520 def find_orphan_notes(sender, **kwargs):
521 user = kwargs['user']
522 s = kwargs['request'].session
523 uploaded_note_urls = s.get(ANONYMOUS_UPLOAD_URLS, [])
524 for uploaded_note_url in uploaded_note_urls:
526 note = Note.objects.get(fp_file=uploaded_note_url)
529 NoteKarmaEvent.create_event(user, note, NoteKarmaEvent.UPLOAD)
530 except (ObjectDoesNotExist, MultipleObjectsReturned):
531 mapping = UserUploadMapping.objects.create(fp_file=uploaded_note_url, user=user)