3 # Copyright (C) 2012 FinalsClub Foundation
6 Models for the notes django app.
7 Contains only the minimum for handling files and their representation
12 from allauth.account.signals import user_logged_in
13 from django.contrib.auth.models import User
14 from django.contrib.sites.models import Site
15 from django.core.urlresolvers import reverse
16 from django.utils.safestring import mark_safe
17 from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned
18 from django.core.files.storage import default_storage
19 from django.db.models import SET_NULL
20 from django.db.models.signals import post_save, post_delete, pre_save
21 from django.dispatch import receiver
22 from karmaworld.apps.users.models import NoteKarmaEvent, GenericKarmaEvent
23 from karmaworld.secret.filepicker import FILEPICKER_API_KEY
24 from karmaworld.utils.filepicker import encode_fp_policy, sign_fp_policy
29 from django.conf import settings
30 from django.core.files import File
31 from django.core.files.storage import FileSystemStorage
32 from django.db import models
33 from django.utils.text import slugify
34 import django_filepicker
35 from bs4 import BeautifulSoup as BS
36 from taggit.managers import TaggableManager
38 from karmaworld.apps.courses.models import Course
39 from karmaworld.apps.licenses.models import License
40 from karmaworld.apps.notes.search import SearchIndex
41 from karmaworld.settings.manual_unique_together import auto_add_check_unique_together
43 ANONYMOUS_UPLOAD_URLS = 'anonymous_upload_urls'
45 logger = logging.getLogger(__name__)
46 fs = FileSystemStorage(location=settings.MEDIA_ROOT)
48 # Dictionary for S3 upload headers
50 'Content-Type': 'text/html',
53 # This is a bit hacky, but nothing else works. Grabbed this from a proper
54 # file configured via S3 management console.
55 # https://github.com/FinalsClub/karmaworld/issues/273#issuecomment-32572169
56 all_read_xml_acl = '<?xml version="1.0" encoding="UTF-8"?>\n<AccessControlPolicy xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Owner><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Owner><AccessControlList><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>READ</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>WRITE</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>READ_ACP</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>WRITE_ACP</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="Group"><URI>http://acs.amazonaws.com/groups/global/AllUsers</URI></Grantee><Permission>READ</Permission></Grant></AccessControlList></AccessControlPolicy>'
59 class Document(models.Model):
61 An Abstract Base Class representing a document intended to be subclassed.
63 course = models.ForeignKey(Course)
64 tags = TaggableManager(blank=True)
65 name = models.CharField(max_length=255, blank=True, null=True)
66 slug = models.SlugField(max_length=255, unique=True)
69 # license if different from default
70 license = models.ForeignKey(License, blank=True, null=True)
72 # provide an upstream file link
73 upstream_link = models.URLField(max_length=1024, blank=True, null=True, unique=True)
75 # metadata relevant to the Upload process
76 user = models.ForeignKey(User, blank=True, null=True, on_delete=SET_NULL)
77 ip = models.GenericIPAddressField(blank=True, null=True,
78 help_text=u"IP address of the uploader")
79 uploaded_at = models.DateTimeField(null=True, default=datetime.datetime.utcnow)
82 # if True, NEVER show this file
83 # WARNING: This may throw an error on migration
84 is_hidden = models.BooleanField(default=False)
87 # Everything Filepicker, now in one small area
89 # Allow pick (choose files), store (upload to S3), read (from FP repo),
90 # stat (status of FP repo files) for 1 year (current time + 365 * 24 * 3600
91 # seconds). Generated one time, at class definition upon import. So the
92 # server will need to be rebooted at least one time each year or this will
94 fp_policy_json = '{{"expiry": {0}, "call": ["pick","store","read","stat"]}}'
95 fp_policy_json = fp_policy_json.format(int(time.time() + 31536000))
96 fp_policy = encode_fp_policy(fp_policy_json)
97 fp_signature = sign_fp_policy(fp_policy)
99 # Hack because mimetypes conflict with extensions, but there is no way to
101 # https://github.com/Ink/django-filepicker/issues/22
102 django_filepicker.forms.FPFieldMixin.default_mimetypes = ''
103 # Now let django-filepicker do the heavy lifting. Sort of. Look at all those
105 fp_file = django_filepicker.models.FPFileField(
106 # FPFileField settings
107 apikey=FILEPICKER_API_KEY,
108 services='COMPUTER,DROPBOX,URL,GOOGLE_DRIVE,EVERNOTE,GMAIL,BOX,FACEBOOK,FLICKR,PICASA,IMAGE_SEARCH,WEBCAM,FTP',
110 'data-fp-multiple': 'true',
111 'data-fp-folders': 'true',
112 'data-fp-button-class':
113 'inline-button important add-note-btn',
114 'data-fp-button-text': 'Add Notes',
115 'data-fp-extensions':
116 '.pdf,.doc,.docx,.txt,.html,.rtf,.odt,.png,.jpg,.jpeg,.ppt,.pptx',
117 'data-fp-store-location': 'S3',
118 'data-fp-policy': fp_policy,
119 'data-fp-signature': fp_signature,
120 'type': 'filepicker',
121 'onchange': "got_file(event)",
124 null=True, blank=True,
125 upload_to='nil', # field ignored because S3, but required.
126 verbose_name='', # prevent a label from showing up
128 mimetype = models.CharField(max_length=255, blank=True, null=True)
132 ordering = ['-uploaded_at']
134 def _generate_unique_slug(self):
135 """ generate a unique slug based on name and uploaded_at """
136 _slug = slugify(unicode(self.name))
137 klass = self.__class__
138 collision = klass.objects.filter(slug=_slug)
140 _slug = u"{0}-{1}-{2}-{3}".format(
141 _slug, self.uploaded_at.month,
142 self.uploaded_at.day, self.uploaded_at.microsecond)
147 Memoized FilepickerFile getter. Returns FilepickerFile.
149 if not hasattr(self, 'cached_fpf'):
150 # Fetch additional_params containing signature, etc
151 aps = self.fp_file.field.additional_params
152 self.cached_fpf = django_filepicker.utils.FilepickerFile(self.fp_file.name, aps)
153 return self.cached_fpf
155 def get_fp_url(self):
157 Returns the Filepicker URL for reading the upstream document.
159 fpf = self._get_fpf()
160 # Return proper URL for reading
165 Downloads the file from filepicker.io and returns a Django File wrapper
168 # Fetch FilepickerFile
169 fpf = self._get_fpf()
171 return fpf.get_file()
def save(self, *args, **kwargs):
    """
    Persist the document.

    When the document has a name but no slug yet,
    `_generate_unique_slug()` is called before the regular model save.
    All arguments are passed through to the parent `save()`.
    """
    needs_slug = bool(self.name) and not self.slug
    if needs_slug:
        self._generate_unique_slug()
    super(Document, self).save(*args, **kwargs)
class NoteManager(models.Manager):
    """ Manager used to find Notes by natural key when restoring data. """

    def get_by_natural_key(self, fp_file, upstream_link):
        """
        Return the single Note matching both the given Filepicker URL and
        the given upstream URL.
        """
        lookup = {
            'fp_file': fp_file,
            'upstream_link': upstream_link,
        }
        return self.get(**lookup)
188 class Note(Document):
190 A django model representing an uploaded file and associated metadata.
192 objects = NoteManager()
196 'application/vnd.ms-powerpoint',
197 'application/vnd.openxmlformats-officedocument.presentationml.presentation'
200 # Cache the Google drive file link
201 gdrive_url = models.URLField(max_length=1024, blank=True, null=True, unique=True)
203 # Generated by Google Drive but saved locally
204 text = models.TextField(blank=True, null=True)
206 # Number of times this note has been flagged as abusive/spam.
207 flags = models.IntegerField(default=0,null=False)
209 # Social media tracking
210 tweeted = models.BooleanField(default=False)
211 thanks = models.PositiveIntegerField(default=0)
214 unique_together = ('fp_file', 'upstream_link')
215 ordering = ['-uploaded_at']
def __unicode__(self):
    """ Return a short label built from fp_file, upstream_link and id. """
    template = u"Note at {0} (from {1}) ({2})"
    return template.format(self.fp_file, self.upstream_link, self.id)
def natural_key(self):
    """
    Return the natural key of this Note: (fp_file, upstream_link).

    The Filepicker link should be unique by itself, but it may be null in
    the database, which is why the upstream link is part of the key too.
    """
    # gdrive_url might also fit the bill?
    key_fields = (self.fp_file, self.upstream_link)
    return key_fields
def get_relative_s3_path(self):
    """
    Return this Note's S3 path, relative to the appropriate bucket.
    """
    # Note.slug will be unique; it is either brought in from RawDocument or
    # created upon save() inside RawDocument.convert_to_note(). It makes for
    # a good filename and it's pretty well guaranteed to be there.
    filename = '{0}.html'.format(self.slug)
    return 'html/' + filename
def send_to_s3(self, html, do_save=True):
    """
    Push the given HTML up to S3 for this Note.
    Set do_save to False if the note will be saved outside this call.

    Nothing is uploaded when the HTML is empty or when a file already
    exists at the Note's S3 path. Raises LookupError when the key cannot be
    found right after the upload.
    """
    # NOTE(review): do_save is not referenced anywhere in this body - confirm
    # whether callers still rely on it.

    # Empty HTML: nothing to push.
    if not html or len(html) == 0:
        return

    s3_path = self.get_relative_s3_path()
    # Already on the static host: leave the existing file alone.
    if default_storage.exists(s3_path):
        return

    # This is a pretty ugly hackified answer to some s3boto shortcomings
    # and some decent default settings chosen by django-storages.

    # Create the new key (key == filename in S3 bucket), then upload to it.
    s3_key = default_storage.bucket.new_key(s3_path)
    s3_key.set_contents_from_string(html, headers=s3_upload_headers)
    if not s3_key.exists():
        message = 'Unable to find uploaded S3 document {0}'.format(str(s3_key))
        raise LookupError(message)

    # set the permissions for everyone to read.
    s3_key.set_xml_acl(all_read_xml_acl)
def update_note_on_s3(self, html):
    """
    Overwrite this Note's existing HTML file on S3 with the given HTML.

    Does nothing when the HTML is empty. When no file exists yet at the
    Note's S3 path, a warning is logged and nothing is uploaded.
    """
    # do nothing if HTML is empty.
    if not html:
        return

    # if it's not already there then bail out
    filepath = self.get_relative_s3_path()
    if not default_storage.exists(filepath):
        # Logger.warn() is a deprecated alias; warning() is the supported name.
        logger.warning("Cannot update note on S3, it does not exist already: " + unicode(self))
        return

    key = default_storage.bucket.get_key(filepath)
    key.set_contents_from_string(html, headers=s3_upload_headers)
    # re-apply the public-read ACL after replacing the contents
    key.set_xml_acl(all_read_xml_acl)
def get_absolute_url(self):
    """
    Resolve the note's detail url using the 'note_detail' route.

    The url is built from the school slug, the course slug and the note
    slug; the note id takes the place of the slug when the slug is None.
    The school is the course's own school when set, otherwise the school of
    the course's department.
    """
    course = self.course
    # Resolve the school once so that the id-based url gets the same
    # department fallback as the slug-based url (it used to dereference
    # course.school unconditionally).
    school = course.school or course.department.school
    # end the url in the slug when there is one, otherwise in the id
    identifier = self.slug if self.slug is not None else self.id
    return reverse('note_detail', args=[school.slug, course.slug, identifier])
def get_absolute_keywords_url(self):
    """
    Resolve the note's keywords url using the 'note_keywords' route.

    The url is built from the school slug, the course slug and the note
    slug; the note id takes the place of the slug when the slug is None.
    The school is the course's own school when set, otherwise the school of
    the course's department.
    """
    course = self.course
    # Resolve the school once so that the id-based url gets the same
    # department fallback as the slug-based url (it used to dereference
    # course.school unconditionally).
    school = course.school or course.department.school
    # end the url in the slug when there is one, otherwise in the id
    identifier = self.slug if self.slug is not None else self.id
    return reverse('note_keywords', args=[school.slug, course.slug, identifier])
306 def filter_html(self, html):
308 Apply all sanitizing filters to HTML.
309 Takes in HTML string and outputs HTML string.
311 # Fun fact: This could be made into a static method.
312 if not html or not len(html):
313 # if there was no HTML, return an empty string
317 # Iterate through filters, applying all to the soup object.
319 self.sanitize_anchor_html,
320 self.set_canonical_link,
322 soup = soupfilter(soup)
def sanitize_anchor_html(self, soup):
    """
    Filter the given BeautifulSoup obj by adding target=_blank to all
    anchor tags.
    Returns BeautifulSoup obj.
    """
    # Fun fact: This could be made into a static method.
    # Find all a tags in the HTML; an empty result means nothing to process.
    a_tags = soup.find_all('a') or []

    # A plain loop is used instead of map(): map() is lazy on Python 3, so
    # calling it only for its side effect would leave every tag untouched.
    for a_tag in a_tags:
        # set all anchors to have target="_blank"
        a_tag['target'] = '_blank'

    # return filtered soup
    return soup
def canonical_link_predicate(tag):
    """
    Tell whether the given tag is a <link> whose rel attribute contains
    'canonical'.
    """
    # NOTE(review): takes only the tag (no self) yet is looked up through
    # self elsewhere - presumably wrapped as a staticmethod; confirm.
    if tag.name != u'link':
        return False
    if not tag.has_attr('rel'):
        return False
    return u'canonical' in tag['rel']
353 def set_canonical_link(self, soup):
355 Filter the given BeautifulSoup obj by adding
356 <link rel="canonical" href="note.get_absolute_url" />
357 to the document head.
358 Returns BeautifulSoup obj.
360 domain = Site.objects.all()[0].domain
361 note_full_href = 'http://' + domain + self.get_absolute_url()
362 canonical_tags = soup.find_all(self.canonical_link_predicate)
364 for tag in canonical_tags:
365 tag['href'] = note_full_href
367 new_tag = soup.new_tag('link', rel='canonical', href=note_full_href)
368 head = soup.find('head')
371 # return filtered soup
374 def _update_parent_updated_at(self):
375 """ update the parent Course.updated_at model
376 with the latest uploaded_at """
377 self.course.updated_at = self.uploaded_at
def save(self, *args, **kwargs):
    """
    Save the Note.

    When the note has an upload time that is later than the parent course's
    updated_at, `_update_parent_updated_at()` is called before saving.
    All arguments are passed through to the parent `save()`.
    """
    uploaded_at = self.uploaded_at
    if uploaded_at and uploaded_at > self.course.updated_at:
        self._update_parent_updated_at()
    super(Note, self).save(*args, **kwargs)
def has_markdown(self):
    """ Tell whether this Note has a 'notemarkdown' attribute. """
    related_name = "notemarkdown"
    return hasattr(self, related_name)
389 return self.mimetype in Note.PDF_MIMETYPES
class NoteMarkdown(models.Model):
    """ Markdown text attached one-to-one to a Note. """
    # the related Note doubles as this model's primary key
    note = models.OneToOneField(
        Note,
        primary_key=True,
    )
    # optional markdown body
    markdown = models.TextField(
        blank=True,
        null=True,
    )
396 auto_add_check_unique_together(Note)
def update_note_counts(note_instance):
    """
    Refresh the note count of the note's course and of the related school.

    The school is the course's own school when set, otherwise the school of
    the course's department. Nothing is updated when the course no longer
    exists, which happens when the note goes away in a cascade delete.
    """
    try:
        # test if the course still exists, or if this is a cascade delete.
        course = note_instance.course
    except Course.DoesNotExist:
        # this is a cascade delete. there is no course to update
        return

    course.update_note_count()
    if course.school:
        course.school.update_note_count()
    elif course.department.school:
        course.department.school.update_note_count()
@receiver(pre_save, sender=Note, weak=False)
def note_pre_save_receiver(sender, **kwargs):
    """Stick the pre-save (currently stored) version of the given Note
    instance on the instance itself, as `old_instance`.
    This will be looked at in post_save."""
    if 'instance' not in kwargs:
        return

    saving_note = kwargs['instance']
    try:
        saving_note.old_instance = Note.objects.get(id=saving_note.id)
    except ObjectDoesNotExist:
        # no stored Note with this id, so there is no old version to keep
        pass
427 @receiver(post_save, sender=Note, weak=False)
428 def note_save_receiver(sender, **kwargs):
429 if not 'instance' in kwargs:
431 note = kwargs['instance']
433 if kwargs['created']:
434 update_note_counts(note)
437 index = SearchIndex()
438 if kwargs['created']:
441 index.update_note(note, note.old_instance)
443 logger.error("Error with IndexDen:\n" + traceback.format_exc())
446 @receiver(post_delete, sender=Note, weak=False)
447 def note_delete_receiver(sender, **kwargs):
448 if not 'instance' in kwargs:
450 note = kwargs['instance']
452 # Update course and school counts of how
453 # many notes they have
454 update_note_counts(kwargs['instance'])
456 # Remove document from search index
458 index = SearchIndex()
459 index.remove_note(note)
461 logger.error("Error with IndexDen:\n" + traceback.format_exc())
464 GenericKarmaEvent.create_event(note.user, note.name, GenericKarmaEvent.NOTE_DELETED)
class UserUploadMapping(models.Model):
    """ Links a user to the Filepicker file reference of an upload. """
    # the user the upload belongs to
    user = models.ForeignKey(User)
    # Filepicker file reference of the upload
    fp_file = models.CharField(max_length=255)

    class Meta:
        # a given file can be mapped to a given user only once
        unique_together = (
            'user',
            'fp_file',
        )
475 @receiver(user_logged_in, weak=True)
476 def find_orphan_notes(sender, **kwargs):
477 user = kwargs['user']
478 s = kwargs['request'].session
479 uploaded_note_urls = s.get(ANONYMOUS_UPLOAD_URLS, [])
480 for uploaded_note_url in uploaded_note_urls:
482 note = Note.objects.get(fp_file=uploaded_note_url)
485 NoteKarmaEvent.create_event(user, note, NoteKarmaEvent.UPLOAD)
486 except (ObjectDoesNotExist, MultipleObjectsReturned):
487 mapping = UserUploadMapping.objects.create(fp_file=uploaded_note_url, user=user)