3 # Copyright (C) 2012 FinalsClub Foundation
6 Models for the notes django app.
7 Contains only the minimum for handling files and their representation
12 from allauth.account.signals import user_logged_in
13 from django.contrib.auth.models import User
14 from django.contrib.sites.models import Site
15 from django.utils.safestring import mark_safe
16 from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned
17 from django.core.files.storage import default_storage
18 from django.db.models import SET_NULL
19 from django.db.models.signals import post_save, post_delete, pre_save
20 from django.dispatch import receiver
21 from karmaworld.apps.users.models import NoteKarmaEvent, GenericKarmaEvent
22 from karmaworld.secret.filepicker import FILEPICKER_API_KEY
23 from karmaworld.utils.filepicker import encode_fp_policy, sign_fp_policy
28 from django.conf import settings
29 from django.core.files import File
30 from django.core.files.storage import FileSystemStorage
31 from django.db import models
32 from django.utils.text import slugify
33 import django_filepicker
34 from bs4 import BeautifulSoup as BS
35 from taggit.managers import TaggableManager
37 from karmaworld.apps.courses.models import Course
38 from karmaworld.apps.licenses.models import License
39 from karmaworld.apps.notes.search import SearchIndex
40 from karmaworld.settings.manual_unique_together import auto_add_check_unique_together
# Session key read by the login receiver to find the Filepicker URLs of
# uploaded notes.
ANONYMOUS_UPLOAD_URLS = 'anonymous_upload_urls'
# Module-level logger named after this module.
logger = logging.getLogger(__name__)
# Filesystem storage rooted at settings.MEDIA_ROOT.
fs = FileSystemStorage(location=settings.MEDIA_ROOT)
47 # Dictionary for S3 upload headers
49 'Content-Type': 'text/html',
# This is a bit hacky, but nothing else works: the ACL XML below was copied
# from a file whose permissions had been configured correctly via the S3
# management console.
# https://github.com/FinalsClub/karmaworld/issues/273#issuecomment-32572169
55 all_read_xml_acl = '<?xml version="1.0" encoding="UTF-8"?>\n<AccessControlPolicy xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Owner><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Owner><AccessControlList><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>READ</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>WRITE</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>READ_ACP</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>WRITE_ACP</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="Group"><URI>http://acs.amazonaws.com/groups/global/AllUsers</URI></Grantee><Permission>READ</Permission></Grant></AccessControlList></AccessControlPolicy>'
58 class Document(models.Model):
60 An Abstract Base Class representing a document intended to be subclassed.
62 course = models.ForeignKey(Course)
63 tags = TaggableManager(blank=True)
64 name = models.CharField(max_length=255, blank=True, null=True)
65 slug = models.SlugField(max_length=255, unique=True)
67 # license if different from default
68 license = models.ForeignKey(License, blank=True, null=True)
70 # provide an upstream file link
71 upstream_link = models.URLField(max_length=1024, blank=True, null=True, unique=True)
73 # metadata relevant to the Upload process
74 user = models.ForeignKey(User, blank=True, null=True, on_delete=SET_NULL)
75 ip = models.GenericIPAddressField(blank=True, null=True,
76 help_text=u"IP address of the uploader")
77 uploaded_at = models.DateTimeField(null=True, default=datetime.datetime.utcnow)
80 # if True, NEVER show this file
81 # WARNING: This may throw an error on migration
82 is_hidden = models.BooleanField(default=False)
85 # Everything Filepicker, now in one small area
87 # Allow pick (choose files), store (upload to S3), read (from FP repo),
88 # stat (status of FP repo files) for 1 year (current time + 365 * 24 * 3600
89 # seconds). Generated one time, at class definition upon import. So the
90 # server will need to be rebooted at least one time each year or this will
92 fp_policy_json = '{{"expiry": {0}, "call": ["pick","store","read","stat"]}}'
93 fp_policy_json = fp_policy_json.format(int(time.time() + 31536000))
94 fp_policy = encode_fp_policy(fp_policy_json)
95 fp_signature = sign_fp_policy(fp_policy)
97 # Hack because mimetypes conflict with extensions, but there is no way to
99 # https://github.com/Ink/django-filepicker/issues/22
100 django_filepicker.forms.FPFieldMixin.default_mimetypes = ''
101 # Now let django-filepicker do the heavy lifting. Sort of. Look at all those
103 fp_file = django_filepicker.models.FPFileField(
104 # FPFileField settings
105 apikey=FILEPICKER_API_KEY,
106 services='COMPUTER,DROPBOX,URL,GOOGLE_DRIVE,EVERNOTE,GMAIL,BOX,FACEBOOK,FLICKR,PICASA,IMAGE_SEARCH,WEBCAM,FTP',
108 'data-fp-multiple': 'true',
109 'data-fp-folders': 'true',
110 'data-fp-button-class':
111 'add-note-btn small-10 columns large-4',
112 'data-fp-button-text':
113 mark_safe("<i class='fa fa-arrow-circle-o-up'></i> add notes"),
114 'data-fp-drag-class':
115 'dragdrop show-for-medium-up large-7 columns',
116 'data-fp-drag-text': 'Drop Some Knowledge',
117 'data-fp-extensions':
118 '.pdf,.doc,.docx,.txt,.html,.rtf,.odt,.png,.jpg,.jpeg,.ppt,.pptx',
119 'data-fp-store-location': 'S3',
120 'data-fp-policy': fp_policy,
121 'data-fp-signature': fp_signature,
122 'onchange': "got_file(event)",
125 null=True, blank=True,
126 upload_to='nil', # field ignored because S3, but required.
127 verbose_name='', # prevent a label from showing up
129 mimetype = models.CharField(max_length=255, blank=True, null=True)
133 ordering = ['-uploaded_at']
def _generate_unique_slug(self):
    """
    Build a slug from the name and store it on self.slug.

    The base slug is slugify(name). If a row of the same model class already
    uses that slug, the month, day and microsecond of uploaded_at are
    appended to tell the two apart.
    """
    _slug = slugify(unicode(self.name))
    # exists() asks the database a yes/no question instead of loading every
    # colliding row just to test the queryset for truthiness.
    # NOTE(review): the collision test and the final assignment to self.slug
    # were missing from the reviewed extract and are reconstructed here --
    # confirm against the real file.
    if self.__class__.objects.filter(slug=_slug).exists():
        _slug = u"{0}-{1}-{2}-{3}".format(
            _slug, self.uploaded_at.month,
            self.uploaded_at.day, self.uploaded_at.microsecond)
    self.slug = _slug
def _get_fpf(self):
    """
    Memoized FilepickerFile getter. Returns FilepickerFile.

    The FilepickerFile is built on first use from the name of fp_file and the
    field's additional_params, then kept on the instance as cached_fpf.
    """
    # NOTE(review): the `def` line was missing from the reviewed extract; the
    # name is taken from the self._get_fpf() call sites -- confirm.
    if not hasattr(self, 'cached_fpf'):
        # Fetch additional_params containing signature, etc
        aps = self.fp_file.field.additional_params
        self.cached_fpf = django_filepicker.utils.FilepickerFile(self.fp_file.name, aps)
    return self.cached_fpf
156 def get_fp_url(self):
158 Returns the Filepicker URL for reading the upstream document.
160 # Fetch FilepickerFile
161 if not self.fp_file.name:
164 fpf = self._get_fpf()
165 # Return proper URL for reading
170 Downloads the file from filepicker.io and returns a Django File wrapper
173 # Fetch FilepickerFile
174 fpf = self._get_fpf()
176 return fpf.get_file()
def save(self, *args, **kwargs):
    """Save the document, generating a slug first when it has a name but no slug."""
    slug_missing = not self.slug
    if slug_missing and self.name:
        self._generate_unique_slug()
    super(Document, self).save(*args, **kwargs)
class NoteManager(models.Manager):
    """Manager adding natural-key lookup, used when restoring data."""

    def get_by_natural_key(self, fp_file, upstream_link):
        """
        Return the Note matching both the given Filepicker file and the
        given upstream link.
        """
        lookup = {'fp_file': fp_file, 'upstream_link': upstream_link}
        return self.get(**lookup)
193 class Note(Document):
195 A django model representing an uploaded file and associated metadata.
197 objects = NoteManager()
199 # FIXME: refactor file choices after FP.io integration
201 FILE_TYPE_CHOICES = (
202 ('doc', 'MS Word compatible file (.doc, .docx, .rtf, .odf)'),
203 ('img', 'Scan or picture of notes'),
205 ('ppt', 'Powerpoint'),
207 (UNKNOWN_FILE, 'Unknown file'),
210 file_type = models.CharField(max_length=15,
211 choices=FILE_TYPE_CHOICES,
212 default=UNKNOWN_FILE,
213 blank=True, null=True)
215 # Cache the Google drive file link
216 gdrive_url = models.URLField(max_length=1024, blank=True, null=True, unique=True)
218 # Upload files to MEDIA_ROOT/notes/YEAR/MONTH/DAY, 2012/10/30/filename
219 pdf_file = models.FileField(
221 upload_to="notes/%Y/%m/%d/",
222 blank=True, null=True)
224 # Generated by Google Drive but saved locally
225 text = models.TextField(blank=True, null=True)
226 static_html = models.BooleanField(default=False)
228 # html is deprecated. delete once data is all sorted.
229 html = models.TextField(blank=True, null=True)
# Academic year of course
# NOTE(review): utcnow() is called right here, while the class body is being
# executed, so the default is the year in which this module was imported, not
# the year in which a given Note is created. A long-running process keeps the
# old year after New Year until it is restarted.
year = models.IntegerField(blank=True, null=True,\
        default=datetime.datetime.utcnow().year)
235 # Number of times this note has been flagged as abusive/spam.
236 flags = models.IntegerField(default=0,null=False)
238 # Social media tracking
239 tweeted = models.BooleanField(default=False)
240 thanks = models.PositiveIntegerField(default=0)
243 unique_together = ('fp_file', 'upstream_link')
244 ordering = ['-uploaded_at']
def __unicode__(self):
    """Describe the note by its Filepicker file and its upstream link."""
    template = u"Note at {0} (from {1})"
    return template.format(self.fp_file, self.upstream_link)
def natural_key(self):
    """
    Return the (fp_file, upstream_link) pair that identifies this Note.

    The Filepicker link should be unique by itself, but it may be null in
    the database, so the upstream link is part of the key as well.
    """
    # gdrive_url might also fit the bill?
    key = (self.fp_file, self.upstream_link)
    return key
def get_relative_s3_path(self):
    """
    Return the path of this note's HTML file relative to its S3 bucket.
    """
    # Note.slug will be unique and brought in from RawDocument or created
    # upon save() inside RawDocument.convert_to_note(), so it is a good base
    # for the file name and is pretty well guaranteed to be there.
    filename = '{0}.html'.format(self.slug)
    return 'html/' + filename
def send_to_s3(self, html, do_save=True):
    """
    Push the given HTML up to S3 for this Note.

    Nothing happens when html is empty or when the note is already marked as
    uploaded (static_html). Otherwise the HTML is written to the path given
    by get_relative_s3_path(), unless a file already exists there, and the
    note is marked as uploaded.

    Set do_save to False if the note will be saved outside this call.

    Raises LookupError if the key cannot be found right after the upload.
    """
    # NOTE(review): the early returns, the static_html guard and the final
    # do_save handling were missing from the reviewed extract; they are
    # reconstructed from the surrounding comments -- confirm.

    # do nothing if HTML is empty.
    if not html:
        return
    # do nothing if already uploaded.
    # Maybe run checksums if possible to confirm its really done?
    if self.static_html:
        return

    # upload the HTML file to static host if it is not already there
    filepath = self.get_relative_s3_path()
    if not default_storage.exists(filepath):
        # This is a pretty ugly hackified answer to some s3boto shortcomings
        # and some decent default settings chosen by django-storages.

        # Create the new key (key == filename in S3 bucket)
        newkey = default_storage.bucket.new_key(filepath)
        newkey.set_contents_from_string(html, headers=s3_upload_headers)
        if not newkey.exists():
            raise LookupError('Unable to find uploaded S3 document {0}'.format(str(newkey)))

        # set the permissions for everyone to read.
        newkey.set_xml_acl(all_read_xml_acl)

    # If the code reaches here, either:
    #   filepath exists on S3 but static_html is not marked, or
    #   the file was just uploaded successfully to filepath.
    # Regardless, set note as uploaded.
    self.static_html = True
    if do_save:
        self.save()
def update_note_on_s3(self, html):
    """
    Overwrite this note's existing HTML file on S3 with the given HTML.

    Nothing is written when html is empty, or when no file exists at the
    path given by get_relative_s3_path(); the latter is logged as a warning.
    """
    # NOTE(review): the two early returns were missing from the reviewed
    # extract; they are reconstructed from the "do nothing" / "bail out"
    # comments -- confirm.

    # do nothing if HTML is empty.
    if not html:
        return
    # if it's not already there then bail out
    filepath = self.get_relative_s3_path()
    if not default_storage.exists(filepath):
        # warning() replaces the deprecated warn() alias; the message text is
        # unchanged, it is only formatted lazily by the logging module.
        logger.warning("Cannot update note on S3, it does not exist already: %s", unicode(self))
        return

    key = default_storage.bucket.get_key(filepath)
    key.set_contents_from_string(html, headers=s3_upload_headers)
    # re-apply the world-readable ACL after rewriting the contents
    key.set_xml_acl(all_read_xml_acl)
def get_absolute_url(self):
    """
    Return the site-relative URL /<school slug>/<course slug>/<note>.

    The last component is the note's slug, or the note's id when the slug
    is None.
    """
    course = self.course
    # end the url in the slug when there is one, otherwise in the id
    tail = self.slug if self.slug is not None else self.id
    return u"/{0}/{1}/{2}".format(course.school.slug, course.slug, tail)
332 def filter_html(self, html):
334 Apply all sanitizing filters to HTML.
335 Takes in HTML string and outputs HTML string.
337 # Fun fact: This could be made into a static method.
338 if not html or not len(html):
339 # if there was no HTML, return an empty string
343 # Iterate through filters, applying all to the soup object.
345 self.sanitize_anchor_html,
346 self.set_canonical_link,
348 soup = soupfilter(soup)
def sanitize_anchor_html(self, soup):
    """
    Filter the given BeautifulSoup obj by adding target=_blank to all
    <a> tags.
    Returns BeautifulSoup obj.
    """
    # Fun fact: This could be made into a static method.
    # A plain loop replaces map(): map() is lazy on Python 3, so calling it
    # only for its side effect would silently leave the tags untouched there.
    # An empty result simply means the loop body never runs.
    for a_tag in soup.find_all('a'):
        a_tag['target'] = '_blank'

    # return filtered soup
    return soup
@staticmethod
def canonical_link_predicate(tag):
    """
    Tell whether the given tag is a <link> whose rel attribute contains
    'canonical'.
    """
    # NOTE(review): the decorator line was missing from the reviewed extract.
    # It is required because the function takes no self, yet it is looked up
    # as self.canonical_link_predicate and then called with a single tag --
    # confirm against the real file.
    if tag.name != u'link':
        return False
    if not tag.has_attr('rel'):
        return False
    return u'canonical' in tag['rel']
379 def set_canonical_link(self, soup):
381 Filter the given BeautifulSoup obj by adding
382 <link rel="canonical" href="note.get_absolute_url" />
383 to the document head.
384 Returns BeautifulSoup obj.
386 domain = Site.objects.all()[0].domain
387 note_full_href = 'http://' + domain + self.get_absolute_url()
388 canonical_tags = soup.find_all(self.canonical_link_predicate)
390 for tag in canonical_tags:
391 tag['href'] = note_full_href
393 new_tag = soup.new_tag('link', rel='canonical', href=note_full_href)
394 head = soup.find('head')
397 # return filtered soup
400 def _update_parent_updated_at(self):
401 """ update the parent Course.updated_at model
402 with the latest uploaded_at """
403 self.course.updated_at = self.uploaded_at
def save(self, *args, **kwargs):
    """
    Save the note, first pushing its uploaded_at up to the parent course
    when it is newer than the course's updated_at.
    """
    uploaded = self.uploaded_at
    if uploaded and uploaded > self.course.updated_at:
        self._update_parent_updated_at()
    super(Note, self).save(*args, **kwargs)
# Hand the Note model to the project's manual unique_together helper.
# NOTE(review): what the helper does with the model is defined in
# karmaworld.settings.manual_unique_together, not in this file -- confirm.
auto_add_check_unique_together(Note)
def update_note_counts(note_instance):
    """
    Refresh the note counts of the note's course and of that course's school.

    Does nothing when the course no longer exists, which is the case when
    the note is being removed as part of a cascade delete.
    """
    # NOTE(review): the try/except scaffolding was missing from the reviewed
    # extract; it is reconstructed from the comments and the visible
    # `except Course.DoesNotExist` line -- confirm.
    try:
        # test if the course still exists, or if this is a cascade delete.
        course = note_instance.course
    except Course.DoesNotExist:
        # this is a cascade delete. there is no course to update
        return

    course.update_note_count()
    course.school.update_note_count()
@receiver(pre_save, sender=Note, weak=False)
def note_pre_save_receiver(sender, **kwargs):
    """Stick an instance of the pre-save value of
    the given Note instance in the instance itself.
    This will be looked at in post_save."""
    # NOTE(review): the `return`, `try:` and `pass` lines were missing from
    # the reviewed extract and are reconstructed here -- confirm.
    if 'instance' not in kwargs:
        return

    instance = kwargs['instance']
    try:
        # Load the currently stored row so post_save can compare against it.
        instance.old_instance = Note.objects.get(id=instance.id)
    except ObjectDoesNotExist:
        # No stored row with this id, so there is nothing to remember.
        pass
440 @receiver(post_save, sender=Note, weak=False)
441 def note_save_receiver(sender, **kwargs):
442 if not 'instance' in kwargs:
444 note = kwargs['instance']
446 if kwargs['created']:
447 update_note_counts(note)
450 index = SearchIndex()
451 if kwargs['created']:
454 index.update_note(note, note.old_instance)
456 logger.error("Error with IndexDen:\n" + traceback.format_exc())
459 @receiver(post_delete, sender=Note, weak=False)
460 def note_delete_receiver(sender, **kwargs):
461 if not 'instance' in kwargs:
463 note = kwargs['instance']
465 # Update course and school counts of how
466 # many notes they have
467 update_note_counts(kwargs['instance'])
469 # Remove document from search index
471 index = SearchIndex()
472 index.remove_note(note)
474 logger.error("Error with IndexDen:\n" + traceback.format_exc())
477 GenericKarmaEvent.create_event(note.user, note.name, GenericKarmaEvent.NOTE_DELETED)
class UserUploadMapping(models.Model):
    """
    Pairs a user with a Filepicker file URL.

    Created by the login receiver for session-recorded upload URLs that do
    not resolve to exactly one Note. Each (user, fp_file) pair is unique.
    """
    user = models.ForeignKey(User)
    fp_file = models.CharField(max_length=255)

    # NOTE(review): the `class Meta:` line was missing from the reviewed
    # extract; unique_together only takes effect inside Meta -- confirm.
    class Meta:
        unique_together = ('user', 'fp_file')
488 @receiver(user_logged_in, weak=True)
489 def find_orphan_notes(sender, **kwargs):
490 user = kwargs['user']
491 s = kwargs['request'].session
492 uploaded_note_urls = s.get(ANONYMOUS_UPLOAD_URLS, [])
493 for uploaded_note_url in uploaded_note_urls:
495 note = Note.objects.get(fp_file=uploaded_note_url)
498 NoteKarmaEvent.create_event(user, note, NoteKarmaEvent.UPLOAD)
499 except (ObjectDoesNotExist, MultipleObjectsReturned):
500 mapping = UserUploadMapping.objects.create(fp_file=uploaded_note_url, user=user)