Remove a bunch of unused fields from Note
[oweals/karmaworld.git] / karmaworld / apps / notes / models.py
1 #!/usr/bin/env python
2 # -*- coding:utf8 -*-
3 # Copyright (C) 2012  FinalsClub Foundation
4
5 """
6     Models for the notes django app.
7     Contains only the minimum for handling files and their representation
8 """
9 import datetime
10 import traceback
11 import logging
12 from allauth.account.signals import user_logged_in
13 from django.contrib.auth.models import User
14 from django.contrib.sites.models import Site
15 from django.core.urlresolvers import reverse
16 from django.utils.safestring import mark_safe
17 from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned
18 from django.core.files.storage import default_storage
19 from django.db.models import SET_NULL
20 from django.db.models.signals import post_save, post_delete, pre_save
21 from django.dispatch import receiver
22 from karmaworld.apps.users.models import NoteKarmaEvent, GenericKarmaEvent
23 from karmaworld.secret.filepicker import FILEPICKER_API_KEY
24 from karmaworld.utils.filepicker import encode_fp_policy, sign_fp_policy
25 import os
26 import time
27 import urllib
28
29 from django.conf import settings
30 from django.core.files import File
31 from django.core.files.storage import FileSystemStorage
32 from django.db import models
33 from django.utils.text import slugify
34 import django_filepicker
35 from bs4 import BeautifulSoup as BS
36 from taggit.managers import TaggableManager
37
38 from karmaworld.apps.courses.models import Course
39 from karmaworld.apps.licenses.models import License
40 from karmaworld.apps.notes.search import SearchIndex
41 from karmaworld.settings.manual_unique_together import auto_add_check_unique_together
42
# Session key under which the Filepicker URLs of anonymous uploads are
# stored until the uploader logs in (consumed by find_orphan_notes).
ANONYMOUS_UPLOAD_URLS = 'anonymous_upload_urls'

logger = logging.getLogger(__name__)
# Local filesystem storage rooted at MEDIA_ROOT.
fs = FileSystemStorage(location=settings.MEDIA_ROOT)

# Dictionary for S3 upload headers
s3_upload_headers = {
    'Content-Type': 'text/html',
}

# This is a bit hacky, but nothing else works. Grabbed this from a proper
# file configured via S3 management console.
# https://github.com/FinalsClub/karmaworld/issues/273#issuecomment-32572169
# Canned ACL XML granting the bucket owner full control and AllUsers READ.
all_read_xml_acl = '<?xml version="1.0" encoding="UTF-8"?>\n<AccessControlPolicy xmlns="http://s3.amazonaws.com/doc/2006-03-01/"><Owner><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Owner><AccessControlList><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>READ</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>WRITE</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>READ_ACP</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="CanonicalUser"><ID>710efc05767903a0eae5064bbc541f1c8e68f8f344fa809dc92682146b401d9c</ID><DisplayName>Andrew</DisplayName></Grantee><Permission>WRITE_ACP</Permission></Grant><Grant><Grantee xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:type="Group"><URI>http://acs.amazonaws.com/groups/global/AllUsers</URI></Grantee><Permission>READ</Permission></Grant></AccessControlList></AccessControlPolicy>'
57
58
class Document(models.Model):
    """
    An Abstract Base Class representing a document intended to be subclassed.

    Holds the uploaded file (via Filepicker), its course, naming/slug data,
    uploader metadata, and license. Subclassed by Note (below).
    """
    # Course this document was uploaded for.
    course          = models.ForeignKey(Course)
    # Free-form tags.
    tags            = TaggableManager(blank=True)
    # Human-readable title; the slug is derived from it on first save.
    name            = models.CharField(max_length=255, blank=True, null=True)
    # URL-safe unique identifier (see _generate_unique_slug / save).
    slug            = models.SlugField(max_length=255, unique=True)

    # license if different from default
    license         = models.ForeignKey(License, blank=True, null=True)

    # provide an upstream file link
    upstream_link   = models.URLField(max_length=1024, blank=True, null=True, unique=True)

    # metadata relevant to the Upload process
    user            = models.ForeignKey(User, blank=True, null=True, on_delete=SET_NULL)
    ip              = models.GenericIPAddressField(blank=True, null=True,
                        help_text=u"IP address of the uploader")
    # NOTE(review): naive UTC default (datetime.datetime.utcnow), not
    # timezone-aware — confirm against project USE_TZ setting.
    uploaded_at     = models.DateTimeField(null=True, default=datetime.datetime.utcnow)


    # if True, NEVER show this file
    # WARNING: This may throw an error on migration
    is_hidden       = models.BooleanField(default=False)

    ###
    # Everything Filepicker, now in one small area

    # Allow pick (choose files), store (upload to S3), read (from FP repo),
    # stat (status of FP repo files) for 1 year (current time + 365 * 24 * 3600
    # seconds). Generated one time, at class definition upon import. So the
    # server will need to be rebooted at least one time each year or this will
    # go stale.
    fp_policy_json = '{{"expiry": {0}, "call": ["pick","store","read","stat"]}}'
    fp_policy_json = fp_policy_json.format(int(time.time() + 31536000))
    fp_policy      = encode_fp_policy(fp_policy_json)
    fp_signature   = sign_fp_policy(fp_policy)

    # Hack because mimetypes conflict with extensions, but there is no way to
    # disable mimetypes.
    # https://github.com/Ink/django-filepicker/issues/22
    # NOTE: this mutates the django_filepicker module globally at import time.
    django_filepicker.forms.FPFieldMixin.default_mimetypes = ''
    # Now let django-filepicker do the heavy lifting. Sort of. Look at all those
    # parameters!
    fp_file = django_filepicker.models.FPFileField(
                # FPFileField settings
                apikey=FILEPICKER_API_KEY,
                services='COMPUTER,DROPBOX,URL,GOOGLE_DRIVE,EVERNOTE,GMAIL,BOX,FACEBOOK,FLICKR,PICASA,IMAGE_SEARCH,WEBCAM,FTP',
                additional_params={
                    'data-fp-multiple': 'true', 
                    'data-fp-folders': 'true',
                    'data-fp-button-class':
                      'inline-button important add-note-btn',
                    'data-fp-button-text': 'Add Notes',
                    'data-fp-extensions':
                      '.pdf,.doc,.docx,.txt,.html,.rtf,.odt,.png,.jpg,.jpeg,.ppt,.pptx',
                    'data-fp-store-location': 'S3',
                    'data-fp-policy': fp_policy,
                    'data-fp-signature': fp_signature,
                    'type': 'filepicker',
                    'onchange': "got_file(event)",
                },
                # FileField settings
                null=True, blank=True,
                upload_to='nil', # field ignored because S3, but required.
                verbose_name='', # prevent a label from showing up
                )
    # Mimetype reported for the uploaded file (e.g. 'application/pdf').
    mimetype = models.CharField(max_length=255, blank=True, null=True)

    class Meta:
        abstract = True
        ordering = ['-uploaded_at']

    def _generate_unique_slug(self):
        """ generate a unique slug based on name and uploaded_at  """
        _slug = slugify(unicode(self.name))
        klass = self.__class__
        # Any existing row with the same slug forces disambiguation.
        collision = klass.objects.filter(slug=_slug)
        if collision:
            # NOTE(review): the month/day/microsecond suffix could itself
            # collide in rare cases; database-level uniqueness is ultimately
            # enforced by unique=True on the slug field.
            _slug = u"{0}-{1}-{2}-{3}".format(
                    _slug, self.uploaded_at.month,
                    self.uploaded_at.day, self.uploaded_at.microsecond)
        self.slug = _slug

    def _get_fpf(self):
        """
        Memoized FilepickerFile getter. Returns FilepickerFile.
        Caches the wrapper on the instance as `cached_fpf`.
        """
        if not hasattr(self, 'cached_fpf'):
            # Fetch additional_params containing signature, etc
            aps = self.fp_file.field.additional_params
            self.cached_fpf = django_filepicker.utils.FilepickerFile(self.fp_file.name, aps)
        return self.cached_fpf

    def get_fp_url(self):
        """
        Returns the Filepicker URL for reading the upstream document.
        """
        fpf = self._get_fpf()
        # Return proper URL for reading
        return fpf.get_url()

    def get_file(self):
        """
        Downloads the file from filepicker.io and returns a Django File wrapper
        object.
        """
        # Fetch FilepickerFile
        fpf = self._get_fpf()
        # Return Django File
        return fpf.get_file()

    def save(self, *args, **kwargs):
        """Derive the slug from name (only if not already set) before saving."""
        if self.name and not self.slug:
            self._generate_unique_slug()
        super(Document, self).save(*args, **kwargs)
177
178
class NoteManager(models.Manager):
    """ Handle restoring data. """
    def get_by_natural_key(self, fp_file, upstream_link):
        """
        Return a Note defined by its Filepicker and upstream URLs.
        """
        lookup = {'fp_file': fp_file, 'upstream_link': upstream_link}
        return self.get(**lookup)
186
187
class Note(Document):
    """
    A django model representing an uploaded file and associated metadata.
    """
    objects = NoteManager()

    # Mimetypes treated as PDF-style documents (see is_pdf()).
    PDF_MIMETYPES = (
      'application/pdf',
      'application/vnd.ms-powerpoint',
      'application/vnd.openxmlformats-officedocument.presentationml.presentation'
    )

    # Cache the Google drive file link
    gdrive_url      = models.URLField(max_length=1024, blank=True, null=True, unique=True)

    # Generated by Google Drive but saved locally
    text            = models.TextField(blank=True, null=True)

    # Number of times this note has been flagged as abusive/spam.
    flags           = models.IntegerField(default=0,null=False)

    # Social media tracking
    tweeted         = models.BooleanField(default=False)
    thanks          = models.PositiveIntegerField(default=0)

    class Meta:
        unique_together = ('fp_file', 'upstream_link')
        ordering = ['-uploaded_at']

    def __unicode__(self):
        return u"Note at {0} (from {1}) ({2})".format(self.fp_file, self.upstream_link, self.id)

    def natural_key(self):
        """
        A Note is uniquely defined by both the Filepicker link and the upstream
        link. The Filepicker link should be unique by itself, but it may be
        null in the database, so the upstream link component should resolve
        those cases.
        """
        # gdrive_url might also fit the bill?
        return (self.fp_file, self.upstream_link)

    def get_relative_s3_path(self):
        """
        returns s3 path relative to the appropriate bucket.
        """
        # Note.slug will be unique and brought in from RawDocument or created
        # upon save() inside RawDocument.convert_to_note(). It makes for a good
        # filename and its pretty well guaranteed to be there.
        return 'html/{0}.html'.format(self.slug)

    def send_to_s3(self, html, do_save=True):
        """
        Push the given HTML up to S3 for this Note.
        Set do_save to False if the note will be saved outside this call.
        Does nothing if html is empty or the document already exists on S3.
        """
        # do nothing if HTML is empty.
        if not html or not len(html):
            return
        # upload the HTML file to static host if it is not already there
        filepath = self.get_relative_s3_path()
        if not default_storage.exists(filepath):
            # This is a pretty ugly hackified answer to some s3boto shortcomings
            # and some decent default settings chosen by django-storages.

            # Create the new key (key == filename in S3 bucket)
            newkey = default_storage.bucket.new_key(filepath)
            # Upload data!
            newkey.set_contents_from_string(html, headers=s3_upload_headers)
            if not newkey.exists():
                raise LookupError('Unable to find uploaded S3 document {0}'.format(str(newkey)))

            # set the permissions for everyone to read.
            newkey.set_xml_acl(all_read_xml_acl)

    def update_note_on_s3(self, html):
        """
        Overwrite the already-uploaded S3 HTML document for this Note.
        Does nothing if html is empty or the document was never uploaded.
        """
        # do nothing if HTML is empty.
        if not html or not len(html):
            return
        # if it's not already there then bail out
        filepath = self.get_relative_s3_path()
        if not default_storage.exists(filepath):
            logger.warn("Cannot update note on S3, it does not exist already: " + unicode(self))
            return

        key = default_storage.bucket.get_key(filepath)
        key.set_contents_from_string(html, headers=s3_upload_headers)
        key.set_xml_acl(all_read_xml_acl)

    def _resolve_url(self, url_name):
        """
        Shared URL resolution for this note (used by get_absolute_url and
        get_absolute_keywords_url, which previously duplicated this logic).
        Prefers the slug over the numeric id, and falls back to the course's
        department to find the school when the course has no school directly.
        """
        if self.slug is not None:
            # return a url ending in slug
            if self.course.school:
                return reverse(url_name, args=[self.course.school.slug, self.course.slug, self.slug])
            else:
                return reverse(url_name, args=[self.course.department.school.slug, self.course.slug, self.slug])
        else:
            # return a url ending in id
            return reverse(url_name, args=[self.course.school.slug, self.course.slug, self.id])

    def get_absolute_url(self):
        """ Resolve note url, use 'note' route and slug if slug
            otherwise use note.id
        """
        return self._resolve_url('note_detail')

    def get_absolute_keywords_url(self):
        """ Resolve note keywords url, use 'note' route and slug if slug
            otherwise use note.id
        """
        return self._resolve_url('note_keywords')

    def filter_html(self, html):
        """
        Apply all sanitizing filters to HTML.
        Takes in HTML string and outputs HTML string.
        """
        # Fun fact: This could be made into a static method.
        if not html or not len(html):
            # if there was no HTML, return an empty string
            return ''

        soup = BS(html)
        # Iterate through filters, applying all to the soup object.
        for soupfilter in (
            self.sanitize_anchor_html,
            self.set_canonical_link,
        ):
            soup = soupfilter(soup)
        return str(soup)

    def sanitize_anchor_html(self, soup):
        """
        Filter the given BeautifulSoup obj by adding target=_blank to all
        anchor tags.
        Returns BeautifulSoup obj.
        """
        # Fun fact: This could be made into a static method.
        # Find all a tags in the HTML
        a_tags = soup.find_all('a')
        if not a_tags or not len(a_tags):
            # nothing to process.
            return soup

        # build a tag sanitizer
        def set_attribute_target(tag):
            tag['target'] = '_blank'
        # set all anchors to have target="_blank"
        map(set_attribute_target, a_tags)

        # return filtered soup
        return soup

    @staticmethod
    def canonical_link_predicate(tag):
        # True for <link rel="canonical" ...> tags.
        return tag.name == u'link' and \
            tag.has_attr('rel') and \
            u'canonical' in tag['rel']

    def set_canonical_link(self, soup):
        """
        Filter the given BeautifulSoup obj by adding
        <link rel="canonical" href="note.get_absolute_url" />
        to the document head.
        Returns BeautifulSoup obj.
        """
        domain = Site.objects.all()[0].domain
        note_full_href = 'http://' + domain + self.get_absolute_url()
        canonical_tags = soup.find_all(self.canonical_link_predicate)
        if canonical_tags:
            # Rewrite any existing canonical links to point at this note.
            for tag in canonical_tags:
                tag['href'] = note_full_href
        else:
            # No canonical link present; add one to <head>.
            new_tag = soup.new_tag('link', rel='canonical', href=note_full_href)
            head = soup.find('head')
            head.append(new_tag)

        # return filtered soup
        return soup

    def _update_parent_updated_at(self):
        """ update the parent Course.updated_at model
            with the latest uploaded_at """
        self.course.updated_at = self.uploaded_at
        self.course.save()

    def save(self, *args, **kwargs):
        """Propagate a newer uploaded_at to the parent course before saving."""
        if self.uploaded_at and self.uploaded_at > self.course.updated_at:
            self._update_parent_updated_at()
        super(Note, self).save(*args, **kwargs)

    def has_markdown(self):
        # True when a NoteMarkdown row exists for this note (OneToOne reverse).
        return hasattr(self, "notemarkdown")

    def is_pdf(self):
        # True when the uploaded file's mimetype is one we render as PDF.
        return self.mimetype in Note.PDF_MIMETYPES
390
391
class NoteMarkdown(models.Model):
    """Optional markdown rendition of a Note, stored one-to-one."""
    # The Note this markdown belongs to; doubles as the primary key.
    note     = models.OneToOneField(Note, primary_key=True)
    # Markdown text content.
    markdown = models.TextField(blank=True, null=True)
395
# Register Note's unique_together constraint with the project's manual
# uniqueness-check machinery (see karmaworld.settings.manual_unique_together).
auto_add_check_unique_together(Note)
397
398
def update_note_counts(note_instance):
    """
    Refresh the cached note counts on the note's course and on its school
    (looked up directly or via the course's department). Silently does
    nothing during a cascade delete, when the course no longer exists.
    """
    try:
        # Accessing the relation raises if this is a cascade delete.
        note_instance.course
    except Course.DoesNotExist:
        # No course left to update.
        return

    course = note_instance.course
    course.update_note_count()
    if course.school:
        course.school.update_note_count()
    elif course.department.school:
        course.department.school.update_note_count()
413
@receiver(pre_save, sender=Note, weak=False)
def note_pre_save_receiver(sender, **kwargs):
    """Stick an instance of the pre-save value of
    the given Note instance in the instances itself.
    This will be looked at in post_save."""
    if 'instance' not in kwargs:
        return

    instance = kwargs['instance']
    try:
        instance.old_instance = Note.objects.get(id=instance.id)
    except ObjectDoesNotExist:
        # Brand-new note: nothing to snapshot.
        pass
426
@receiver(post_save, sender=Note, weak=False)
def note_save_receiver(sender, **kwargs):
    """After a Note is saved: refresh note counts for new notes and
    push the note into the IndexDen search index (add or update)."""
    if 'instance' not in kwargs:
        return

    note = kwargs['instance']
    created = kwargs['created']

    if created:
        update_note_counts(note)

    try:
        search_index = SearchIndex()
        if created:
            search_index.add_note(note)
        else:
            # old_instance was stashed by note_pre_save_receiver.
            search_index.update_note(note, note.old_instance)
    except Exception:
        logger.error("Error with IndexDen:\n" + traceback.format_exc())
444
445
@receiver(post_delete, sender=Note, weak=False)
def note_delete_receiver(sender, **kwargs):
    """After a Note is deleted: refresh note counts, drop the note from
    the search index, and record a karma event for the uploader."""
    if 'instance' not in kwargs:
        return

    note = kwargs['instance']

    # Update course and school counts of how
    # many notes they have
    update_note_counts(note)

    # Remove document from search index
    try:
        SearchIndex().remove_note(note)
    except Exception:
        logger.error("Error with IndexDen:\n" + traceback.format_exc())

    if note.user:
        GenericKarmaEvent.create_event(note.user, note.name, GenericKarmaEvent.NOTE_DELETED)
465
466
class UserUploadMapping(models.Model):
    """Deferred ownership record: links a user to a Filepicker upload that
    could not be matched to a single Note at login time (see find_orphan_notes)."""
    # User who performed the anonymous upload.
    user = models.ForeignKey(User)
    # Filepicker URL of the uploaded file.
    fp_file = models.CharField(max_length=255)

    class Meta:
        unique_together = ('user', 'fp_file')
473
474
@receiver(user_logged_in, weak=True)
def find_orphan_notes(sender, **kwargs):
    """
    On login, claim notes the user uploaded anonymously in this session.

    Filepicker URLs of anonymous uploads are stashed in the session under
    ANONYMOUS_UPLOAD_URLS. For each one, attach the matching Note to the
    user and award upload karma. If zero or multiple notes match, record a
    UserUploadMapping so ownership can be resolved later.
    """
    user = kwargs['user']
    session = kwargs['request'].session
    uploaded_note_urls = session.get(ANONYMOUS_UPLOAD_URLS, [])
    for uploaded_note_url in uploaded_note_urls:
        try:
            note = Note.objects.get(fp_file=uploaded_note_url)
            note.user = user
            note.save()
            NoteKarmaEvent.create_event(user, note, NoteKarmaEvent.UPLOAD)
        except (ObjectDoesNotExist, MultipleObjectsReturned):
            # objects.create() already persists the row; the redundant
            # follow-up save() (a no-op extra UPDATE) has been removed.
            UserUploadMapping.objects.create(fp_file=uploaded_note_url, user=user)
489