--- /dev/null
+#!/usr/bin/env python
+# -*- coding:utf8 -*-
+# Copyright (C) 2014 FinalsClub Foundation
+from bs4 import BeautifulSoup
+from django.core.management import BaseCommand
+from karmaworld.apps.notes.models import Note
+from karmaworld.secret.static_s3 import S3_URL
+import requests
+
+
+class Command(BaseCommand):
+ help = """
+ Add a <link rel='canonical' ... /> to every note stored in S3
+ """
+
+ def handle(self, *args, **kwargs):
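+        # For every note, fetch its HTML from S3, set/refresh the canonical link, and re-upload it.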
+        for note in Note.objects.all():
+            note_path = 'http:' + S3_URL + note.get_relative_s3_path()
+            resp = requests.get(note_path)
+            if resp.status_code != 200:
+                print("Could not retrieve " + note_path)
+                continue
+            html = resp.text
+
+            soup = BeautifulSoup(html)
+            soup = note.set_canonical_link(soup)
+
+            note.update_note_on_s3(unicode(soup))
+            print("Updated note " + unicode(note))
+
import logging
from allauth.account.signals import user_logged_in
from django.contrib.auth.models import User
+from django.contrib.sites.models import Site
from django.utils.safestring import mark_safe
from django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned
from django.core.files.storage import default_storage
        if do_save:
            self.save()
+    def update_note_on_s3(self, html):
+        # do nothing if HTML is empty.
+        if not html or not len(html):
+            return
+        # bail out if the note has not already been uploaded to S3
+        filepath = self.get_relative_s3_path()
+        if not default_storage.exists(filepath):
+            logger.warn("Cannot update note on S3, it does not exist already: " + unicode(self))
+            return
+
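+        # Overwrite the existing S3 key in place and re-apply the public-read ACL.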
+        key = default_storage.bucket.get_key(filepath)
+        key.set_contents_from_string(html, headers=s3_upload_headers)
+        key.set_xml_acl(all_read_xml_acl)
+
    def get_absolute_url(self):
        """ Resolve note url, use 'note' route and slug if slug
            otherwise use note.id
        soup = BS(html)
        # Iterate through filters, applying all to the soup object.
        for soupfilter in (
-            self.sanitize_anchor_html,
+            self.sanitize_anchor_html,
+            self.set_canonical_link,
        ):
            soup = soupfilter(soup)
        return str(soup)
        # return filtered soup
        return soup
+    @staticmethod
+    def canonical_link_predicate(tag):
+        # True for <link rel="canonical" ...> tags; used as a soup.find_all predicate.
+        return tag.name == u'link' and \
+               tag.has_attr('rel') and \
+               u'canonical' in tag['rel']
+
+    def set_canonical_link(self, soup):
+        """
+        Filter the given BeautifulSoup obj by adding
+        <link rel="canonical" href="note.get_absolute_url" />
+        to the document head.
+        Returns BeautifulSoup obj.
+        """
+        domain = Site.objects.all()[0].domain
+        note_full_href = 'http://' + domain + self.get_absolute_url()
+        canonical_tags = soup.find_all(self.canonical_link_predicate)
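+        # Rewrite the href of any existing canonical tags; otherwise add a new one to <head>.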
+        if canonical_tags:
+            for tag in canonical_tags:
+                tag['href'] = note_full_href
+        else:
+            new_tag = soup.new_tag('link', rel='canonical', href=note_full_href)
+            head = soup.find('head')
+            head.append(new_tag)
+
+        # return filtered soup
+        return soup
+
    def _update_parent_updated_at(self):
        """ update the parent Course.updated_at model
            with the latest uploaded_at """