From d63b0fc49a301c0ab8c65bc4d9803924a5dbd105 Mon Sep 17 00:00:00 2001 From: Charles Connell Date: Sat, 8 Feb 2014 16:12:52 -0500 Subject: [PATCH] Set canonical links in note contents #309 --- .../management/commands/add_canonical_link.py | 30 +++++++++++++ karmaworld/apps/notes/models.py | 45 ++++++++++++++++++- reqs/common.txt | 2 +- 3 files changed, 75 insertions(+), 2 deletions(-) create mode 100644 karmaworld/apps/notes/management/commands/add_canonical_link.py diff --git a/karmaworld/apps/notes/management/commands/add_canonical_link.py b/karmaworld/apps/notes/management/commands/add_canonical_link.py new file mode 100644 index 0000000..b543573 --- /dev/null +++ b/karmaworld/apps/notes/management/commands/add_canonical_link.py @@ -0,0 +1,30 @@ +#!/usr/bin/env python +# -*- coding:utf8 -*- +# Copyright (C) 2014 FinalsClub Foundation +from bs4 import BeautifulSoup +from django.core.management import BaseCommand +from karmaworld.apps.notes.models import Note +from karmaworld.secret.static_s3 import S3_URL +import requests + + +class Command(BaseCommand): + help = """ + Add a <link rel="canonical"> to every note stored in S3 + """ + + def handle(self, *args, **kwargs): + for note in Note.objects.all(): + note_path = 'http:' + S3_URL + note.get_relative_s3_path() + resp = requests.get(note_path) + if resp.status_code != 200: + print("Could not retrieve " + note_path) + continue + html = resp.text + + soup = BeautifulSoup(html) + soup = note.set_canonical_link(soup) + + note.update_note_on_s3(unicode(soup)) + print("Updated note " + unicode(note)) + diff --git a/karmaworld/apps/notes/models.py b/karmaworld/apps/notes/models.py index 83f00c3..3e58f48 100644 --- a/karmaworld/apps/notes/models.py +++ b/karmaworld/apps/notes/models.py @@ -11,6 +11,7 @@ import traceback import logging from allauth.account.signals import user_logged_in from django.contrib.auth.models import User +from django.contrib.sites.models import Site from django.utils.safestring import mark_safe from 
django.core.exceptions import ObjectDoesNotExist, MultipleObjectsReturned from django.core.files.storage import default_storage @@ -300,6 +301,20 @@ class Note(Document): if do_save: self.save() + def update_note_on_s3(self, html): + # do nothing if HTML is empty. + if not html or not len(html): + return + # if it's not already there then bail out + filepath = self.get_relative_s3_path() + if not default_storage.exists(filepath): + logger.warn("Cannot update note on S3, it does not exist already: " + unicode(self)) + return + + key = default_storage.bucket.get_key(filepath) + key.set_contents_from_string(html, headers=s3_upload_headers) + key.set_xml_acl(all_read_xml_acl) + def get_absolute_url(self): """ Resolve note url, use 'note' route and slug if slug otherwise use note.id @@ -324,7 +339,8 @@ class Note(Document): soup = BS(html) # Iterate through filters, applying all to the soup object. for soupfilter in ( - self.sanitize_anchor_html, + self.sanitize_anchor_html, + self.set_canonical_link, ): soup = soupfilter(soup) return str(soup) @@ -351,6 +367,33 @@ class Note(Document): # return filtered soup return soup + @staticmethod + def canonical_link_predicate(tag): + return tag.name == u'link' and \ + tag.has_attr('rel') and \ + u'canonical' in tag['rel'] + + def set_canonical_link(self, soup): + """ + Filter the given BeautifulSoup obj by adding + <link rel="canonical" href="..." /> + to the document head. + Returns BeautifulSoup obj. 
+ """ + domain = Site.objects.all()[0].domain + note_full_href = 'http://' + domain + self.get_absolute_url() + canonical_tags = soup.find_all(self.canonical_link_predicate) + if canonical_tags: + for tag in canonical_tags: + tag['href'] = note_full_href + else: + new_tag = soup.new_tag('link', rel='canonical', href=note_full_href) + head = soup.find('head') + head.append(new_tag) + + # return filtered soup + return soup + def _update_parent_updated_at(self): """ update the parent Course.updated_at model with the latest uploaded_at """ diff --git a/reqs/common.txt b/reqs/common.txt index 08ec9c8..9a058cf 100644 --- a/reqs/common.txt +++ b/reqs/common.txt @@ -9,7 +9,7 @@ urllib3==1.5 google-api-python-client==1.0 django-grappelli==2.4.8 git+https://github.com/FinalsClub/django-taggit.git -git+https://github.com/btbonval/django-filepicker +git+https://github.com/btbonval/django-filepicker.git filemagic==1.6 requests beautifulsoup4 -- 2.25.1