From: Bryan
Date: Thu, 12 Dec 2013 01:20:26 +0000 (-0500)
Subject: automating USDE course synchronization for #195, but a bit of swing and miss. fabfile...
X-Git-Tag: release-20150131~386^2~15
X-Git-Url: https://git.librecmc.org/?a=commitdiff_plain;h=20d6e86d1f4b480fea6a839608359dad35e84dab;p=oweals%2Fkarmaworld.git

automating USDE course synchronization for #195, but a bit of swing and miss. fabfile modification should be moved under Django's manage.py
---

diff --git a/Vagrantfile b/Vagrantfile
index f64610f..57a9e98 100644
--- a/Vagrantfile
+++ b/Vagrantfile
@@ -41,7 +41,8 @@ apt-get update
 apt-get upgrade -y
 apt-get install -y python-pip postgresql python-virtualenv virtualenvwrapper \
                    git nginx postgresql-server-dev-9.1 libxslt1-dev \
-                   libxml2-dev libmemcached-dev python-dev rabbitmq-server
+                   libxml2-dev libmemcached-dev python-dev rabbitmq-server \
+                   p7zip-full
 
 echo "CREATE USER vagrant WITH CREATEROLE LOGIN; CREATE DATABASE karmaworld OWNER vagrant;" | su postgres -c "psql"
 
diff --git a/fabfile.py b/fabfile.py
index 0b84a56..18ad015 100644
--- a/fabfile.py
+++ b/fabfile.py
@@ -3,8 +3,10 @@ Finals Club (c) 2013"""
 
 import os
+import requests
 import ConfigParser
 
+from bs4 import BeautifulSoup as BS
 from fabric.api import cd, env, lcd, prefix, run, sudo, task, local, settings
 from fabric.contrib import files
 
@@ -21,6 +23,8 @@ env.supervisor_conf = '{0}/confs/{1}/supervisord.conf'.format(env.code_root, env
 
 env.use_ssh_config = True
 
+USDE_LINK = "http://ope.ed.gov/accreditation/GetDownloadFile.aspx"
+
 ######## Define host(s)
 def here():
     """
@@ -264,6 +268,38 @@ def check_secrets():
     if errors:
         raise Exception('\n'.join(errors))
 
+@task
+def fetch_accreditation():
+    """
+    Connects to USDE accreditation and drops a CSV into confs.
+    """
+    r = requests.get(USDE_LINK)
+    # Ensure the page was retrieved with 200
+    if not r.ok:
+        r.raise_for_status()
+
+    # Process the HTML with BeautifulSoup
+    soup = BS(r.text)
+    # Extract all the anchor links.
+    a = soup.find_all('a')
+    # TODO maybe hit up itertools for speed? Probably.
+    # Extract the HREFs from anchors.
+    def get_href(anchor):
+        return anchor.get('href')
+    a = map(get_href, a)
+    # Filter out all but the Accreditation links.
+    def contains_accreditation(link):
+        return 'Accreditation' in link and 'zip' in link
+    a = filter(contains_accreditation, a)
+    # Find the most recent. (Accreditation_YYYY_MM.zip means alphanumeric sort)
+    link = sorted(a)[-1]
+
+    # Download the linked file to the FS and extract the CSV
+    tempfile = '/tmp/accreditation.zip'
+    csvfile = env.proj_root + '/confs/accreditation.csv'
+    run('wget -B {0} -O {1} {2}'.format(USDE_LINK, tempfile, link))
+    run("7z e -i'!*.csv' -so {0} >> {1}".format(tempfile, csvfile))
+
 @task
 def first_deploy():
     """
diff --git a/reqs/common.txt b/reqs/common.txt
index 9a13af1..0c215a4 100644
--- a/reqs/common.txt
+++ b/reqs/common.txt
@@ -13,3 +13,5 @@ git+https://github.com/FinalsClub/django-taggit.git
 django-filepicker==0.1.5
 filemagic==1.6
 fabric-virtualenv
+requests
+beautifulsoup4
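
Note: the commit message says this fabfile task should eventually move under Django's manage.py. A minimal sketch of what that management command could look like, assuming an app named "courses" and a command named "fetch_accreditation" (both hypothetical, not part of this commit); it only covers the scrape/sort step, leaving the download and 7z extraction to the deploy tooling:

    # karmaworld/apps/courses/management/commands/fetch_accreditation.py  (hypothetical path)
    import requests
    from bs4 import BeautifulSoup
    from django.core.management.base import BaseCommand

    USDE_LINK = "http://ope.ed.gov/accreditation/GetDownloadFile.aspx"

    class Command(BaseCommand):
        help = "Find the latest USDE accreditation archive listed on the download page."

        def handle(self, *args, **options):
            r = requests.get(USDE_LINK)
            r.raise_for_status()
            soup = BeautifulSoup(r.text, 'html.parser')
            # Keep only hrefs pointing at Accreditation_*.zip archives.
            links = [a.get('href') for a in soup.find_all('a')
                     if a.get('href') and 'Accreditation' in a.get('href') and 'zip' in a.get('href')]
            # Accreditation_YYYY_MM.zip sorts chronologically as a plain string sort.
            latest = sorted(links)[-1]
            self.stdout.write("Latest accreditation archive: {0}\n".format(latest))

Invoked as "python manage.py fetch_accreditation", which would let the sync run from cron or Celery without requiring Fabric on the host.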