automating USDE course synchronization for #195, but a bit of swing and miss. fabfile...

author Bryan <btbonval@gmail.com>

Thu, 12 Dec 2013 01:20:26 +0000 (20:20 -0500)

committer Bryan <btbonval@gmail.com>

Thu, 12 Dec 2013 01:20:26 +0000 (20:20 -0500)
author Bryan <btbonval@gmail.com>
Thu, 12 Dec 2013 01:20:26 +0000 (20:20 -0500)
committer Bryan <btbonval@gmail.com>
Thu, 12 Dec 2013 01:20:26 +0000 (20:20 -0500)
diff --git a/Vagrantfile b/Vagrantfile

index f64610f128537f58515e0c18df87cc8507ec8243..57a9e9892d79e5b4f6b555035ec68dbe93deb8ca 100644 (file)
--- a/Vagrantfile
+++ b/Vagrantfile
@@ -41,7 +41,8 @@ apt-get update
  apt-get upgrade -y
  apt-get install -y python-pip postgresql python-virtualenv virtualenvwrapper \
                     git nginx postgresql-server-dev-9.1 libxslt1-dev \
-                   libxml2-dev libmemcached-dev python-dev rabbitmq-server
+                   libxml2-dev libmemcached-dev python-dev rabbitmq-server \
+                   p7zip-full
  
  echo "CREATE USER vagrant WITH CREATEROLE LOGIN; CREATE DATABASE karmaworld OWNER vagrant;" | su postgres -c "psql"
  
diff --git a/fabfile.py b/fabfile.py

index 0b84a565f3ce4f84353df50f04cdab676cc40248..18ad0150f7f36ea48f7efcc95abbbbc74143d0ae 100644 (file)
--- a/fabfile.py
+++ b/fabfile.py
@@ -3,8 +3,10 @@
      Finals Club (c) 2013"""
  
  import os
+import requests
  import ConfigParser
  
+from bs4 import BeautifulSoup as BS
  from fabric.api import cd, env, lcd, prefix, run, sudo, task, local, settings
  from fabric.contrib import files
  
@@ -21,6 +23,8 @@ env.supervisor_conf = '{0}/confs/{1}/supervisord.conf'.format(env.code_root, env
  
  env.use_ssh_config = True
  
+USDE_LINK = "http://ope.ed.gov/accreditation/GetDownloadFile.aspx"
+
  ######## Define host(s)
  def here():
      """
@@ -264,6 +268,38 @@ def check_secrets():
      if errors:
          raise Exception('\n'.join(errors))
  
+@task
+def fetch_accreditation():
+    """
+    Connects to USDE accreditation and drops a CSV into confs.
+
+    Scrapes the USDE download page for links to Accreditation zip archives,
+    picks the one that sorts last, downloads it on the target host and
+    extracts its CSV contents into confs/accreditation.csv under the
+    project root.
+
+    Raises requests.HTTPError if the download page cannot be retrieved and
+    Exception if the page lists no Accreditation zip links.
+    """
+    r = requests.get(USDE_LINK)
+    # Ensure the page was retrieved; raises on 4xx/5xx, no-op otherwise.
+    r.raise_for_status()
+
+    # Process the HTML with BeautifulSoup
+    soup = BS(r.text)
+    # Extract the HREFs of the Accreditation zip links.
+    # href=True skips anchors that have no href attribute at all; those
+    # would otherwise yield None and break the substring checks below.
+    hrefs = [anchor['href'] for anchor in soup.find_all('a', href=True)]
+    candidates = [href for href in hrefs
+                  if 'Accreditation' in href and 'zip' in href]
+    if not candidates:
+        raise Exception(
+            'No Accreditation zip links found at {0}'.format(USDE_LINK))
+    # Find the most recent. (Accreditation_YYYY_MM.zip means alphanumeric sort)
+    newest = max(candidates)
+    # Resolve relative links against the page URL here rather than with
+    # `wget -B`: wget only applies --base to links read from an -i input
+    # file, not to URLs given on the command line.
+    link = requests.compat.urljoin(USDE_LINK, newest)
+
+    # Download the linked file to the FS and extract the CSV
+    archive = '/tmp/accreditation.zip'
+    csvfile = env.proj_root + '/confs/accreditation.csv'
+    # NOTE(review): link comes from remote HTML and is interpolated into the
+    # shell command unquoted -- consider escaping it.
+    run('wget -O {0} {1}'.format(archive, link))
+    # NOTE(review): '>>' appends, so rerunning this task adds to any existing
+    # accreditation.csv instead of replacing it -- confirm this is intended.
+    run("7z e -i'!*.csv' -so {0} >> {1}".format(archive, csvfile))
+
  @task
  def first_deploy():
      """
diff --git a/reqs/common.txt b/reqs/common.txt

index 9a13af16f331eafb30798d15b841440203e95a74..0c215a43bf072d60f2ba81830b43d7612eec7b3b 100644 (file)
--- a/reqs/common.txt
+++ b/reqs/common.txt
@@ -13,3 +13,5 @@ git+https://github.com/FinalsClub/django-taggit.git
  django-filepicker==0.1.5
  filemagic==1.6
  fabric-virtualenv
+requests
+beautifulsoup4
author	Bryan <btbonval@gmail.com>
	Thu, 12 Dec 2013 01:20:26 +0000 (20:20 -0500)
committer	Bryan <btbonval@gmail.com>
	Thu, 12 Dec 2013 01:20:26 +0000 (20:20 -0500)
Vagrantfile		patch \| blob \| history
fabfile.py		patch \| blob \| history
reqs/common.txt		patch \| blob \| history