From: Bryan Date: Wed, 8 Jan 2014 05:30:39 +0000 (-0500) Subject: Reverting half-merged commits b6f336c6a13b2a41b2c29b35884277aea9daca50 and 3d4edeb156... X-Git-Tag: release-20150131~294 X-Git-Url: https://git.librecmc.org/?a=commitdiff_plain;h=dab4a237910ee521e5889c31925957e5660033f6;p=oweals%2Fkarmaworld.git Reverting half-merged commits b6f336c6a13b2a41b2c29b35884277aea9daca50 and 3d4edeb1565e41f692d0c45f064ce3df13e5f211 --- diff --git a/karmaworld/apps/courses/management/commands/fetch_usde_csv.py b/karmaworld/apps/courses/management/commands/fetch_usde_csv.py new file mode 100644 index 0000000..57f6415 --- /dev/null +++ b/karmaworld/apps/courses/management/commands/fetch_usde_csv.py @@ -0,0 +1,61 @@ +import csv +import requests +import itertools as it + +from bs4 import BeautifulSoup as BS +from urlparse import urljoin +from subprocess import call + +from django.core.management.base import BaseCommand +from karmaworld.apps.courses.models import School + +class Command(BaseCommand): + args = '' + USDE_LINK = 'http://ope.ed.gov/accreditation/GetDownloadFile.aspx' + help = (""" Downloads data from US Department of Education. + Supply a destination for the csv file to be written to. """) + + def handle(self, *args, **kwargs): + + if len(args) < 1: + self.stdout.write('Provide a filename to save csv data into.\n') + return + + filename = args[0] + + r = requests.get(self.USDE_LINK) + # Ensure the page was retrieved with 200 + if not r.ok: + r.raise_for_status() + + # Process the HTML with BeautifulSoup + soup = BS(r.text) + # Extract all the anchor links. + a = soup.find_all('a') + + # Extract the HREFs from anchors. + def get_href(anchor): + return anchor.get('href') + #a = map(get_href, a) + + # Filter out all but the Accreditation links. + def contains_accreditation(link): + return 'Accreditation' in link and 'zip' in link + #a = filter(contains_accreditation, a) + + # do the above stuff with itertools + a_iter = it.ifilter(contains_accreditation, it.imap(get_href, iter(a))) + + # Find the most recent. (Accreditation_YYYY_MM.zip means alphanumeric sort) + link = sorted(a_iter)[-1] + + # Ensure link is absolute not relative + link = urljoin(self.USDE_LINK, link) + + # Download the linked file to the FS and extract the CSV + tempfile = '/tmp/accreditation.zip' + call(['wget', '-O', tempfile, link]) + fd = open(filename, 'w') + call(['7z', 'e', "-i!*.csv", '-so', tempfile], stdout=fd) + fd.close() + call(['rm', tempfile]) diff --git a/karmaworld/apps/courses/management/commands/import_usde_csv.py b/karmaworld/apps/courses/management/commands/import_usde_csv.py new file mode 100644 index 0000000..8422624 --- /dev/null +++ b/karmaworld/apps/courses/management/commands/import_usde_csv.py @@ -0,0 +1,76 @@ +import csv +from itertools import izip + +from django.core.management.base import BaseCommand +from karmaworld.apps.courses.models import School + + +class Command(BaseCommand): + args = '' + help = ("""Import USDE csv file. add schools to the UsdeSchool model. + Assumes the following header: + Institution_ID,Institution_Name,Institution_Address,Institution_City,Institution_State,Institution_Zip,Institution_Phone,Institution_OPEID,Institution_IPEDS_UnitID,Institution_Web_Address,Campus_ID,Campus_Name,Campus_Address,Campus_City,Campus_State,Campus_Zip,Campus_IPEDS_UnitID,Accreditation_Type,Agency_Name,Agency_Status,Program_Name,Accreditation_Status,Accreditation_Date_Type,Periods,Last Action""" + ) + + def parse_school_csv(self, filename): + """parse a csv file, and return a list of dictionaries + """ + headers = False + schools = [] + + with open(filename) as f: + + reader = csv.reader(f) + headers = reader.next() + for row in reader: + schools.append(row) + + headers = [s.lower() for s in headers] + + return [ dict(izip(headers,school)) for school in schools ] + + def handle(self, *args, **kwargs): + + if len(args) < 1: + self.stdout.write('Provide a filename\n') + return + + filename = args[0] + + school_dicts = self.parse_school_csv(filename) + + self.stdout.write('Importing from list of %d schools\n' % len(school_dicts)) + + count = 0 + + for d in school_dicts: + + if 'institution_id' not in d or not d['institution_id']: + print d + raise Exception('Error: School does not have an institution_id!') + + try: + school = School.objects.get(usde_id=d['institution_id']) + + except School.DoesNotExist: + school = School() + #print d['institution_id'] + #print d['institution_name'] + count += 1 + + + school.name = d['institution_name'] + school.location = d['institution_city'] + ', ' + d['institution_state'] + school.url = d['institution_web_address'] + school.usde_id = d['institution_id'] + school.save() + + self.stdout.write('Imported %d NEW unique schools\n' % count) + + + + + + + + diff --git a/karmaworld/apps/courses/management/commands/sanitize_usde_schools.py b/karmaworld/apps/courses/management/commands/sanitize_usde_schools.py new file mode 100644 index 0000000..119e420 --- /dev/null +++ b/karmaworld/apps/courses/management/commands/sanitize_usde_schools.py @@ -0,0 +1,86 @@ +#!/usr/bin/env python +# -*- coding:utf8 -*- +# Copyright (C) 2012 FinalsClub Foundation +""" A script to sanitize the imported USDE database. + It will remove schools who's name contains words + in the RESTRICTED_WORDS list """ + +from django.core.management.base import BaseCommand +from django.db.models import Q + +from karmaworld.apps.courses.models import School + +RESTRICTED_WORDS = [ + 'internship', + 'dietetic', + 'massage', + 'therapy', + 'residency', + 'months', + 'hair', + 'cosmetology', + 'beauty', + 'nail', + 'acupuncture', + 'chiropractic', + 'careers', + 'adults', + 'hospital', + 'childcare'] + + +class Command(BaseCommand): + """ Delete Schools that contain RESTRICTED WORDS in their names """ + args = 'none' + help = """ Delete Schools that contain RESTRICTED WORDS in their names """ + + def get_input(self, input_prompt): + """ Get user input with repeated requests on incorrect input """ + + y_n = raw_input(input_prompt) + y_n = y_n.replace(" ", "") # strip extra spaces + y_n = y_n.lower() + + if y_n == 'y': + return True + elif y_n == 'n': + return False + else: + error_prompt = "Valid responses are [yYnN]\n" + return self.get_input(error_prompt + input_prompt) + + + def handle(self, *args, **kwargs): + """ The function that gets called to run this command """ + # generate an |(or)'d list of queries searching inexact for each of RESTRICTED_WORDS + queries_list = map(lambda word: Q(name__icontains=word), RESTRICTED_WORDS) + queries_or = reduce(lambda a, b: a | b, queries_list) + schools = School.objects.filter(queries_or) + self._schools_count = schools.count() + + # if there are no schools, exit + if not self._schools_count: + self.stdout.write('\n') + self.stdout.write('There are no schools worth sanitizing.\n') + return False + + self.stdout.write(u"\n\nWARNING: Are you sure you want to delete these schools:\n") + for s in schools: + self.stdout.write('%s: %s' % (s.id, s.__unicode__())) + self.stdout.write('\n') + + if self.get_input("Do you want to delete these schools? [y/n] "): + self.stdout.write("...") + try: + schools.delete() + except: + self.stdout.write("that is too many to delete at once\n") + self.stdout.write("you are probabily using sqlite , doing them in batches\n") + for _i, a_school in enumerate(schools): + self.stdout.write("deleting %s of %s..." % (_i, self._schools_count)) + a_school.delete() + self.stdout.write("done\n") + self.stdout.write("...") + + self.stdout.write("all done!\n") + self.stdout.write("Deleted %s schools" % (self._schools_count)) diff --git a/karmaworld/apps/courses/models.py b/karmaworld/apps/courses/models.py index 60ceec1..3fdabfd 100644 --- a/karmaworld/apps/courses/models.py +++ b/karmaworld/apps/courses/models.py @@ -14,9 +14,62 @@ from django.db import models from django.template import defaultfilters from karmaworld.settings.manual_unique_together import auto_add_check_unique_together -from karmaworld.apps.schools.models import School -from karmaworld.apps.schools.models import Department -from karmaworld.apps.professors.models import Professor + +class School(models.Model): + """ A grouping that contains many courses """ + name = models.CharField(max_length=255) + slug = models.SlugField(max_length=150, null=True) + location = models.CharField(max_length=255, blank=True, null=True) + url = models.URLField(max_length=511, blank=True) + # Facebook keeps a unique identifier for all schools + facebook_id = models.BigIntegerField(blank=True, null=True) + # United States Department of Education institution_id + usde_id = models.BigIntegerField(blank=True, null=True) + file_count = models.IntegerField(default=0) + priority = models.BooleanField(default=0) + alias = models.CharField(max_length=255, null=True, blank=True) + hashtag = models.CharField(max_length=16, null=True, blank=True, unique=True, help_text='School abbreviation without #') + + class Meta: + """ Sort School by file_count descending, name abc=> """ + ordering = ['-file_count','-priority', 'name'] + + def __unicode__(self): + return self.name + + def save(self, *args, **kwargs): + """ Save school and generate a slug if one doesn't exist """ + if not self.slug: + self.slug = defaultfilters.slugify(self.name) + super(School, self).save(*args, **kwargs) + + @staticmethod + def autocomplete_search_fields(): + return ("name__icontains",) + + def update_note_count(self): + """ Update the School.file_count by summing the + contained course.file_count + """ + self.file_count = sum([course.file_count for course in self.course_set.all()]) + self.save() + + +class Department(models.Model): + """ Department within a School. """ + name = models.CharField(max_length=255) + school = models.ForeignKey(School) # Should this be optional ever? + slug = models.SlugField(max_length=150, null=True) + url = models.URLField(max_length=511, blank=True, null=True) + + def __unicode__(self): + return self.name + + def save(self, *args, **kwargs): + """ Save department and generate a slug if one doesn't exist """ + if not self.slug: + self.slug = defaultfilters.slugify(self.name) + super(Department, self).save(*args, **kwargs) class Professor(models.Model): diff --git a/karmaworld/apps/courses/views.py b/karmaworld/apps/courses/views.py index 2023dec..657d756 100644 --- a/karmaworld/apps/courses/views.py +++ b/karmaworld/apps/courses/views.py @@ -17,7 +17,7 @@ from django.views.generic.list import ListView from karmaworld.apps.courses.forms import CourseForm from karmaworld.apps.courses.models import Course -from karmaworld.apps.schools.models import School +from karmaworld.apps.courses.models import School from karmaworld.apps.notes.models import Note diff --git a/karmaworld/apps/document_upload/tests.py b/karmaworld/apps/document_upload/tests.py index 2097f59..843aa99 100644 --- a/karmaworld/apps/document_upload/tests.py +++ b/karmaworld/apps/document_upload/tests.py @@ -7,7 +7,7 @@ Replace this with more appropriate tests for your application. from django.test import TestCase, Client from karmaworld.apps.courses.models import Course -from karmaworld.apps.schools.models import School +from karmaworld.apps.courses.models import School from karmaworld.apps.document_upload.forms import RawDocumentForm from karmaworld.apps.notes.gdrive import * from karmaworld.apps.notes.models import Note diff --git a/karmaworld/apps/notes/tests.py b/karmaworld/apps/notes/tests.py index 8949fb6..8e6646d 100644 --- a/karmaworld/apps/notes/tests.py +++ b/karmaworld/apps/notes/tests.py @@ -17,7 +17,7 @@ from karmaworld.apps.notes.search import SearchIndex from karmaworld.apps.notes.models import Note from karmaworld.apps.courses.models import Course -from karmaworld.apps.schools.models import School +from karmaworld.apps.courses.models import School import indextank.client as itc class TestNoes(TestCase): diff --git a/karmaworld/apps/schools/__init__.py b/karmaworld/apps/schools/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/karmaworld/apps/schools/admin.py b/karmaworld/apps/schools/admin.py deleted file mode 100644 index 70271b5..0000000 --- a/karmaworld/apps/schools/admin.py +++ /dev/null @@ -1,12 +0,0 @@ -#!/usr/bin/env python -# -*- coding:utf8 -*- -# Copyright (C) 2012 FinalsClub Foundation -""" Administration configuration for notes """ - -from django.contrib import admin - -from karmaworld.apps.schools.models import School -from karmaworld.apps.schools.models import Department - -admin.site.register(School) -admin.site.register(Department) diff --git a/karmaworld/apps/schools/management/__init__.py b/karmaworld/apps/schools/management/__init__.py deleted file mode 100644 index 8b13789..0000000 --- a/karmaworld/apps/schools/management/__init__.py +++ /dev/null @@ -1 +0,0 @@ - diff --git a/karmaworld/apps/schools/management/commands/__init__.py b/karmaworld/apps/schools/management/commands/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/karmaworld/apps/schools/management/commands/fetch_usde_csv.py b/karmaworld/apps/schools/management/commands/fetch_usde_csv.py deleted file mode 100644 index 6d627f8..0000000 --- a/karmaworld/apps/schools/management/commands/fetch_usde_csv.py +++ /dev/null @@ -1,61 +0,0 @@ -import csv -import requests -import itertools as it - -from bs4 import BeautifulSoup as BS -from urlparse import urljoin -from subprocess import call - -from django.core.management.base import BaseCommand -from karmaworld.apps.schools.models import School - -class Command(BaseCommand): - args = '' - USDE_LINK = 'http://ope.ed.gov/accreditation/GetDownloadFile.aspx' - help = (""" Downloads data from US Department of Education. - Supply a destination for the csv file to be written to. """) - - def handle(self, *args, **kwargs): - - if len(args) < 1: - self.stdout.write('Provide a filename to save csv data into.\n') - return - - filename = args[0] - - r = requests.get(self.USDE_LINK) - # Ensure the page was retrieved with 200 - if not r.ok: - r.raise_for_status() - - # Process the HTML with BeautifulSoup - soup = BS(r.text) - # Extract all the anchor links. - a = soup.find_all('a') - - # Extract the HREFs from anchors. - def get_href(anchor): - return anchor.get('href') - #a = map(get_href, a) - - # Filter out all but the Accreditation links. - def contains_accreditation(link): - return 'Accreditation' in link and 'zip' in link - #a = filter(contains_accreditation, a) - - # do the above stuff with itertools - a_iter = it.ifilter(contains_accreditation, it.imap(get_href, iter(a))) - - # Find the most recent. (Accreditation_YYYY_MM.zip means alphanumeric sort) - link = sorted(a_iter)[-1] - - # Ensure link is absolute not relative - link = urljoin(self.USDE_LINK, link) - - # Download the linked file to the FS and extract the CSV - tempfile = '/tmp/accreditation.zip' - call(['wget', '-O', tempfile, link]) - fd = open(filename, 'w') - call(['7z', 'e', "-i!*.csv", '-so', tempfile], stdout=fd) - fd.close() - call(['rm', tempfile]) diff --git a/karmaworld/apps/schools/management/commands/import_usde_csv.py b/karmaworld/apps/schools/management/commands/import_usde_csv.py deleted file mode 100644 index 87f398c..0000000 --- a/karmaworld/apps/schools/management/commands/import_usde_csv.py +++ /dev/null @@ -1,76 +0,0 @@ -import csv -from itertools import izip - -from django.core.management.base import BaseCommand -from karmaworld.apps.schools.models import School - - -class Command(BaseCommand): - args = '' - help = ("""Import USDE csv file. add schools to the UsdeSchool model. - Assumes the following header: - Institution_ID,Institution_Name,Institution_Address,Institution_City,Institution_State,Institution_Zip,Institution_Phone,Institution_OPEID,Institution_IPEDS_UnitID,Institution_Web_Address,Campus_ID,Campus_Name,Campus_Address,Campus_City,Campus_State,Campus_Zip,Campus_IPEDS_UnitID,Accreditation_Type,Agency_Name,Agency_Status,Program_Name,Accreditation_Status,Accreditation_Date_Type,Periods,Last Action""" - ) - - def parse_school_csv(self, filename): - """parse a csv file, and return a list of dictionaries - """ - headers = False - schools = [] - - with open(filename) as f: - - reader = csv.reader(f) - headers = reader.next() - for row in reader: - schools.append(row) - - headers = [s.lower() for s in headers] - - return [ dict(izip(headers,school)) for school in schools ] - - def handle(self, *args, **kwargs): - - if len(args) < 1: - self.stdout.write('Provide a filename\n') - return - - filename = args[0] - - school_dicts = self.parse_school_csv(filename) - - self.stdout.write('Importing from list of %d schools\n' % len(school_dicts)) - - count = 0 - - for d in school_dicts: - - if 'institution_id' not in d or not d['institution_id']: - print d - raise Exception('Error: School does not have an institution_id!') - - try: - school = School.objects.get(usde_id=d['institution_id']) - - except School.DoesNotExist: - school = School() - #print d['institution_id'] - #print d['institution_name'] - count += 1 - - - school.name = d['institution_name'] - school.location = d['institution_city'] + ', ' + d['institution_state'] - school.url = d['institution_web_address'] - school.usde_id = d['institution_id'] - school.save() - - self.stdout.write('Imported %d NEW unique schools\n' % count) - - - - - - - - diff --git a/karmaworld/apps/schools/management/commands/sanitize_usde_schools.py b/karmaworld/apps/schools/management/commands/sanitize_usde_schools.py deleted file mode 100644 index 8639832..0000000 --- a/karmaworld/apps/schools/management/commands/sanitize_usde_schools.py +++ /dev/null @@ -1,86 +0,0 @@ -#!/usr/bin/env python -# -*- coding:utf8 -*- -# Copyright (C) 2012 FinalsClub Foundation -""" A script to sanitize the imported USDE database. - It will remove schools who's name contains words - in the RESTRICTED_WORDS list """ - -from django.core.management.base import BaseCommand -from django.db.models import Q - -from karmaworld.apps.schools.models import School - -RESTRICTED_WORDS = [ - 'internship', - 'dietetic', - 'massage', - 'therapy', - 'residency', - 'months', - 'hair', - 'cosmetology', - 'beauty', - 'nail', - 'acupuncture', - 'chiropractic', - 'careers', - 'adults', - 'hospital', - 'childcare'] - - -class Command(BaseCommand): - """ Delete Schools that contain RESTRICTED WORDS in their names """ - args = 'none' - help = """ Delete Schools that contain RESTRICTED WORDS in their names """ - - def get_input(self, input_prompt): - """ Get user input with repeated requests on incorrect input """ - - y_n = raw_input(input_prompt) - y_n = y_n.replace(" ", "") # strip extra spaces - y_n = y_n.lower() - - if y_n == 'y': - return True - elif y_n == 'n': - return False - else: - error_prompt = "Valid responses are [yYnN]\n" - return self.get_input(error_prompt + input_prompt) - - - def handle(self, *args, **kwargs): - """ The function that gets called to run this command """ - # generate an |(or)'d list of queries searching inexact for each of RESTRICTED_WORDS - queries_list = map(lambda word: Q(name__icontains=word), RESTRICTED_WORDS) - queries_or = reduce(lambda a, b: a | b, queries_list) - schools = School.objects.filter(queries_or) - self._schools_count = schools.count() - - # if there are no schools, exit - if not self._schools_count: - self.stdout.write('\n') - self.stdout.write('There are no schools worth sanitizing.\n') - return False - - self.stdout.write(u"\n\nWARNING: Are you sure you want to delete these schools:\n") - for s in schools: - self.stdout.write('%s: %s' % (s.id, s.__unicode__())) - self.stdout.write('\n') - - if self.get_input("Do you want to delete these schools? [y/n] "): - self.stdout.write("...") - try: - schools.delete() - except: - self.stdout.write("that is too many to delete at once\n") - self.stdout.write("you are probabily using sqlite , doing them in batches\n") - for _i, a_school in enumerate(schools): - self.stdout.write("deleting %s of %s..." % (_i, self._schools_count)) - a_school.delete() - self.stdout.write("done\n") - self.stdout.write("...") - - self.stdout.write("all done!\n") - self.stdout.write("Deleted %s schools" % (self._schools_count)) diff --git a/karmaworld/apps/schools/models.py b/karmaworld/apps/schools/models.py deleted file mode 100644 index c7b13af..0000000 --- a/karmaworld/apps/schools/models.py +++ /dev/null @@ -1,70 +0,0 @@ -#!/usr/bin/env python -# -*- coding:utf8 -*- -# Copyright (C) 2012 FinalsClub Foundation - -""" - Models for schools. - Handles schools and departments. -""" -import datetime - -from django.db import models -from django.template import defaultfilters - - -class School(models.Model): - """ A grouping that contains many courses """ - name = models.CharField(max_length=255) - slug = models.SlugField(max_length=150, null=True) - location = models.CharField(max_length=255, blank=True, null=True) - url = models.URLField(max_length=511, blank=True) - # Facebook keeps a unique identifier for all schools - facebook_id = models.BigIntegerField(blank=True, null=True) - # United States Department of Education institution_id - usde_id = models.BigIntegerField(blank=True, null=True) - file_count = models.IntegerField(default=0) - priority = models.BooleanField(default=0) - alias = models.CharField(max_length=255, null=True, blank=True) - hashtag = models.CharField(max_length=16, null=True, blank=True, unique=True, help_text='School abbreviation without #') - - class Meta: - """ Sort School by file_count descending, name abc=> """ - ordering = ['-file_count','-priority', 'name'] - - - def __unicode__(self): - return self.name - - def save(self, *args, **kwargs): - """ Save school and generate a slug if one doesn't exist """ - if not self.slug: - self.slug = defaultfilters.slugify(self.name) - super(School, self).save(*args, **kwargs) - - @staticmethod - def autocomplete_search_fields(): - return ("name__icontains",) - - def update_note_count(self): - """ Update the School.file_count by summing the - contained course.file_count - """ - self.file_count = sum([course.file_count for course in self.course_set.all()]) - self.save() - - -class Department(models.Model): - """ Department within a School. """ - name = models.CharField(max_length=255) - school = models.ForeignKey(School) # Should this be optional ever? - slug = models.SlugField(max_length=150, null=True) - url = models.URLField(max_length=511, blank=True, null=True) - - def __unicode__(self): - return self.name - - def save(self, *args, **kwargs): - """ Save department and generate a slug if one doesn't exist """ - if not self.slug: - self.slug = defaultfilters.slugify(self.name) - super(Department, self).save(*args, **kwargs) diff --git a/karmaworld/settings/common.py b/karmaworld/settings/common.py index 963506f..bfc6fb1 100644 --- a/karmaworld/settings/common.py +++ b/karmaworld/settings/common.py @@ -216,7 +216,6 @@ LOCAL_APPS = ( 'karmaworld.apps.users', 'karmaworld.apps.moderation', 'karmaworld.apps.licenses', - 'karmaworld.apps.schools', ) # See: https://docs.djangoproject.com/en/dev/ref/settings/#installed-apps