--- /dev/null
+import csv
+import requests
+import itertools as it
+
+from bs4 import BeautifulSoup as BS
+from urlparse import urljoin
+from subprocess import call
+
+from django.core.management.base import BaseCommand
+from karmaworld.apps.courses.models import School
+
+class Command(BaseCommand):
+    """Management command that downloads the newest USDE accreditation CSV.
+
+    It scrapes the download page at USDE_LINK for links whose href contains
+    both 'Accreditation' and 'zip', fetches the last one in string sort
+    order with wget, and extracts the CSV it contains with 7z.
+    """
+    args = '<destination>'
+    USDE_LINK = 'http://ope.ed.gov/accreditation/GetDownloadFile.aspx'
+    help = (""" Downloads data from US Department of Education.
+ Supply a destination for the csv file to be written to. """)
+
+    def handle(self, *args, **kwargs):
+        """Download the latest accreditation archive and write its CSV.
+
+        args[0] is the path the extracted CSV data is written to. When no
+        argument is given a usage hint is printed and nothing is fetched.
+
+        Raises CommandError when the page lists no accreditation archive or
+        when wget / 7z report a failure. Raises the requests HTTP error when
+        the download page itself cannot be retrieved.
+        """
+        # Function-scope imports keep this command self-contained.
+        import os
+        import tempfile
+        from django.core.management.base import CommandError
+
+        if len(args) < 1:
+            self.stdout.write('Provide a filename to save csv data into.\n')
+            return
+
+        filename = args[0]
+
+        r = requests.get(self.USDE_LINK)
+        # No-op on success, raises on a 4xx/5xx response.
+        r.raise_for_status()
+
+        # Process the HTML with BeautifulSoup and collect anchor targets.
+        soup = BS(r.text)
+        hrefs = (anchor.get('href') for anchor in soup.find_all('a'))
+        # Anchors without an href yield None; skip them before the
+        # substring tests, which would otherwise raise TypeError.
+        links = [href for href in hrefs
+                 if href and 'Accreditation' in href and 'zip' in href]
+        if not links:
+            raise CommandError(
+                'No Accreditation zip link found at %s' % self.USDE_LINK)
+
+        # Accreditation_YYYY_MM.zip names sort chronologically as strings,
+        # so the greatest link is the most recent. urljoin makes it absolute.
+        link = urljoin(self.USDE_LINK, max(links))
+
+        # A private, unpredictable temp path replaces a fixed /tmp filename.
+        zip_fd, zip_path = tempfile.mkstemp(suffix='.zip')
+        os.close(zip_fd)
+        try:
+            if call(['wget', '-O', zip_path, link]) != 0:
+                raise CommandError('wget failed to download %s' % link)
+            with open(filename, 'w') as fd:
+                status = call(['7z', 'e', "-i!*.csv", '-so', zip_path],
+                              stdout=fd)
+            # NOTE(review): 7-Zip documents exit code 1 as a non-fatal
+            # warning, so only higher codes are treated as failure.
+            if status > 1:
+                raise CommandError(
+                    '7z failed to extract csv data from %s' % link)
+        finally:
+            # Always remove the downloaded archive, even on failure.
+            os.remove(zip_path)
--- /dev/null
+import csv
+from itertools import izip
+
+from django.core.management.base import BaseCommand
+from karmaworld.apps.courses.models import School
+
+
+class Command(BaseCommand):
+    """Management command that loads a USDE csv file into School rows."""
+    args = '<filename>'
+    help = ("""Import USDE csv file. add schools to the UsdeSchool model.
+ Assumes the following header:
+ Institution_ID,Institution_Name,Institution_Address,Institution_City,Institution_State,Institution_Zip,Institution_Phone,Institution_OPEID,Institution_IPEDS_UnitID,Institution_Web_Address,Campus_ID,Campus_Name,Campus_Address,Campus_City,Campus_State,Campus_Zip,Campus_IPEDS_UnitID,Accreditation_Type,Agency_Name,Agency_Status,Program_Name,Accreditation_Status,Accreditation_Date_Type,Periods,Last Action"""
+    )
+
+    def parse_school_csv(self, filename):
+        """Read the csv at filename and return one dict per data row.
+
+        The first row supplies the keys, lower-cased; every later row is
+        paired with those keys by position.
+        """
+        with open(filename) as csv_file:
+            rows = csv.reader(csv_file)
+            keys = [column.lower() for column in rows.next()]
+            return [dict(izip(keys, row)) for row in rows]
+
+    def handle(self, *args, **kwargs):
+        """Create or update a School for every row of the csv in args[0].
+
+        Rows are matched to existing schools on usde_id. A row with no
+        institution_id is printed and aborts the import with an Exception.
+        """
+        if not args:
+            self.stdout.write('Provide a filename\n')
+            return
+
+        records = self.parse_school_csv(args[0])
+        self.stdout.write(
+            'Importing from list of %d schools\n' % len(records))
+
+        created = 0
+        for record in records:
+            if not record.get('institution_id'):
+                print record
+                raise Exception(
+                    'Error: School does not have an institution_id!')
+
+            usde_id = record['institution_id']
+            try:
+                school = School.objects.get(usde_id=usde_id)
+            except School.DoesNotExist:
+                # Not seen before: start a fresh row and count it as new.
+                school = School()
+                created += 1
+
+            # New and existing schools alike are refreshed from the csv.
+            school.name = record['institution_name']
+            school.location = ', '.join(
+                (record['institution_city'], record['institution_state']))
+            school.url = record['institution_web_address']
+            school.usde_id = usde_id
+            school.save()
+
+        self.stdout.write('Imported %d NEW unique schools\n' % created)
+
+
+
+
+
+
+
+
--- /dev/null
+#!/usr/bin/env python
+# -*- coding:utf8 -*-
+# Copyright (C) 2012 FinalsClub Foundation
+""" A script to sanitize the imported USDE database.
+ It will remove schools whose names contain words
+ in the RESTRICTED_WORDS list """
+
+from django.core.management.base import BaseCommand
+from django.db.models import Q
+
+from karmaworld.apps.courses.models import School
+
+# Lower-case substrings; the sanitize command matches each of them
+# case-insensitively (name__icontains) against School.name, so a word also
+# matches inside longer words.
+RESTRICTED_WORDS = [
+ 'internship',
+ 'dietetic',
+ 'massage',
+ 'therapy',
+ 'residency',
+ 'months',
+ 'hair',
+ 'cosmetology',
+ 'beauty',
+ 'nail',
+ 'acupuncture',
+ 'chiropractic',
+ 'careers',
+ 'adults',
+ 'hospital',
+ 'childcare']
+
+
+class Command(BaseCommand):
+    """ Delete Schools that contain RESTRICTED WORDS in their names """
+    args = 'none'
+    help = """ Delete Schools that contain RESTRICTED WORDS in their names """
+
+    def get_input(self, input_prompt):
+        """ Ask a yes/no question until the user gives a valid answer.
+
+        Spaces are removed and case is ignored. Returns True for 'y' and
+        False for 'n'. Any other answer re-asks the question with a single
+        error line in front of it.
+        """
+        prompt = input_prompt
+        # A loop, not recursion: repeated bad answers neither deepen the
+        # call stack nor pile extra error lines onto the prompt.
+        while True:
+            y_n = raw_input(prompt).replace(" ", "").lower()
+            if y_n == 'y':
+                return True
+            if y_n == 'n':
+                return False
+            prompt = "Valid responses are [yYnN]\n" + input_prompt
+
+    def handle(self, *args, **kwargs):
+        """ The function that gets called to run this command.
+
+        Lists every School whose name contains one of RESTRICTED_WORDS,
+        asks for confirmation and deletes them. Returns False when nothing
+        matches. Nothing is deleted or reported if the user declines.
+        """
+        # Function-scope import keeps this command self-contained.
+        from django.db import DatabaseError
+
+        # generate an |(or)'d query searching inexact for each of RESTRICTED_WORDS
+        queries_list = [Q(name__icontains=word) for word in RESTRICTED_WORDS]
+        queries_or = reduce(lambda a, b: a | b, queries_list)
+        schools = School.objects.filter(queries_or)
+        self._schools_count = schools.count()
+
+        # if there are no schools, exit
+        if not self._schools_count:
+            self.stdout.write('\n')
+            self.stdout.write('There are no schools worth sanitizing.\n')
+            return False
+
+        self.stdout.write(u"\n\nWARNING: Are you sure you want to delete these schools:\n")
+        for s in schools:
+            self.stdout.write('%s: %s' % (s.id, s.__unicode__()))
+            self.stdout.write('\n')
+
+        if self.get_input("Do you want to delete these schools? [y/n] "):
+            self.stdout.write("...")
+            try:
+                schools.delete()
+            except DatabaseError:
+                # Only database failures trigger the row-by-row fallback;
+                # Ctrl-C and programming errors now propagate.
+                self.stdout.write("that is too many to delete at once\n")
+                self.stdout.write("you are probabily using sqlite , doing them in batches\n")
+                # Count from 1 so progress reads "1 of N" through "N of N".
+                for _i, a_school in enumerate(schools, 1):
+                    self.stdout.write("deleting %s of %s..." % (_i, self._schools_count))
+                    a_school.delete()
+                    self.stdout.write("done\n")
+                self.stdout.write("...")
+
+            # Reported only after a confirmed delete.
+            self.stdout.write("all done!\n")
+            self.stdout.write("Deleted %s schools" % (self._schools_count))
from django.template import defaultfilters
from karmaworld.settings.manual_unique_together import auto_add_check_unique_together
-from karmaworld.apps.schools.models import School
-from karmaworld.apps.schools.models import Department
-from karmaworld.apps.professors.models import Professor
+
+class School(models.Model):
+    """ A grouping that contains many courses """
+    name = models.CharField(max_length=255)
+    slug = models.SlugField(max_length=150, null=True)
+    location = models.CharField(max_length=255, blank=True, null=True)
+    url = models.URLField(max_length=511, blank=True)
+    # Facebook keeps a unique identifier for all schools
+    facebook_id = models.BigIntegerField(blank=True, null=True)
+    # United States Department of Education institution_id
+    usde_id = models.BigIntegerField(blank=True, null=True)
+    file_count = models.IntegerField(default=0)
+    # A boolean default for a boolean field (was the integer 0).
+    priority = models.BooleanField(default=False)
+    alias = models.CharField(max_length=255, null=True, blank=True)
+    hashtag = models.CharField(max_length=16, null=True, blank=True, unique=True, help_text='School abbreviation without #')
+
+    class Meta:
+        """ Sort School by file_count descending, then priority, then name """
+        ordering = ['-file_count', '-priority', 'name']
+
+    def __unicode__(self):
+        return self.name
+
+    def save(self, *args, **kwargs):
+        """ Save school and generate a slug if one doesn't exist.
+
+        The generated slug is clipped to the slug field's max_length,
+        because name may be longer than the slug column allows.
+        """
+        if not self.slug:
+            max_length = self._meta.get_field('slug').max_length
+            self.slug = defaultfilters.slugify(self.name)[:max_length]
+        super(School, self).save(*args, **kwargs)
+
+    @staticmethod
+    def autocomplete_search_fields():
+        """ Field lookups used for autocomplete searches on School """
+        return ("name__icontains",)
+
+    def update_note_count(self):
+        """ Update the School.file_count by summing the
+            contained course.file_count, then save the school.
+        """
+        self.file_count = sum(
+            course.file_count for course in self.course_set.all())
+        self.save()
+
+
+class Department(models.Model):
+    """ Department within a School. """
+    name = models.CharField(max_length=255)
+    school = models.ForeignKey(School)  # Should this be optional ever?
+    slug = models.SlugField(max_length=150, null=True)
+    url = models.URLField(max_length=511, blank=True, null=True)
+
+    def __unicode__(self):
+        return self.name
+
+    def save(self, *args, **kwargs):
+        """ Save department and generate a slug if one doesn't exist.
+
+        The generated slug is clipped to the slug field's max_length,
+        because name may be longer than the slug column allows.
+        """
+        if not self.slug:
+            max_length = self._meta.get_field('slug').max_length
+            self.slug = defaultfilters.slugify(self.name)[:max_length]
+        super(Department, self).save(*args, **kwargs)
class Professor(models.Model):
from karmaworld.apps.courses.forms import CourseForm
from karmaworld.apps.courses.models import Course
-from karmaworld.apps.schools.models import School
+from karmaworld.apps.courses.models import School
from karmaworld.apps.notes.models import Note
from django.test import TestCase, Client
from karmaworld.apps.courses.models import Course
-from karmaworld.apps.schools.models import School
+from karmaworld.apps.courses.models import School
from karmaworld.apps.document_upload.forms import RawDocumentForm
from karmaworld.apps.notes.gdrive import *
from karmaworld.apps.notes.models import Note
from karmaworld.apps.notes.models import Note
from karmaworld.apps.courses.models import Course
-from karmaworld.apps.schools.models import School
+from karmaworld.apps.courses.models import School
import indextank.client as itc
class TestNoes(TestCase):
+++ /dev/null
-#!/usr/bin/env python
-# -*- coding:utf8 -*-
-# Copyright (C) 2012 FinalsClub Foundation
-""" Administration configuration for notes """
-
-from django.contrib import admin
-
-from karmaworld.apps.schools.models import School
-from karmaworld.apps.schools.models import Department
-
-admin.site.register(School)
-admin.site.register(Department)
+++ /dev/null
-import csv
-import requests
-import itertools as it
-
-from bs4 import BeautifulSoup as BS
-from urlparse import urljoin
-from subprocess import call
-
-from django.core.management.base import BaseCommand
-from karmaworld.apps.schools.models import School
-
-class Command(BaseCommand):
- args = '<destination>'
- USDE_LINK = 'http://ope.ed.gov/accreditation/GetDownloadFile.aspx'
- help = (""" Downloads data from US Department of Education.
- Supply a destination for the csv file to be written to. """)
-
- def handle(self, *args, **kwargs):
-
- if len(args) < 1:
- self.stdout.write('Provide a filename to save csv data into.\n')
- return
-
- filename = args[0]
-
- r = requests.get(self.USDE_LINK)
- # Ensure the page was retrieved with 200
- if not r.ok:
- r.raise_for_status()
-
- # Process the HTML with BeautifulSoup
- soup = BS(r.text)
- # Extract all the anchor links.
- a = soup.find_all('a')
-
- # Extract the HREFs from anchors.
- def get_href(anchor):
- return anchor.get('href')
- #a = map(get_href, a)
-
- # Filter out all but the Accreditation links.
- def contains_accreditation(link):
- return 'Accreditation' in link and 'zip' in link
- #a = filter(contains_accreditation, a)
-
- # do the above stuff with itertools
- a_iter = it.ifilter(contains_accreditation, it.imap(get_href, iter(a)))
-
- # Find the most recent. (Accreditation_YYYY_MM.zip means alphanumeric sort)
- link = sorted(a_iter)[-1]
-
- # Ensure link is absolute not relative
- link = urljoin(self.USDE_LINK, link)
-
- # Download the linked file to the FS and extract the CSV
- tempfile = '/tmp/accreditation.zip'
- call(['wget', '-O', tempfile, link])
- fd = open(filename, 'w')
- call(['7z', 'e', "-i!*.csv", '-so', tempfile], stdout=fd)
- fd.close()
- call(['rm', tempfile])
+++ /dev/null
-import csv
-from itertools import izip
-
-from django.core.management.base import BaseCommand
-from karmaworld.apps.schools.models import School
-
-
-class Command(BaseCommand):
- args = '<filename>'
- help = ("""Import USDE csv file. add schools to the UsdeSchool model.
- Assumes the following header:
- Institution_ID,Institution_Name,Institution_Address,Institution_City,Institution_State,Institution_Zip,Institution_Phone,Institution_OPEID,Institution_IPEDS_UnitID,Institution_Web_Address,Campus_ID,Campus_Name,Campus_Address,Campus_City,Campus_State,Campus_Zip,Campus_IPEDS_UnitID,Accreditation_Type,Agency_Name,Agency_Status,Program_Name,Accreditation_Status,Accreditation_Date_Type,Periods,Last Action"""
- )
-
- def parse_school_csv(self, filename):
- """parse a csv file, and return a list of dictionaries
- """
- headers = False
- schools = []
-
- with open(filename) as f:
-
- reader = csv.reader(f)
- headers = reader.next()
- for row in reader:
- schools.append(row)
-
- headers = [s.lower() for s in headers]
-
- return [ dict(izip(headers,school)) for school in schools ]
-
- def handle(self, *args, **kwargs):
-
- if len(args) < 1:
- self.stdout.write('Provide a filename\n')
- return
-
- filename = args[0]
-
- school_dicts = self.parse_school_csv(filename)
-
- self.stdout.write('Importing from list of %d schools\n' % len(school_dicts))
-
- count = 0
-
- for d in school_dicts:
-
- if 'institution_id' not in d or not d['institution_id']:
- print d
- raise Exception('Error: School does not have an institution_id!')
-
- try:
- school = School.objects.get(usde_id=d['institution_id'])
-
- except School.DoesNotExist:
- school = School()
- #print d['institution_id']
- #print d['institution_name']
- count += 1
-
-
- school.name = d['institution_name']
- school.location = d['institution_city'] + ', ' + d['institution_state']
- school.url = d['institution_web_address']
- school.usde_id = d['institution_id']
- school.save()
-
- self.stdout.write('Imported %d NEW unique schools\n' % count)
-
-
-
-
-
-
-
-
+++ /dev/null
-#!/usr/bin/env python
-# -*- coding:utf8 -*-
-# Copyright (C) 2012 FinalsClub Foundation
-""" A script to sanitize the imported USDE database.
- It will remove schools who's name contains words
- in the RESTRICTED_WORDS list """
-
-from django.core.management.base import BaseCommand
-from django.db.models import Q
-
-from karmaworld.apps.schools.models import School
-
-RESTRICTED_WORDS = [
- 'internship',
- 'dietetic',
- 'massage',
- 'therapy',
- 'residency',
- 'months',
- 'hair',
- 'cosmetology',
- 'beauty',
- 'nail',
- 'acupuncture',
- 'chiropractic',
- 'careers',
- 'adults',
- 'hospital',
- 'childcare']
-
-
-class Command(BaseCommand):
- """ Delete Schools that contain RESTRICTED WORDS in their names """
- args = 'none'
- help = """ Delete Schools that contain RESTRICTED WORDS in their names """
-
- def get_input(self, input_prompt):
- """ Get user input with repeated requests on incorrect input """
-
- y_n = raw_input(input_prompt)
- y_n = y_n.replace(" ", "") # strip extra spaces
- y_n = y_n.lower()
-
- if y_n == 'y':
- return True
- elif y_n == 'n':
- return False
- else:
- error_prompt = "Valid responses are [yYnN]\n"
- return self.get_input(error_prompt + input_prompt)
-
-
- def handle(self, *args, **kwargs):
- """ The function that gets called to run this command """
- # generate an |(or)'d list of queries searching inexact for each of RESTRICTED_WORDS
- queries_list = map(lambda word: Q(name__icontains=word), RESTRICTED_WORDS)
- queries_or = reduce(lambda a, b: a | b, queries_list)
- schools = School.objects.filter(queries_or)
- self._schools_count = schools.count()
-
- # if there are no schools, exit
- if not self._schools_count:
- self.stdout.write('\n')
- self.stdout.write('There are no schools worth sanitizing.\n')
- return False
-
- self.stdout.write(u"\n\nWARNING: Are you sure you want to delete these schools:\n")
- for s in schools:
- self.stdout.write('%s: %s' % (s.id, s.__unicode__()))
- self.stdout.write('\n')
-
- if self.get_input("Do you want to delete these schools? [y/n] "):
- self.stdout.write("...")
- try:
- schools.delete()
- except:
- self.stdout.write("that is too many to delete at once\n")
- self.stdout.write("you are probabily using sqlite , doing them in batches\n")
- for _i, a_school in enumerate(schools):
- self.stdout.write("deleting %s of %s..." % (_i, self._schools_count))
- a_school.delete()
- self.stdout.write("done\n")
- self.stdout.write("...")
-
- self.stdout.write("all done!\n")
- self.stdout.write("Deleted %s schools" % (self._schools_count))
+++ /dev/null
-#!/usr/bin/env python
-# -*- coding:utf8 -*-
-# Copyright (C) 2012 FinalsClub Foundation
-
-"""
- Models for schools.
- Handles schools and departments.
-"""
-import datetime
-
-from django.db import models
-from django.template import defaultfilters
-
-
-class School(models.Model):
- """ A grouping that contains many courses """
- name = models.CharField(max_length=255)
- slug = models.SlugField(max_length=150, null=True)
- location = models.CharField(max_length=255, blank=True, null=True)
- url = models.URLField(max_length=511, blank=True)
- # Facebook keeps a unique identifier for all schools
- facebook_id = models.BigIntegerField(blank=True, null=True)
- # United States Department of Education institution_id
- usde_id = models.BigIntegerField(blank=True, null=True)
- file_count = models.IntegerField(default=0)
- priority = models.BooleanField(default=0)
- alias = models.CharField(max_length=255, null=True, blank=True)
- hashtag = models.CharField(max_length=16, null=True, blank=True, unique=True, help_text='School abbreviation without #')
-
- class Meta:
- """ Sort School by file_count descending, name abc=> """
- ordering = ['-file_count','-priority', 'name']
-
-
- def __unicode__(self):
- return self.name
-
- def save(self, *args, **kwargs):
- """ Save school and generate a slug if one doesn't exist """
- if not self.slug:
- self.slug = defaultfilters.slugify(self.name)
- super(School, self).save(*args, **kwargs)
-
- @staticmethod
- def autocomplete_search_fields():
- return ("name__icontains",)
-
- def update_note_count(self):
- """ Update the School.file_count by summing the
- contained course.file_count
- """
- self.file_count = sum([course.file_count for course in self.course_set.all()])
- self.save()
-
-
-class Department(models.Model):
- """ Department within a School. """
- name = models.CharField(max_length=255)
- school = models.ForeignKey(School) # Should this be optional ever?
- slug = models.SlugField(max_length=150, null=True)
- url = models.URLField(max_length=511, blank=True, null=True)
-
- def __unicode__(self):
- return self.name
-
- def save(self, *args, **kwargs):
- """ Save department and generate a slug if one doesn't exist """
- if not self.slug:
- self.slug = defaultfilters.slugify(self.name)
- super(Department, self).save(*args, **kwargs)
'karmaworld.apps.users',
'karmaworld.apps.moderation',
'karmaworld.apps.licenses',
- 'karmaworld.apps.schools',
)
# See: https://docs.djangoproject.com/en/dev/ref/settings/#installed-apps