Adding 'careers', 'adults', 'hospital', 'childcare' to RESTRICTED_WORDS
[oweals/karmaworld.git] / karmaworld / apps / courses / management / commands / sanitize_usde_schools.py
1 #!/usr/bin/env python
2 # -*- coding:utf8 -*-
3 # Copyright (C) 2012  FinalsClub Foundation
4 """ A script to sanitize the imported USDE database.
5     It will remove schools whose names contain words
6     in the RESTRICTED_WORDS list """
7
8 from django.core.management.base import BaseCommand
9 from django.db.models import Q
10
11 from karmaworld.apps.courses.models import School
12
# Words that mark an imported USDE entry for removal: a School whose name
# contains any of these (matched case-insensitively by the command below)
# is offered up for deletion.
RESTRICTED_WORDS = [
    'internship',
    'dietetic',
    'massage',
    'therapy',
    'residency',
    'months',
    'hair',
    'cosmetology',
    'beauty',
    'nail',
    'acupuncture',
    'chiropractic',
    'careers',
    'adults',
    'hospital',
    'childcare',
]
30
class Command(BaseCommand):
    """ Delete Schools that contain RESTRICTED WORDS in their names """
    args = 'none'
    help = """ Delete Schools that contain RESTRICTED WORDS in their names """

    def get_input(self, input_prompt):
        """ Ask a yes/no question, re-asking until the answer is valid.

            input_prompt: the text shown to the user when asking.

            Spaces are removed from the reply and case is ignored.
            Returns True for 'y' and False for 'n'.
        """
        prompt = input_prompt
        while True:
            y_n = raw_input(prompt)
            y_n = y_n.replace(" ", "").lower()  # drop spaces, ignore case

            if y_n == 'y':
                return True
            if y_n == 'n':
                return False

            # Loop rather than recurse: the hint is always built from the
            # original prompt, so it is shown once per retry instead of
            # piling up, and repeated bad input cannot exhaust the stack.
            prompt = "Valid responses are [yYnN]\n" + input_prompt

    def handle(self, *args, **kwargs):
        """ The function that gets called to run this command.

            Lists every School whose name contains one of RESTRICTED_WORDS
            (case-insensitive), asks for confirmation, and deletes them.

            Returns False when there is nothing to delete, None otherwise.
        """
        # Imported here so this block does not depend on the file's
        # top-level import list.
        from django.db import DatabaseError

        # generate an |(or)'d query searching inexact for each of RESTRICTED_WORDS
        queries_list = [Q(name__icontains=word) for word in RESTRICTED_WORDS]
        queries_or = reduce(lambda a, b: a | b, queries_list)
        schools = School.objects.filter(queries_or)
        self._schools_count = schools.count()

        # if there are no schools, exit
        if not self._schools_count:
            self.stdout.write('\n')
            self.stdout.write('There are no schools worth sanitizing.\n')
            return False

        self.stdout.write(u"\n\nWARNING: Are you sure you want to delete these schools:\n")
        for s in schools:
            self.stdout.write('%s: %s' % (s.id, s.__unicode__()))
            self.stdout.write('\n')

        if self.get_input("Do you want to delete these schools? [y/n]  "):
            self.stdout.write("...")
            try:
                schools.delete()
            except DatabaseError:
                # Only database failures trigger the one-by-one fallback
                # (presumably a backend limit on statement size, as the
                # message below suggests). Ctrl-C and programming errors
                # now propagate instead of being mistaken for that case.
                self.stdout.write("that is too many to delete at once\n")
                self.stdout.write("you are probabily using sqlite , doing them in batches\n")
                # Count from 1 so progress reads "1 of N" .. "N of N".
                for _i, a_school in enumerate(schools, 1):
                    self.stdout.write("deleting %s of %s..." % (_i, self._schools_count))
                    a_school.delete()
                    self.stdout.write("done\n")
                self.stdout.write("...")

            self.stdout.write("all done!\n")
            self.stdout.write("Deleted %s schools" % (self._schools_count))