3 # Copyright (C) 2012 FinalsClub Foundation
9 from apps.notes.models import Note
10 from apps.notes.gdrive import convert_raw_document
11 from apps.courses.models import Course
12 from apps.schools.models import School
13 from apps.schools.models import Department
14 from apps.licenses.models import License
15 from apps.document_upload.models import RawDocument
16 from django.core.management.base import BaseCommand
17 from karmaworld.secret.filepicker import FILEPICKER_API_KEY
class Command(BaseCommand):
    """Management command that imports mit-ocw-scraper JSON output.

    Each JSON file describes one department with its courses and the links
    to their notes.  Departments and courses are created on demand; every
    note link is stored to Filepicker, saved as a RawDocument, tagged, and
    handed to convert_raw_document().
    """

    args = 'directory containing json files from mit-ocw-scraper'
    help = """
           This command will systematically parse all *.json files in the given
           directory and load them into the database as course notes, uploaded
           through Filepicker.

           It is assumed the json files are generated by (or follow the same
           format as) mit-ocw-scraper:
           https://github.com/AndrewMagliozzi/mit-ocw-scraper
           """

    # For now, assume the school is MIT and find it by its US DepEd ID.
    # TODO for later, do something more clever.
    MIT_USDE_ID = 121415

    # For now, assume the license is the default OCW license: CC-BY-NC 3.
    # TODO for later, do something more clever.
    OCW_LICENSE_NAME = 'cc-by-nc-3.0'

    def handle(self, *args, **kwargs):
        """Import every *.json file found in the directory given as args[0].

        Args:
            *args: positional command-line arguments; args[0] must be the
                directory holding the scraper output.
            **kwargs: command options (unused).

        Raises:
            CommandError: if no directory is given, the path is not a
                directory, or the expected School / License row is missing.
        """
        # Imported here so the command reports bad usage through Django's
        # own error type; a CommandError is printed cleanly by manage.py.
        from django.core.management.base import CommandError

        if not args:
            raise CommandError("Expected one argument, got none: please specify a directory to parse.")

        # Convert given path to an absolute path, not relative.
        path = os.path.abspath(args[0])
        if not os.path.isdir(path):
            raise CommandError("First argument should be a directory to parse.")

        try:
            dbschool = School.objects.filter(usde_id=self.MIT_USDE_ID)[0]
        except IndexError:
            raise CommandError(
                "No School with usde_id={0} found; load schools first.".format(self.MIT_USDE_ID))

        try:
            dblicense = License.objects.filter(name=self.OCW_LICENSE_NAME)[0]
        except IndexError:
            raise CommandError(
                "No License named '{0}' found; load licenses first.".format(self.OCW_LICENSE_NAME))

        # build Filepicker upload URL
        # http://stackoverflow.com/questions/14115280/store-files-to-filepicker-io-from-the-command-line
        fpurl = 'https://www.filepicker.io/api/store/S3?key={0}'.format(FILEPICKER_API_KEY)

        # parse each json file and process it for courses and notes.
        for filename in self._json_files(path):
            # Only the parse needs the file; close it before the (slow)
            # database and network work starts.
            with open(filename, 'r') as jsondata:
                parsed = json.load(jsondata)
            self._import_department(parsed, dbschool, dblicense, fpurl)

    def _json_files(self, path):
        """Return the full paths of all *.json files directly inside path."""
        # Sorted so repeated runs process files in the same order.
        return [
            os.path.join(path, filename)
            for filename in sorted(os.listdir(path))
            if filename.lower().endswith('.json')
        ]

    def _import_department(self, parsed, dbschool, dblicense, fpurl):
        """Find or create the department of one parsed file, then its courses."""
        # find the department or create one.
        # NOTE(review): the 'school' entry here and in course_info below is
        # reconstructed from context (dbschool is looked up for this
        # purpose) -- confirm against the Department and Course models.
        dept_info = {
            'name': parsed['subject'],
            'school': dbschool,
            'url': parsed['departmentLink'],
        }
        dbdept = Department.objects.get_or_create(**dept_info)[0]

        for course in parsed['courses']:
            # Extract the course info
            course_info = {
                'name': course['courseTitle'],
                'instructor_name': course['professor'],
                'school': dbschool,
            }
            # Create or Find the Course object.
            dbcourse = Course.objects.get_or_create(**course_info)[0]
            dbcourse.department = dbdept
            # Persist the department link; assigning the attribute alone
            # does not write it to the database.
            dbcourse.save()
            print("Course is in the database: {0}".format(dbcourse.name))

            if 'noteLinks' not in course:
                print("No Notes in course.")
                continue

            # process notes for each course
            for note in course['noteLinks']:
                self._import_note(note, dbcourse, dblicense, fpurl)

            print("Notes for {0} are done.".format(dbcourse.name))

    def _import_note(self, note, dbcourse, dblicense, fpurl):
        """Upload one note link (unless already known) and convert it."""
        link = note['link']

        # Check to see if the Note is already uploaded.
        if Note.objects.filter(upstream_link=link).exists():
            print("Already there, moving on: {0}".format(link))
            return

        # Upload URL of note to Filepicker if it is not already stored as a
        # RawDocument.  Fetch at most one row instead of the whole set.
        existing = list(RawDocument.objects.filter(upstream_link=link)[:1])
        if existing:
            # Find the right RawDocument
            print("Already uploaded link {0} to FP.".format(link))
            dbnote = existing[0]
        else:
            # https://developers.inkfilepicker.com/docs/web/#inkblob-store
            print("Uploading link {0} to FP.".format(link))
            # NOTE(review): the POST payload is reconstructed; Filepicker's
            # store endpoint takes the source address as 'url' -- confirm.
            ulresp = requests.post(fpurl, data={
                'url': link,
            })
            ulresp.raise_for_status()
            # Filepicker returns JSON, so use that
            uljson = ulresp.json()

            print("Saving raw document to database.")
            # Extract the note info
            dbnote = RawDocument()
            dbnote.course = dbcourse
            dbnote.name = note['fileName']
            dbnote.license = dblicense
            dbnote.upstream_link = link
            dbnote.fp_file = uljson['url']
            dbnote.mimetype = uljson['type']
            dbnote.is_processed = True  # hack to bypass celery
            # Create the RawDocument object.
            dbnote.save()

        # Tags are added after the save, once the row exists.
        dbnote.tags.add('mit-ocw', 'karma')

        print("Sending to GDrive and saving note to database.")
        convert_raw_document(dbnote)
        print("This note is done.")