--- /dev/null
+#!/usr/bin/env python
+# -*- coding:utf8 -*-
+# Copyright (C) 2012 FinalsClub Foundation
+
+import json
+import os.path
+
+import requests
+from django.core.management.base import BaseCommand
+from django.core.management.base import CommandError
+
+from apps.notes.models import Note
+from apps.notes.gdrive import convert_raw_document
+from apps.courses.models import Course
+from apps.courses.models import School
+from apps.licenses.models import License
+from apps.document_upload.models import RawDocument
+from karmaworld.secret.filepicker import FILEPICKER_API_KEY
+
+class Command(BaseCommand):
+ args = 'directory containing json files from mit-ocw-scraper'
+ help = """
+ This command will systematically parse all *.json files in the given
+ directory and load them into the database as course notes, uploaded
+ through Filepicker.
+
+ It is assumed the json files are generated by (or follow the same
+ format as) mit-ocw-scraper:
+ https://github.com/AndrewMagliozzi/mit-ocw-scraper
+ """
+
+ def handle(self, *args, **kwargs):
+ if len(args) != 1:
+ raise ArgumentError("Expected one argument, got none: please specify a directory to parse.")
+
+ # Convert given path to an absolute path, not relative.
+ path = os.path.abspath(args[0])
+
+ if not os.path.isdir(path):
+ raise ArgumentError("First argument should be a directory to parse.")
+
+ # for now, assume the school is MIT and find by its US DepEd ID.
+ # TODO for later, do something more clever
+ dbschool = School.objects.filter(usde_id=121415)[0]
+
+ # for now, assume license is the default OCW license: CC-BY-NC 3
+ # TODO for later, do something more clever.
+ dblicense = License.objects.filter(name='cc-by-nc-3.0')[0]
+
+ # build Filepicker upload URL
+ # http://stackoverflow.com/questions/14115280/store-files-to-filepicker-io-from-the-command-line
+ fpurl = 'https://www.filepicker.io/api/store/S3?key={0}'.format(FILEPICKER_API_KEY)
+
+ # find all *.json files in the given directory
+ def is_json_file(filename):
+ return filename[-5:].lower() == '.json'
+ json_files = filter(is_json_file, os.listdir(path))
+ # prepend filenames with absolute paths
+ def full_path_to_file(filename):
+ return os.path.sep.join((path, filename))
+ json_files = map(full_path_to_file, json_files)
+
+ # parse each json file and process it for courses and notes.
+ for filename in json_files:
+ with open(filename, 'r') as jsondata:
+ # parse JSON into python
+ parsed = json.load(jsondata)
+ # process courses
+ for course in parsed['courses']:
+
+ # Extract the course info
+ course_info = {
+ 'name': course['courseTitle'],
+ 'instructor_name': course['professor'],
+ 'school': dbschool,
+ # courseLink is "course-number-name-semester-year"
+ 'academic_year': \
+ int(course['courseLink'].split('-')[-1])
+ }
+ # Create or Find the Course object.
+ dbcourse = Course.objects.get_or_create(**course_info)[0]
+ print "Course is in the database: {0}".format(dbcourse.name)
+
+ # process notes for each course
+ for note in course['noteLinks']:
+ # Check to see if the Note is already there.
+ if len(RawDocument.objects.filter(upstream_link=note['link'])):
+ print "Already there, moving on: {0}".format(note['link'])
+ continue
+
+ # Download the note into memory.
+ print "Downloading {0}".format(note['link'])
+ dlresp = requests.get(note['link'])
+ # Check there weren't any problems
+ dlresp.raise_for_status()
+
+ # Upload raw contents of note to Filepicker
+ # https://developers.inkfilepicker.com/docs/web/#inkblob-store
+ print "Uploading to FP."
+ ulresp = requests.post(fpurl, files={
+ #'fileUpload': (note['fileName'], dlresp.raw)
+ 'fileUpload': dlresp.raw,
+ })
+ ulresp.raise_for_status()
+ # Filepicker returns JSON, so use that
+ uljson = ulresp.json()
+
+ print "Saving raw document to database."
+ # Extract the note info
+ dbnote = RawDocument()
+ dbnote.course = dbcourse
+ dbnote.name = note['fileName']
+ dbnote.license = dblicense
+ dbnote.upstream_link = note['link']
+ dbnote.fp_file = uljson['url']
+ dbnote.mimetype = uljson['type']
+ # Create the RawDocument object.
+ dbnote.save()
+
+ # Do tags separately
+ dbnote.tags.add('mit-ocw','karma')
+
+ print "Sending to GDrive and saving note to database."
+ convert_raw_document(dbnote)
+ print "This note is done."
+
+
+ print "Notes for {0} are done.".format(dbcourse.name)