import requests
from karmaworld.apps.notes.models import Note
+from karmaworld.apps.notes.models import NoteMarkdown as NoteContent
+from karmaworld.apps.notes.models import Document
from karmaworld.apps.notes.gdrive import convert_raw_document
from karmaworld.apps.courses.models import Course
from karmaworld.apps.courses.models import School
def handle(self, *args, **kwargs):
if len(args) != 1:
- raise ArgumentError("Expected one argument, got none: please specify a directory to parse.")
+ raise TypeError("Expected one argument, got none: please specify a directory to parse.")
# Convert given path to an absolute path, not relative.
path = os.path.abspath(args[0])
if not os.path.isdir(path):
- raise ArgumentError("First argument should be a directory to parse.")
+ raise TypeError("First argument should be a directory to parse.")
# for now, assume the school is MIT and find by its US DepEd ID.
# TODO for later, do something more clever
dbschool = School.objects.filter(usde_id=121415)[0]
- # for now, assume license is the default OCW license: CC-BY-NC 3
+ # for now, assume license is the default OCW license: CC-BY-NC
# TODO for later, do something more clever.
dblicense = License.objects.get_or_create(
name='cc-by-nc-3.0',
- html='<a rel="license" href="http://creativecommons.org/licenses/by-nc/4.0/"><img alt="Creative Commons License" style="border-width:0" src="http://i.creativecommons.org/l/by-nc/4.0/88x31.png" /></a>'
+ html='<a rel="license" href="http://creativecommons.org/licenses/by-nc/3.0/"><img alt="Creative Commons License" style="border-width:0" src="http://i.creativecommons.org/l/by-nc/3.0/88x31.png" /></a>'
)[0]
- # build Filepicker upload URL
- # http://stackoverflow.com/questions/14115280/store-files-to-filepicker-io-from-the-command-line
- fpurl = 'https://www.filepicker.io/api/store/S3?key={0}'.format(FILEPICKER_API_KEY)
-
# find all *.json files in the given directory
def is_json_file(filename):
return filename[-5:].lower() == '.json'
# parse each json file and process it for courses and notes.
for filename in json_files:
+ # each file is assumed to contain courses for a single department
with open(filename, 'r') as jsondata:
# parse JSON into python
parsed = json.load(jsondata)
'school': dbschool,
'url': parsed['departmentLink'],
}
- dbdept = Department.objects.get_or_create(**dept_info)[0]
+ # Defer department creation only until there is a valid course.
+ # keeping it at this scope is a bit like caching.
+ dbdept = None
# process courses
for course in parsed['courses']:
+ if 'noteLinks' not in course or not course['noteLinks']:
+ print "No Notes in course."
+ continue
+
+ # only create department if necessary at this time
+ if dbdept is None:
+ dbdept = Department.objects.get_or_create(**dept_info)[0]
+
# Assume first hit is always right. Solving the identity
# problem by name alone will always be a fool's errand.
dbprof = Professor.objects.get_or_create(name=course['professor'])[0]
- # Associate the professor with the department.
- # (no need to track the result)
- ProfessorAffiliation.objects.get_or_create(
- professor=dbprof,
- department=dbdept)
-
# Extract the course info
course_info = {
'name': course['courseTitle'],
}
# Create or Find the Course object.
dbcourse = Course.objects.get_or_create(**course_info)[0]
- dbcourse.professor = dbprof
+ dbcourse.professor.add(dbprof)
dbcourse.instructor_name = course['professor']
dbcourse.school = dbschool
dbcourse.save()
print "Course is in the database: {0}".format(dbcourse.name)
- ProfessorTaught.objects.get_or_create(
- professor=dbprof,
- course=dbcourse)
-
- if 'noteLinks' not in course or not course['noteLinks']:
- print "No Notes in course."
- continue
-
# process notes for each course
for note in course['noteLinks']:
# Check to see if the Note is already uploaded.
continue
if len(dbnote) == 1:
dbnote = dbnote[0]
- if dbnote.text and len(dbnote.text) or \
- dbnote.html and len(dbnote.html):
+ # should only be 1 entry, but get() errors loudly
+ # when none are found. filter is easier to work with
+ dbcontent = NoteContent.objects.filter(note_id=dbnote.id)
+ if dbnote.text and len(dbnote.text) and \
+ len(dbcontent) and \
+ len(dbcontent[0].html):
print "Already there, moving on: {0}".format(url)
continue
else:
# Partially completed note. Remove it and try
# again.
dbnote.tags.set() # clear tags
+ for content in dbcontent:
+ dbcontent.delete() # delete any note content
dbnote.delete() # delete note
print "Found and removed incomplete note {0}.".format(url)
# in RawDocument.
rd_test = RawDocument.objects.filter(upstream_link=url)
if not len(rd_test):
- # https://developers.inkfilepicker.com/docs/web/#inkblob-store
+ # https://developers.filepicker.io/docs/web/rest/#blob-store
print "Uploading link {0} to FP.".format(url)
- ulresp = requests.post(fpurl, data={
- 'url': url,
- })
+ ulresp = requests.post(
+ 'https://www.filepicker.io/api/store/S3',
+ params={
+ 'key': FILEPICKER_API_KEY,
+ 'policy': Document.fp_policy,
+ 'signature': Document.fp_signature,
+ },
+ data={
+ 'url': url,
+ })
try:
ulresp.raise_for_status()
except Exception, e:
# Do tags separately
dbnote.tags.add('mit-ocw','karma')
- print "Converting document and saving note to S3."
+ print "Converting document and saving note text."
while True:
try:
convert_raw_document(dbnote)
break
print "Notes for {0} are done.".format(dbcourse.name)
+ # Informational output for debugging
+ if dbdept is None:
+ print "No courses with notes found for {name}.".format(**dept_info)