From 49afcbccbe2a42cd3bd14b2827eb7ad4ca6b748d Mon Sep 17 00:00:00 2001 From: Charles Connell Date: Fri, 6 Jun 2014 16:56:55 -0400 Subject: [PATCH] WIP on mail box --- karmaworld/apps/quizzes/tasks.py | 229 +++++++++++++++++++++++++++---- 1 file changed, 202 insertions(+), 27 deletions(-) diff --git a/karmaworld/apps/quizzes/tasks.py b/karmaworld/apps/quizzes/tasks.py index fca872f..fa8d5cc 100644 --- a/karmaworld/apps/quizzes/tasks.py +++ b/karmaworld/apps/quizzes/tasks.py @@ -1,25 +1,33 @@ #!/usr/bin/env python # -*- coding:utf8 -*- # Copyright (C) 2013 FinalsClub Foundation +import email +import poplib +import base64 +import re +import time +import json +from karmaworld.utils.filepicker import encode_fp_policy, sign_fp_policy import os from boto.mturk.qualification import PercentAssignmentsApprovedRequirement, Qualifications from boto.mturk.question import Overview, FormattedContent, QuestionContent, Question, FreeTextAnswer, QuestionForm, \ - AnswerSpecification + AnswerSpecification, SelectionAnswer from celery import task from celery.utils.log import get_task_logger from boto.mturk.connection import MTurkConnection from django.contrib.sites.models import Site -from django.core.exceptions import ObjectDoesNotExist -from karmaworld.apps.notes.models import Note +from karmaworld.apps.notes.models import Document from karmaworld.apps.quizzes.models import Keyword, HIT from django.conf import settings +import requests + logger = get_task_logger(__name__) -HIT_TITLE_TEMPLATE = 'Get paid to learn {course} at {school}' -HIT_DESCRIPTION = "Read students' course notes on KarmaNotes.org and " \ +KEYWORDS_HIT_TITLE_TEMPLATE = 'Get paid to learn {course} at {school}' +KEYWORDS_HIT_DESCRIPTION = "Read students' course notes on KarmaNotes.org and " \ "identify 10 or more keywords along with descriptions of them" -HIT_OVERVIEW_TEMPLATE = \ +KEYWORDS_HIT_OVERVIEW_TEMPLATE = \ '

KarmaNotes.org is a non-profit organization dedicated to free and open education. ' \ 'We need your help to identify keywords and definitions in college student lecture notes. ' \ 'Here is one example from an American History course:

' \ @@ -32,13 +40,13 @@ HIT_OVERVIEW_TEMPLATE = \ '

In these notes, please find 10 to 20 key words and definitions within these student notes. ' \ 'With your help, we will generate free and open flashcards and quizzes to help ' \ 'students study. Together we can open education, one lecture at a time.

' -HIT_KEYWORDS = 'writing, summary, keywords' -HIT_DURATION = 60 * 60 * 24 * 7 -HIT_REWARD = 0.92 -HIT_PERCENT_APPROVED_REQUIREMENT = PercentAssignmentsApprovedRequirement(comparator='GreaterThan', integer_value=95) -HIT_QUALIFICATION = Qualifications(requirements=[HIT_PERCENT_APPROVED_REQUIREMENT]) +KEYWORDS_HIT_KEYWORDS = 'writing, summary, keywords' +KEYWORDS_HIT_DURATION = 60 * 60 * 24 * 7 +KEYWORDS_HIT_REWARD = 0.92 +KEYWORDS_HIT_PERCENT_APPROVED_REQUIREMENT = PercentAssignmentsApprovedRequirement(comparator='GreaterThan', integer_value=95) +KEYWORDS_HIT_QUALIFICATION = Qualifications(requirements=[KEYWORDS_HIT_PERCENT_APPROVED_REQUIREMENT]) -KEYWORD_FIELDS = [ +KEYWORDS_HIT_KEYWORD_FIELDS = [ ('keyword01', 'Keyword 1'), ('keyword02', 'Keyword 2'), ('keyword03', 'Keyword 3'), @@ -61,7 +69,7 @@ KEYWORD_FIELDS = [ ('keyword20', 'Keyword 20'), ] -DEFINITION_FIELDS = [ +KEYWORDS_HIT_DEFINITION_FIELDS = [ ('definition01', 'Definition 1'), ('definition02', 'Definition 2'), ('definition03', 'Definition 3'), @@ -99,12 +107,12 @@ def submit_extract_keywords_hit(note): host=MTURK_HOST) if note.course.school: - title = HIT_TITLE_TEMPLATE.format(course=note.course.name, school=note.course.school.name) + title = KEYWORDS_HIT_TITLE_TEMPLATE.format(course=note.course.name, school=note.course.school.name) else: - title = HIT_TITLE_TEMPLATE.format(course=note.course.name, school=note.course.department.school.name) + title = KEYWORDS_HIT_TITLE_TEMPLATE.format(course=note.course.name, school=note.course.department.school.name) overview = Overview() - overview.append(FormattedContent(HIT_OVERVIEW_TEMPLATE.format(domain=Site.objects.get_current(), + overview.append(FormattedContent(KEYWORDS_HIT_OVERVIEW_TEMPLATE.format(domain=Site.objects.get_current(), link=note.get_absolute_url()))) keyword_fta = FreeTextAnswer() @@ -116,27 +124,27 @@ def submit_extract_keywords_hit(note): question_form = QuestionForm() question_form.append(overview) - for i in range(min(len(KEYWORD_FIELDS), 
len(DEFINITION_FIELDS))): + for i in range(min(len(KEYWORDS_HIT_KEYWORD_FIELDS), len(KEYWORDS_HIT_DEFINITION_FIELDS))): keyword_content = QuestionContent() - keyword_content.append_field('Title', KEYWORD_FIELDS[i][1]) - keyword_question = Question(identifier=KEYWORD_FIELDS[i][0], + keyword_content.append_field('Title', KEYWORDS_HIT_KEYWORD_FIELDS[i][1]) + keyword_question = Question(identifier=KEYWORDS_HIT_KEYWORD_FIELDS[i][0], content=keyword_content, answer_spec=AnswerSpecification(keyword_fta), is_required=True if i <= 10 else False) question_form.append(keyword_question) definition_content = QuestionContent() - definition_content.append_field('Title', DEFINITION_FIELDS[i][1]) - definition_question = Question(identifier=DEFINITION_FIELDS[i][0], + definition_content.append_field('Title', KEYWORDS_HIT_DEFINITION_FIELDS[i][1]) + definition_question = Question(identifier=KEYWORDS_HIT_DEFINITION_FIELDS[i][0], content=definition_content, answer_spec=AnswerSpecification(definition_fta), is_required=False) question_form.append(definition_question) hit = connection.create_hit(questions=question_form, max_assignments=1, - title=title, description=HIT_DESCRIPTION, - keywords=HIT_KEYWORDS, duration=HIT_DURATION, - reward=HIT_REWARD, qualifications=HIT_QUALIFICATION, + title=title, description=KEYWORDS_HIT_DESCRIPTION, + keywords=KEYWORDS_HIT_KEYWORDS, duration=KEYWORDS_HIT_DURATION, + reward=KEYWORDS_HIT_REWARD, qualifications=KEYWORDS_HIT_QUALIFICATION, annotation=str(note.id))[0] HIT.objects.create(HITId=hit.HITId, note=note, processed=False) @@ -165,9 +173,9 @@ def get_extract_keywords_results(): for question_form_answer in assignment.answers[0]: answers[question_form_answer.qid] = question_form_answer.fields[0] - for i in range(min(len(KEYWORD_FIELDS), len(DEFINITION_FIELDS))): - keyword_qid = KEYWORD_FIELDS[i][0] - definition_qid = DEFINITION_FIELDS[i][0] + for i in range(min(len(KEYWORDS_HIT_KEYWORD_FIELDS), len(KEYWORDS_HIT_DEFINITION_FIELDS))): + keyword_qid = 
KEYWORDS_HIT_KEYWORD_FIELDS[i][0] + definition_qid = KEYWORDS_HIT_DEFINITION_FIELDS[i][0] try: keyword = answers[keyword_qid] definition = answers[definition_qid] @@ -180,3 +188,170 @@ def get_extract_keywords_results(): logger.info('Done processing HIT {0}'.format(hit_object.HITId)) hit_object.processed = True hit_object.save() + + +EMAIL_HIT_TITLE_TEMPLATE = 'Identify fields in an email' +EMAIL_HIT_DESCRIPTION = "Read an email about a college course and pull out information " \ + "about that course." +EMAIL_HIT_OVERVIEW_TEMPLATE = \ + '

KarmaNotes.org is a non-profit organization dedicated to free and open education. ' \ + 'We receive emails from students with their course notes attached. The bodies of the emails ' \ + 'usually describe the course that the notes are about, as well as the notes themselves. ' \ + 'We need your help to convert this into a format ' \ + 'our system can understand. See the email printed below, and fill out as much information ' \ + 'about the course as you are able. You may need to look at the documents attached to the email, ' \ + 'which are linked to below it.

' \ + '

Subject: {subject}

' \ + '
{body}
' +EMAIL_HIT_ATTACHMENT_OVERVIEW_TEMPLATE = '

The follow document was attached to the email. View it and fill out \ + as much information as you can about it. {name}

'
+EMAIL_HIT_KEYWORDS = 'email, copying, reading, writing'
+EMAIL_HIT_DURATION = 60 * 60 * 24 * 7
+EMAIL_HIT_REWARD = 0.10
+EMAIL_HIT_PERCENT_APPROVED_REQUIREMENT = PercentAssignmentsApprovedRequirement(comparator='GreaterThan', integer_value=95)
+# Built from the email HIT's own requirement (previously reused the keywords HIT's).
+EMAIL_HIT_QUALIFICATION = Qualifications(requirements=[EMAIL_HIT_PERCENT_APPROVED_REQUIREMENT])
+
+# Question identifiers used in the email HIT form.
+COURSE_NAME_QID = 'course_name'
+INSTRUCTOR_NAMES_QID = 'instructor_names'
+SCHOOL_NAME_QID = 'school_name'
+DEPARTMENT_NAME_QID = 'department_name'
+NOTE_TITLE_QID = 'note_title'
+CATEGORY_QID = 'category'
+TAGS_QID = 'tags'
+# Each Document.NOTE_CATEGORIES entry with its first two elements swapped.
+# NOTE(review): presumably (label, value) order for SelectionAnswer -- confirm.
+NOTE_CATEGORIES_FOR_MTURK = [(c[1], c[0]) for c in Document.NOTE_CATEGORIES]
+
+# Filepicker policies. The expiry is evaluated once, at import time, as
+# "now + 31536000 seconds" (365 days).
+FP_POLICY_JSON_READ_WRITE = '{{"expiry": {0}, "call": ["store","read","stat"]}}'
+FP_POLICY_JSON_READ_WRITE = FP_POLICY_JSON_READ_WRITE.format(int(time.time() + 31536000))
+FP_POLICY_READ_WRITE = encode_fp_policy(FP_POLICY_JSON_READ_WRITE)
+FP_SIGNATURE_READ_WRITE = sign_fp_policy(FP_POLICY_READ_WRITE)
+
+FP_POLICY_JSON_READ = '{{"expiry": {0}, "call": ["read","stat"]}}'
+FP_POLICY_JSON_READ = FP_POLICY_JSON_READ.format(int(time.time() + 31536000))
+FP_POLICY_READ = encode_fp_policy(FP_POLICY_JSON_READ)
+FP_SIGNATURE_READ = sign_fp_policy(FP_POLICY_READ)
+
+# The named group is read back with .group('filename'). [^"]+ stops at the
+# closing quote so later parameters (size="...", etc.) are not swallowed.
+CONTENT_DISPOSITION_REGEX = r'filename="(?P<filename>[^"]+)"'
+
+
+def _free_text_question(identifier, title, is_required):
+    """Return a free-text Question with the given identifier and title."""
+    content = QuestionContent()
+    content.append_field('Title', title)
+    return Question(identifier=identifier,
+                    content=content,
+                    answer_spec=AnswerSpecification(FreeTextAnswer()),
+                    is_required=is_required)
+
+
+def _upload_attachment(part, filepicker_api_key):
+    """Store one attachment part on filepicker.
+
+    Returns a (url, filename) tuple whose url carries the read policy and
+    signature, or None when the part has no quoted filename or the upload
+    does not answer with HTTP 200.
+    """
+    match = re.search(CONTENT_DISPOSITION_REGEX, part['Content-Disposition'])
+    if match is None:
+        logger.warning('Got an attachment with no filename, skipping it')
+        return None
+    filename = match.group('filename')
+
+    # decode=True undoes the part's Content-Transfer-Encoding (base64 or
+    # quoted-printable) and hands back other payloads undecoded.
+    data = part.get_payload(decode=True)
+
+    # Pass the query as params so requests percent-encodes the filename.
+    resp = requests.post('https://www.filepicker.io/api/store/S3',
+                         params={'key': filepicker_api_key,
+                                 'policy': FP_POLICY_READ_WRITE,
+                                 'signature': FP_SIGNATURE_READ_WRITE,
+                                 'mimetype': part.get_content_type(),
+                                 'filename': filename},
+                         data=data)
+    if resp.status_code != 200:
+        logger.warning('Could not upload an attachment to filepicker')
+        return None
+
+    url = json.loads(resp.text)['url']
+    url = url + '?policy={policy}&signature={signature}'\
+        .format(policy=FP_POLICY_READ, signature=FP_SIGNATURE_READ)
+    return url, filename
+
+
+def _build_email_question_form(subject, body, attachments):
+    """Assemble the QuestionForm for one email.
+
+    subject and body are plain strings; attachments is a list of
+    (url, filename) tuples. All three are escaped before being placed in
+    FormattedContent because they come from arbitrary incoming email.
+    """
+    # Function-scope import keeps this change inside the block.
+    from xml.sax.saxutils import escape
+
+    overview = Overview()
+    overview.append(FormattedContent(
+        EMAIL_HIT_OVERVIEW_TEMPLATE.format(subject=escape(subject), body=escape(body))))
+
+    question_form = QuestionForm()
+    question_form.append(overview)
+    question_form.append(_free_text_question(COURSE_NAME_QID, 'Course Name', True))
+    question_form.append(_free_text_question(INSTRUCTOR_NAMES_QID, 'Instructor Name(s)', False))
+    question_form.append(_free_text_question(SCHOOL_NAME_QID, 'School Name', True))
+    question_form.append(_free_text_question(DEPARTMENT_NAME_QID, 'Department Name', False))
+
+    for index, (url, filename) in enumerate(attachments, 1):
+        # This overview used to be built and then dropped; append it so the
+        # worker can see which document the next two questions refer to.
+        attachment_overview = Overview()
+        attachment_overview.append(FormattedContent(
+            EMAIL_HIT_ATTACHMENT_OVERVIEW_TEMPLATE.format(link=escape(url), name=escape(filename))))
+        question_form.append(attachment_overview)
+
+        # Suffix the identifiers with the attachment number so an email with
+        # several attachments does not repeat a question identifier.
+        question_form.append(_free_text_question(
+            '{0}_{1}'.format(NOTE_TITLE_QID, index), 'Note Title', True))
+
+        category_content = QuestionContent()
+        category_content.append_field('Title', 'Note Category')
+        answer = SelectionAnswer(style='dropdown', selections=NOTE_CATEGORIES_FOR_MTURK)
+        question_form.append(Question(identifier='{0}_{1}'.format(CATEGORY_QID, index),
+                                      content=category_content,
+                                      answer_spec=AnswerSpecification(answer),
+                                      is_required=True))
+
+    return question_form
+
+
+@task(name='check_notes_mailbox')
+def check_notes_mailbox():
+    """Read the notes mailbox and build an MTurk question form per email.
+
+    Credentials and the filepicker key come from the NOTES_MAILBOX_USERNAME,
+    NOTES_MAILBOX_PASSWORD and FILEPICKER_API_KEY environment variables; if
+    any is missing the task logs a warning and returns. Each multipart
+    message fetched from pop.gmail.com has its attachments uploaded to
+    filepicker and a QuestionForm assembled from its subject, plain-text
+    body and attachments. Non-multipart messages are skipped.
+    """
+    try:
+        user = os.environ['NOTES_MAILBOX_USERNAME']
+        password = os.environ['NOTES_MAILBOX_PASSWORD']
+        filepicker_api_key = os.environ['FILEPICKER_API_KEY']
+    except KeyError:
+        logger.warning('Could not find notes mailbox secrets, not running check_notes_mailbox')
+        return
+
+    mailbox = poplib.POP3_SSL('pop.gmail.com', 995)
+    try:
+        mailbox.user(user)
+        mailbox.pass_(password)
+        num_messages = len(mailbox.list()[1])
+        # POP3 message numbers start at 1.
+        for message_number in range(1, num_messages + 1):
+            # construct message object from raw message
+            raw_message_string = '\n'.join(mailbox.retr(message_number)[1])
+            message = email.message_from_string(raw_message_string)
+
+            if not message.is_multipart():
+                logger.warning('Got an email with no attachments')
+                continue
+
+            attachments = []
+            message_body = ''
+
+            # walk() also visits parts nested inside multipart containers
+            # (e.g. a text/plain body inside multipart/alternative).
+            for part in message.walk():
+                disposition = part['Content-Disposition']
+                # Look for the message's plain text body
+                if part.get_content_type() == 'text/plain' and disposition is None:
+                    message_body = part.get_payload()
+                # Look for attachments
+                elif disposition and 'attachment;' in disposition:
+                    uploaded = _upload_attachment(part, filepicker_api_key)
+                    if uploaded is not None:
+                        attachments.append(uploaded)
+
+            # TODO: the assembled form is not submitted anywhere yet (WIP).
+            question_form = _build_email_question_form(
+                message['Subject'] or '', message_body, attachments)
+    finally:
+        # Always close the POP3 session, even if a message fails to parse.
+        mailbox.quit()
+
+
-- 
2.25.1