"""
import datetime
from django.db.models import SET_NULL
-from django.db.models.signals import post_save, post_delete
+from django.db.models.signals import post_save, post_delete, pre_save
from django.dispatch import receiver
import os
import urllib
from karmaworld.apps.courses.models import Course
from karmaworld.apps.licenses.models import License
-import karmaworld.apps.notes.search as search
+from karmaworld.apps.notes.search import SearchIndex
fs = FileSystemStorage(location=settings.MEDIA_ROOT)
note_instance.course.update_note_count()
note_instance.course.school.update_note_count()
+@receiver(pre_save, sender=Note, weak=False)
+def note_pre_save_receiver(sender, **kwargs):
+ """Stick an instance of the pre-save value of
+ the given Note instance in the instances itself.
+ This will be looked at in post_save."""
+ if not 'instance' in kwargs:
+ return
+
+ kwargs['instance'].old_instance = Note.objects.get(id=kwargs['instance'].id)
+
@receiver(post_save, sender=Note, weak=False)
def note_save_receiver(sender, **kwargs):
if not 'instance' in kwargs:
return
note = kwargs['instance']
- # Update course and school counts of how
- # many notes they have
+ index = SearchIndex()
if kwargs['created']:
update_note_counts(note)
-
- # Add or update document in search index
- search.add_document(note)
+ index.add_note(note)
+ else:
+ index.update_note(note, note.old_instance)
@receiver(post_delete, sender=Note, weak=False)
update_note_counts(kwargs['instance'])
# Remove document from search index
- search.remove_document(note)
+ index = SearchIndex()
+ index.remove_note(note)
#!/usr/bin/env python
# -*- coding:utf8 -*-
# Copyright (C) 2013 FinalsClub Foundation
+
import calendar
import time
logging.basicConfig()
logger = logging.getLogger(__name__)
-api_client = itc.ApiClient(secret.PRIVATE_URL)
-if not api_client.get_index(secret.INDEX).exists():
- api_client.create_index(secret.INDEX, {'public_search': False})
-
-index = api_client.get_index(secret.INDEX)
-
-while not index.has_started():
- time.sleep(0.5)
-
-# Default scoring function
-# Results are sorted by combination of "relevance"
-# and number of thanks they have received.
-# "Relevance" is a black box provided by IndexDen.
-index.add_function(0, 'relevance * log(doc.var[0])')
class SearchResult(object):
+ """The result of making a query into IndexDen.
+ @param ordered_ids A list of the note IDs found, in order they
+ should be displayed
+ @param snippet_dict A dictionary mapping note IDs to snippets
+ to show in search results
+ @param has_more A boolean indicating if the user should
+ request more results by increasing
+ the page number of the query."""
def __init__(self, ordered_ids, snippet_dict, has_more):
self.ordered_ids = ordered_ids
self.snippet_dict = snippet_dict
self.has_more = has_more
-def note_to_dict(note):
- d = {
- 'name': note.name,
- 'text': note.text
- }
-
- if note.tags.exists():
- d['tags'] = ' '.join([str(tag) for tag in note.tags.all()])
-
- if note.course:
- d['course_id'] = note.course.id
-
- if note.uploaded_at:
- d['timestamp'] = calendar.timegm(note.uploaded_at.timetuple())
-
- return d
-
-def add_document(note):
- if note.text:
- logger.info("Indexing {n}".format(n=note))
- index.add_document(note.id, note_to_dict(note), variables={0: note.thanks})
- else:
- logger.warn("Note {n} has no text, will not add to IndexDen".format(n=note))
-
-def remove_document(note):
- index.delete_document(note.id)
-
-def search(query, course_id=None, page=0):
- """Returns note IDs matching the given query,
- filtered by course ID if given"""
- if course_id:
- real_query = '("%s" OR name:"%s") AND course_id:%s' % (query, query, course_id)
- else:
- real_query = '"%s" OR name:"%s"' % (query, query)
-
- raw_results = index.search(real_query, snippet_fields=['text'],
- length=PAGE_SIZE, start=(page*PAGE_SIZE))
-
- ordered_ids = [int(r['docid']) for r in raw_results['results']]
- snippet_dict = {int(r['docid']): r['snippet_text'] for r in raw_results['results']}
- # Are there more results to show the user if they want?
- has_more = True if int(raw_results['matches']) > ((page+1) * PAGE_SIZE) else False
+class Singleton(type):
+ """Set this as the metaclass of another
+ class to ensure that it will only have one instance.
+ Borrowed from
+ http://stackoverflow.com/questions/6760685/creating-a-singleton-in-python"""
+
+ _instances = {}
+ def __call__(cls, *args, **kwargs):
+ if cls not in cls._instances:
+ cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs)
+ return cls._instances[cls]
+
+
+class SearchIndex(object):
+ """A singleton class used to interface with the IndexDen
+ search index."""
- return SearchResult(ordered_ids, snippet_dict, has_more)
+ __metaclass__ = Singleton
+
+ def __init__(self):
+ api_client = itc.ApiClient(secret.PRIVATE_URL)
+ if not api_client.get_index(secret.INDEX).exists():
+ api_client.create_index(secret.INDEX, {'public_search': False})
+
+ self.index = api_client.get_index(secret.INDEX)
+
+ while not self.index.has_started():
+ time.sleep(0.5)
+
+ # Default scoring function
+ # Results are sorted by combination of "relevance"
+ # and number of thanks they have received.
+ # "Relevance" is a black box provided by IndexDen.
+ self.index.add_function(0, 'relevance * log(doc.var[0])')
+
+ @staticmethod
+ def _tags_to_str(tags):
+ return ' '.join([str(tag) for tag in tags.all()])
+
+ @staticmethod
+ def _note_to_dict(note):
+ d = {
+ 'name': note.name,
+ 'text': note.text
+ }
+
+ if note.tags.exists():
+ d['tags'] = SearchIndex._tags_to_str(note.tags)
+
+ if note.course:
+ d['course_id'] = note.course.id
+
+ if note.uploaded_at:
+ d['timestamp'] = calendar.timegm(note.uploaded_at.timetuple())
+
+ return d
+
+ def add_note(self, note):
+ """Add a note to the index. If the note is
+ already in the index, it will be overwritten."""
+ if note.text:
+ logger.info("Indexing {n}".format(n=note))
+ self.index.add_document(note.id, SearchIndex._note_to_dict(note), variables={0: note.thanks})
+ else:
+ logger.info("Note {n} has no text, will not add to IndexDen".format(n=note))
+
+ def update_note(self, new_note, old_note):
+ """Update a note. Will only truly update the search
+ index if it needs to. Compares the fields in new_note with
+ old_note to see what has changed."""
+ if not new_note.text:
+ logger.info("Note {n} has no text, will not add to IndexDen".format(n=new_note))
+ return
+
+ # If the indexable fields have changed,
+ # send the document to IndexDen again
+ if new_note.text != old_note.text or \
+ new_note.name != old_note.name or \
+ SearchIndex._tags_to_str(new_note.tags) != SearchIndex._tags_to_str(old_note.tags) or \
+ new_note.course != old_note.course or \
+ new_note.uploaded_at != old_note.uploaded_at:
+ logger.info("Indexing {n}".format(n=new_note))
+ self.index.add_document(new_note.id, SearchIndex._note_to_dict(new_note), variables={0: new_note.thanks})
+
+ # If only the thanks count has changed, we can
+ # just send that
+ elif new_note.thanks != old_note.thanks:
+ logger.info("Indexing thanks variable for {n}".format(n=new_note))
+ self.index.update_variables(new_note.id, variables={0: new_note.thanks})
+
+ # Otherwise we don't need to do anything
+ else:
+ logger.info("Note {n} has not changed sufficiently, will not update IndexDen".format(n=new_note))
+
+ def remove_note(self, note):
+ """Remove a note from the search index."""
+
+ logger.info("Removing from index: {n}".format(n=note))
+ self.index.delete_document(note.id)
+
+ def search(self, query, course_id=None, page=0):
+ """Returns an instance of SearchResult for your query."""
+
+ if course_id:
+ real_query = '("%s" OR name:"%s") AND course_id:%s' % (query, query, course_id)
+ else:
+ real_query = '"%s" OR name:"%s"' % (query, query)
+
+ raw_results = self.index.search(real_query, snippet_fields=['text'],
+ length=PAGE_SIZE, start=(page*PAGE_SIZE))
+
+ ordered_ids = [int(r['docid']) for r in raw_results['results']]
+ snippet_dict = {int(r['docid']): r['snippet_text'] for r in raw_results['results']}
+
+ # Are there more results to show the user if they want?
+ has_more = True if int(raw_results['matches']) > ((page+1) * PAGE_SIZE) else False
+
+ return SearchResult(ordered_ids, snippet_dict, has_more)
#!/usr/bin/env python
# -*- coding:utf8 -*-
# Copyright (C) 2012 FinalsClub Foundation
+
import json
from django.core.exceptions import ObjectDoesNotExist
from karmaworld.apps.courses.models import Course
-from karmaworld.apps.notes import search
+from karmaworld.apps.notes.search import SearchIndex
import os
from django.conf import settings
-from django.contrib.sites.models import Site
from django.http import HttpResponse, HttpResponseBadRequest, HttpResponseNotFound
from django.views.generic import DetailView, ListView
from django.views.generic import FormView
from django.views.generic import View
-from django.views.generic import TemplateView
from django.views.generic.detail import SingleObjectMixin
-from django.shortcuts import get_object_or_404, render_to_response
from karmaworld.apps.notes.models import Note
from karmaworld.apps.notes.forms import NoteForm
else:
page = 0
+ index = SearchIndex()
+
if 'course_id' in self.request.GET:
- raw_results = search.search(self.request.GET['query'],
+ raw_results = index.search(self.request.GET['query'],
self.request.GET['course_id'],
page=page)
else:
- raw_results = search.search(self.request.GET['query'],
+ raw_results = index.search(self.request.GET['query'],
page=page)
instances = Note.objects.in_bulk(raw_results.ordered_ids)