3 # Copyright (C) 2013 FinalsClub Foundation
import time

from django.conf import settings
from django.core.exceptions import ImproperlyConfigured
import indextank.client as itc
# Mirrors settings.TESTING.
# NOTE(review): presumably decides whether SearchIndex talks to IndexDen at
# all -- confirm against the guards inside SearchIndex.
MOCK_MODE = settings.TESTING

logger = logging.getLogger(__name__)

# Name of the IndexDen index and the private API URL used to reach it.
# Both are mandatory: a missing environment variable raises KeyError when
# this module is imported.
INDEXDEN_INDEX = os.environ['INDEXDEN_INDEX']
INDEXDEN_PRIVATE_URL = os.environ['INDEXDEN_PRIVATE_URL']
class SearchResult(object):
    """The result of making a query into IndexDen.

    @param ordered_ids  A list of the note IDs found, in the order
                        the search returned them
    @param snippet_dict A dictionary mapping note IDs to snippets
                        to show in search results
    @param has_more     A boolean indicating if the user should
                        request more results by increasing
                        the page number of the query."""

    def __init__(self, ordered_ids, snippet_dict, has_more):
        self.ordered_ids = ordered_ids
        self.snippet_dict = snippet_dict
        self.has_more = has_more

    def __repr__(self):
        # Purely a debugging aid; nothing parses this text.
        return '{cls}(ordered_ids={ids!r}, snippet_dict={snippets!r}, has_more={more!r})'.format(
            cls=type(self).__name__,
            ids=self.ordered_ids,
            snippets=self.snippet_dict,
            more=self.has_more,
        )
class Singleton(type):
    """Set this as the metaclass of another
    class to ensure that it will only have one instance.

    The first call to the class builds the instance; every later call
    returns that same object and its constructor arguments are ignored.

    http://stackoverflow.com/questions/6760685/creating-a-singleton-in-python"""

    # Maps each class that uses this metaclass to its one instance.
    # __call__ relies on this registry existing before the first lookup.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        if cls not in cls._instances:
            cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs)
        return cls._instances[cls]
class SearchIndex(object):
    """A singleton class used to interface with the IndexDen search index.

    Notes are stored as documents keyed by note id, with the note's thanks
    count as scoring variable 0. While MOCK_MODE is true the constructor does
    not contact IndexDen, the mutating methods do nothing, and ``search``
    raises ImproperlyConfigured.
    """

    # NOTE(review): the ``__init__`` header, the ``if MOCK_MODE`` guards, the
    # ``else:`` lines, the start of ``_note_to_dict`` and the polling delay
    # were reconstructed because the listing this class was recovered from
    # dropped those lines -- confirm them against the canonical file.

    # Python 2 metaclass hook; Python 3 ignores this attribute.
    __metaclass__ = Singleton

    # Seconds to wait between polls while the index is starting up.
    _STARTUP_POLL_SECONDS = 0.5

    def __init__(self):
        """Connect to IndexDen, creating the index if it does not exist yet."""
        self.index_name = INDEXDEN_INDEX

        # Only do the index setup in production mode, or in testing mode
        # with indexing explicitly turned on.
        if MOCK_MODE:
            return

        self.api_client = itc.ApiClient(INDEXDEN_PRIVATE_URL)
        if not self.api_client.get_index(self.index_name).exists():
            self.api_client.create_index(self.index_name, {'public_search': False})

        self.index = self.api_client.get_index(self.index_name)

        # Sleep between polls so the loop does not spin on has_started().
        while not self.index.has_started():
            time.sleep(self._STARTUP_POLL_SECONDS)

        # Default scoring function.
        # Results are sorted by a combination of "relevance"
        # and the number of thanks they have received.
        # "Relevance" is a black box provided by IndexDen.
        # NOTE(review): log() of a thanks count of 0 or 1 is not positive --
        # confirm that IndexDen still ranks such notes sensibly.
        self.index.add_function(0, 'relevance * log(doc.var[0])')

    @staticmethod
    def _tags_to_str(tags):
        """Return the string form of every tag in ``tags.all()``, space separated."""
        return ' '.join(str(tag) for tag in tags.all())

    @staticmethod
    def _note_to_dict(note):
        """Build the field dictionary that is sent to IndexDen for *note*.

        ``name`` and ``text`` are always present; ``tags``, ``course_id`` and
        ``timestamp`` (seconds since the epoch) are only added when the note
        has the corresponding data.
        """
        d = {
            'name': note.name,
            'text': note.text,
        }

        if note.tags.exists():
            d['tags'] = SearchIndex._tags_to_str(note.tags)

        if note.course:
            d['course_id'] = note.course.id

        if note.uploaded_at:
            d['timestamp'] = calendar.timegm(note.uploaded_at.timetuple())

        return d

    @staticmethod
    def _sanitize_phrase(text):
        """Return *text* with double quotes replaced by spaces.

        ``search`` embeds the user's text inside a double-quoted phrase, so a
        quote typed by the user would end that phrase early and corrupt the
        rest of the query.
        """
        return text.replace('"', ' ')

    def _index_document(self, note):
        """Send *note* to IndexDen, with its thanks count as variable 0."""
        self.index.add_document(note.id, SearchIndex._note_to_dict(note),
                                variables={0: note.thanks})

    def delete_index(self):
        """This is meant for test cases that want to clean up
        after themselves. Deletes the whole index."""
        if MOCK_MODE:
            return

        self.api_client.delete_index(self.index_name)

    def add_note(self, note):
        """Add a note to the index. If the note is
        already in the index, it will be overwritten.
        Notes without text are logged and skipped."""
        if MOCK_MODE:
            return

        if note.text:
            logger.info("Indexing {n}".format(n=note))
            self._index_document(note)
        else:
            logger.info("Note {n} has no text, will not add to IndexDen".format(n=note))

    def update_note(self, new_note, old_note):
        """Update a note. Will only truly update the search
        index if it needs to. Compares the fields in new_note with
        old_note to see what has changed."""
        if MOCK_MODE:
            return

        if not new_note.text:
            logger.info("Note {n} has no text, will not add to IndexDen".format(n=new_note))
            return

        # The ``or`` chain short-circuits, so the tag strings are only
        # built when text and name are both unchanged.
        indexable_changed = (
            new_note.text != old_note.text or
            new_note.name != old_note.name or
            SearchIndex._tags_to_str(new_note.tags) != SearchIndex._tags_to_str(old_note.tags) or
            new_note.course != old_note.course or
            new_note.uploaded_at != old_note.uploaded_at
        )

        if indexable_changed:
            # If the indexable fields have changed,
            # send the document to IndexDen again.
            logger.info("Indexing {n}".format(n=new_note))
            self._index_document(new_note)
        elif new_note.thanks != old_note.thanks:
            # If only the thanks count has changed, we can
            # just update the scoring variable.
            logger.info("Indexing thanks variable for {n}".format(n=new_note))
            self.index.update_variables(new_note.id, variables={0: new_note.thanks})
        else:
            # Otherwise we don't need to do anything.
            logger.info("Note {n} has not changed sufficiently, will not update IndexDen".format(n=new_note))

    def remove_note(self, note):
        """Remove a note from the search index."""
        if MOCK_MODE:
            return

        logger.info("Removing from index: {n}".format(n=note))
        self.index.delete_document(note.id)

    def search(self, query, course_id=None, page=0):
        """Returns an instance of SearchResult for your query.

        @param query     Text to look for in the note text or the note name
        @param course_id When given, only notes of that course are matched
        @param page      Zero-based page number; each page holds PAGE_SIZE results
        Raises ImproperlyConfigured when called in test (mock) mode."""
        if MOCK_MODE:
            raise ImproperlyConfigured("Attempting to use SearchIndex while in test mode.")

        phrase = SearchIndex._sanitize_phrase(query)
        if course_id:
            real_query = '("%s" OR name:"%s") AND course_id:%s' % (phrase, phrase, course_id)
        else:
            real_query = '"%s" OR name:"%s"' % (phrase, phrase)

        raw_results = self.index.search(real_query, snippet_fields=['text'],
                                        length=PAGE_SIZE, start=(page * PAGE_SIZE))

        ordered_ids = [int(r['docid']) for r in raw_results['results']]
        snippet_dict = {int(r['docid']): r['snippet_text'] for r in raw_results['results']}

        # Are there more results to show the user if they want?
        has_more = int(raw_results['matches']) > ((page + 1) * PAGE_SIZE)

        return SearchResult(ordered_ids, snippet_dict, has_more)