removing obsolete management call to upload notes to S3
[oweals/karmaworld.git] / karmaworld / apps / notes / search.py
1 #!/usr/bin/env python
2 # -*- coding:utf8 -*-
3 # Copyright (C) 2013  FinalsClub Foundation
4
5 import calendar
6 import os
7 import time
8 import uuid
9 from django.core.exceptions import ImproperlyConfigured
10
11 import indextank.client as itc
12 from django.conf import settings
13
14 import logging
15
# Number of results requested from the index per page of a search.
PAGE_SIZE = 10

# When the Django settings flag TESTING is truthy, SearchIndex skips every
# call to the remote search service.
MOCK_MODE = settings.TESTING

# Module logger. basicConfig() is invoked at import time, before the logger
# is looked up.
logging.basicConfig()
logger = logging.getLogger(__name__)

# Index name and private API URL are read from the environment at import
# time; a missing variable raises KeyError.
INDEXDEN_INDEX = os.environ['INDEXDEN_INDEX']
INDEXDEN_PRIVATE_URL = os.environ['INDEXDEN_PRIVATE_URL']
25
class SearchResult(object):
    """Container for the outcome of one query against IndexDen.

    Attributes are stored exactly as passed in:

    ordered_ids  -- list of note IDs found, in the order they should be
                    displayed
    snippet_dict -- dictionary mapping each note ID to the snippet to show
                    in the search results
    has_more     -- boolean; when true the user may request more results
                    by increasing the page number of the query
    """

    def __init__(self, ordered_ids, snippet_dict, has_more):
        # Plain value holder: no copying or validation is performed.
        self.has_more = has_more
        self.snippet_dict = snippet_dict
        self.ordered_ids = ordered_ids
40
41
class Singleton(type):
    """Metaclass that limits each class using it to a single instance.

    The first call to the class builds the instance; every later call
    returns that same object and ignores the arguments it was given.
    Borrowed from
    http://stackoverflow.com/questions/6760685/creating-a-singleton-in-python"""

    # Maps each class to its one instance; the dict lives on the metaclass
    # and is therefore shared by all classes that use it.
    _instances = {}

    def __call__(cls, *args, **kwargs):
        # Already built: hand back the cached instance.
        if cls in cls._instances:
            return cls._instances[cls]

        # First call: construct normally, remember the result, return it.
        instance = super(Singleton, cls).__call__(*args, **kwargs)
        cls._instances[cls] = instance
        return instance
53
54
class SearchIndex(object):
    """A singleton class used to interface with the IndexDen
    search index.

    When MOCK_MODE is true every mutating method returns immediately
    without contacting the service, and search() raises
    ImproperlyConfigured. In that mode ``api_client`` and ``index`` are
    never set on the instance.
    """

    __metaclass__ = Singleton

    def __init__(self):
        """Connect to IndexDen and make sure the index exists and is ready.

        Blocks (polling every 0.5s) until the index reports it has started.
        """
        self.index_name = INDEXDEN_INDEX

        # In mock (test) mode no remote setup is performed at all.
        if MOCK_MODE:
            return

        self.api_client = itc.ApiClient(INDEXDEN_PRIVATE_URL)
        if not self.api_client.get_index(self.index_name).exists():
            # NOTE(review): the reason for this pause is not visible here;
            # presumably it lets a just-deleted index settle -- confirm.
            time.sleep(5)
            self.api_client.create_index(self.index_name, {'public_search': False})

        self.index = self.api_client.get_index(self.index_name)

        # Wait for the index to become usable. There is no timeout.
        while not self.index.has_started():
            time.sleep(0.5)

        # Default scoring function
        # Results are sorted by combination of "relevance"
        # and number of thanks they have received (document variable 0).
        # "Relevance" is a black box provided by IndexDen.
        # NOTE(review): log() of a thanks count of 0 or 1 is not positive;
        # confirm this ranks un-thanked notes as intended.
        self.index.add_function(0, 'relevance * log(doc.var[0])')

    @staticmethod
    def _tags_to_str(tags):
        """Return the tags of a tag manager as one space-separated string."""
        return ' '.join(str(tag) for tag in tags.all())

    @staticmethod
    def _note_to_dict(note):
        """Build the document fields sent to IndexDen for ``note``.

        Always contains 'name' and 'text'. 'tags', 'course_id' and
        'timestamp' are only included when the note has tags, a course
        and an upload time respectively.
        """
        d = {
            'name': note.name,
            'text': note.text
        }

        if note.tags.exists():
            d['tags'] = SearchIndex._tags_to_str(note.tags)

        if note.course:
            d['course_id'] = note.course.id

        uploaded_at = note.uploaded_at
        if uploaded_at:
            # calendar.timegm() interprets its argument as UTC, so a
            # timezone-aware value must be normalized to UTC first.
            # utctimetuple() does that and is equivalent to timetuple()
            # for naive datetimes; fall back to timetuple() for objects
            # (e.g. plain dates) that do not offer it.
            to_tuple = getattr(uploaded_at, 'utctimetuple', uploaded_at.timetuple)
            d['timestamp'] = calendar.timegm(to_tuple())

        return d

    def delete_index(self):
        """This is meant for test cases that want to clean up
        after themselves."""
        if MOCK_MODE:
            return

        self.api_client.delete_index(self.index_name)

    def add_note(self, note):
        """Add a note to the index. If the note is
        already in the index, it will be overwritten.

        Notes without text are skipped. The note's thanks count is stored
        as document variable 0.
        """
        if MOCK_MODE:
            return

        if note.text:
            logger.info("Indexing %s", note)
            self.index.add_document(note.id, SearchIndex._note_to_dict(note), variables={0: note.thanks})
        else:
            logger.info("Note %s has no text, will not add to IndexDen", note)

    def update_note(self, new_note, old_note):
        """Update a note. Will only truly update the search
        index if it needs to. Compares the fields in new_note with
        old_note to see what has changed.

        A change to text, name, tags, course or upload time re-sends the
        whole document; a change to only the thanks count sends just that
        variable; otherwise nothing is sent.
        """
        if MOCK_MODE:
            return

        if not new_note.text:
            # NOTE(review): nothing is removed here, so a note whose text
            # was cleared keeps whatever was indexed before -- confirm
            # that is intended.
            logger.info("Note %s has no text, will not add to IndexDen", new_note)
            return

        # If the indexable fields have changed,
        # send the document to IndexDen again
        indexable_changed = (
            new_note.text != old_note.text or
            new_note.name != old_note.name or
            SearchIndex._tags_to_str(new_note.tags) != SearchIndex._tags_to_str(old_note.tags) or
            new_note.course != old_note.course or
            new_note.uploaded_at != old_note.uploaded_at
        )

        if indexable_changed:
            logger.info("Indexing %s", new_note)
            self.index.add_document(new_note.id, SearchIndex._note_to_dict(new_note), variables={0: new_note.thanks})

        # If only the thanks count has changed, we can
        # just send that
        elif new_note.thanks != old_note.thanks:
            logger.info("Indexing thanks variable for %s", new_note)
            self.index.update_variables(new_note.id, variables={0: new_note.thanks})

        # Otherwise we don't need to do anything
        else:
            logger.info("Note %s has not changed sufficiently, will not update IndexDen", new_note)

    def remove_note(self, note):
        """Remove a note from the search index."""
        if MOCK_MODE:
            return

        logger.info("Removing from index: %s", note)
        self.index.delete_document(note.id)

    def search(self, query, course_id=None, page=0):
        """Returns an instance of SearchResult for your query.

        @param query The user's search text; matched as a phrase against
                     the default field and the 'name' field
        @param course_id If truthy, restrict results to this course
        @param page Zero-based page number; each page holds PAGE_SIZE
                    results
        Raises ImproperlyConfigured when called in test (mock) mode.
        """
        if MOCK_MODE:
            raise ImproperlyConfigured("Attempting to use SearchIndex while in test mode.")

        # The query is embedded between double quotes below, so a double
        # quote typed by the user would terminate the phrase early and
        # yield a malformed (or altered) query. Replace them with spaces.
        phrase = ('%s' % (query,)).replace('"', ' ')

        if course_id:
            real_query = '("%s" OR name:"%s") AND course_id:%s' % (phrase, phrase, course_id)
        else:
            real_query = '"%s" OR name:"%s"' % (phrase, phrase)

        raw_results = self.index.search(real_query, snippet_fields=['text'],
                                        length=PAGE_SIZE, start=(page * PAGE_SIZE))

        # Walk the results once, keeping display order and the snippets.
        ordered_ids = []
        snippet_dict = {}
        for result in raw_results['results']:
            note_id = int(result['docid'])
            ordered_ids.append(note_id)
            snippet_dict[note_id] = result['snippet_text']

        # Are there more results to show the user if they want?
        has_more = int(raw_results['matches']) > (page + 1) * PAGE_SIZE

        return SearchResult(ordered_ids, snippet_dict, has_more)