[Plinn.git] / catalog.py
# -*- coding: utf-8 -*-
from App.class_init import InitializeClass
from AccessControl import ClassSecurityInfo
from Products.CMFCore.interfaces import IIndexableObject
from Products.CMFCore.CatalogTool import CatalogTool as BaseCatalogTool
from Products.CMFCore.CatalogTool import IndexableObjectWrapper
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Products.CMFCore.permissions import ModifyPortalContent
from zope.component import queryMultiAdapter
from Products.ZCatalog.Catalog import Catalog
# The following imports are needed by DelegatedCatalog.search() below,
# which is adapted from Products.ZCatalog.Catalog.Catalog.search.
from Products.ZCatalog.Lazy import LazyCat, LazyMap
from Products.PluginIndexes.interfaces import ILimitedResultIndex
from BTrees.IIBTree import intersection, weightedIntersection
from Acquisition import aq_parent
import transaction
import warnings
from solr import SolrConnection

class SolrTransactionHook :
    ''' Commit Solr in step with the ZODB transaction commit. '''
    def __init__(self, connection) :
        self.connection = connection

    def __call__(self, status) :
        # 'status' is True when the ZODB transaction committed successfully.
        if status :
            self.connection.commit()
            self.connection.close()
        else :
            self.connection.close()

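# Illustrative sketch, not part of the Plinn API: how the hook above is meant
# to be used. The document id and field are hypothetical; the URL matches the
# default configured on CatalogTool below.
def _example_solr_hook_usage(solr_url='http://localhost:8983/solr') :
    c = SolrConnection(solr_url)
    c.add(id='/portal/front-page', Title='Front page')   # queue one document
    txn = transaction.get()
    # When the ZODB transaction commits successfully, the hook is called with
    # status=True: it commits the queued Solr documents and closes the
    # connection; on abort it just closes the connection.
    txn.addAfterCommitHook(SolrTransactionHook(c))
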
class CatalogTool(BaseCatalogTool) :
    meta_type = 'Legivoc Catalog'
    security = ClassSecurityInfo()
    manage_options = (BaseCatalogTool.manage_options[:5] +
                      ({'label' : 'Solr', 'action' : 'manage_solr'},) +
                      BaseCatalogTool.manage_options[5:])
    manage_solr = PageTemplateFile('www/manage_solr', globals())


    def __init__(self, idxs=[]) :
        super(CatalogTool, self).__init__()
        self._catalog = DelegatedCatalog()
        self.solr_url = 'http://localhost:8983/solr'
        self.delegatedIndexes = ('Title', 'Description', 'SearchableText')

    security.declarePrivate('solrAdd')
    def solrAdd(self, object, idxs=[], uid=None) :
        if IIndexableObject.providedBy(object):
            w = object
        else:
            w = queryMultiAdapter( (object, self), IIndexableObject )
            if w is None:
                # BBB
                w = IndexableObjectWrapper(object, self)

        # self.__url resolves to CMFCore's CatalogTool._CatalogTool__url
        # (the object's physical path) because this class shares its name
        # with the base class, so the name mangling matches.
        uid = uid if uid else self.__url(object)
        idxs = idxs if idxs != [] else self.delegatedIndexes
        data = {'id' : uid}
        for name in idxs :
            attr = getattr(w, name, '')
            data[name] = attr() if callable(attr) else attr
        c = SolrConnection(self.solr_url)
        c.add(**data)
        txn = transaction.get()
        txn.addAfterCommitHook(SolrTransactionHook(c))


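    # Illustrative example (values are hypothetical): for a document whose
    # physical path is /portal/news/item, indexed with the default delegated
    # indexes, solrAdd amounts to
    #   SolrConnection(self.solr_url).add(id='/portal/news/item',
    #                                     Title='...', Description='...',
    #                                     SearchableText='...')
    # with the Solr commit deferred to SolrTransactionHook.
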
    # PortalCatalog api overloads
    security.declareProtected(ModifyPortalContent, 'indexObject')
    def indexObject(self, object) :
        """ Add to catalog and send to Solr """
        super(CatalogTool, self).indexObject(object)
        self.solrAdd(object)

    security.declarePrivate('reindexObject')
    def reindexObject(self, object, idxs=[], update_metadata=1, uid=None):
        """ Update catalog and Solr """
        super(CatalogTool, self).reindexObject(object,
                                               idxs=idxs,
                                               update_metadata=update_metadata,
                                               uid=uid)
        if idxs != []:
            # Filter out invalid indexes and keep only those delegated to Solr.
            valid_indexes = self._catalog.indexes.keys()
            idxs = [i for i in idxs if i in valid_indexes and i in self.delegatedIndexes]
        else :
            idxs = self.delegatedIndexes

        if idxs :
            self.solrAdd(object, idxs=idxs, uid=uid)

    security.declarePrivate('unindexObject')
    def unindexObject(self, object):
        """Remove from catalog and from Solr.
        """
        super(CatalogTool, self).unindexObject(object)
        c = SolrConnection(self.solr_url)
        url = self.__url(object)
        c.delete(id=url)
        txn = transaction.get()
        txn.addAfterCommitHook(SolrTransactionHook(c))

InitializeClass(CatalogTool)


class DelegatedCatalog(Catalog) :
    '''This is where the actual delegation to Solr takes place. '''

    def search(self, query, sort_index=None, reverse=0, limit=None, merge=1):
        """Iterate through the indexes, applying the query to each one. If
        merge is true then return a lazy result set (sorted if appropriate)
        otherwise return the raw (possibly scored) results for later merging.
        Limit is used in conjunction with sorting or scored results to inform
        the catalog how many results you are really interested in. The catalog
        can then use optimizations to save time and memory. The number of
        results is not guaranteed to fall within the limit, however; you should
        still slice or batch the results as usual."""

        rs = None # resultset

        # Indexes fulfill a fairly large contract here. We hand each
        # index the query mapping we are given (which may be composed
        # of some combination of web request, kw mappings or plain old dicts)
        # and the index decides what to do with it. If the index finds work
        # for itself in the query, it returns the results and a tuple of
        # the attributes that were used. If the index finds nothing for it
        # to do then it returns None.
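        # For illustration (hypothetical index and values): a FieldIndex named
        # 'Title' given {'Title': 'foo'} would typically return
        # (IISet([rid1, rid2]), ('Title',)), while an index that finds nothing
        # relevant in the query returns None.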

        # Canonicalize the request into a sensible query before passing it on
        query = self.make_query(query)

        cr = self.getCatalogPlan(query)
        cr.start()

        plan = cr.plan()
        if not plan:
            plan = self._sorted_search_indexes(query)

        indexes = self.indexes.keys()
        for i in plan:
            if i not in indexes:
                # We can have bogus keys or the plan can contain index names
                # that have been removed in the meantime
                continue

            index = self.getIndex(i)
            _apply_index = getattr(index, "_apply_index", None)
            if _apply_index is None:
                continue

            cr.start_split(i)
            limit_result = ILimitedResultIndex.providedBy(index)
            if limit_result:
                r = _apply_index(query, rs)
            else:
                r = _apply_index(query)

            if r is not None:
                r, u = r
                # Short circuit if empty result
                # BBB: We can remove the "r is not None" check in Zope 2.14
                # once we don't need to support the "return everything" case
                # anymore
                if r is not None and not r:
                    cr.stop_split(i, result=None, limit=limit_result)
                    return LazyCat([])

                # provide detailed info about the pure intersection time
                intersect_id = i + '#intersection'
                cr.start_split(intersect_id)
                # weightedIntersection preserves the values from any mappings
                # we get, as some indexes don't return simple sets
                if hasattr(rs, 'items') or hasattr(r, 'items'):
                    _, rs = weightedIntersection(rs, r)
                else:
                    rs = intersection(rs, r)

                cr.stop_split(intersect_id)

                # consider the time it takes to intersect the index result with
                # the total resultset to be part of the index time
                cr.stop_split(i, result=r, limit=limit_result)
                if not rs:
                    break
            else:
                cr.stop_split(i, result=None, limit=limit_result)

        # Try to deduce the sort limit from batching arguments
        b_start = int(query.get('b_start', 0))
        b_size = query.get('b_size', None)
        if b_size is not None:
            b_size = int(b_size)

        if b_size is not None:
            limit = b_start + b_size
        elif limit and b_size is None:
            b_size = limit
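        # For example, query arguments b_start=20 and b_size=10 yield a sort
        # limit of 30 (records 0..29 are enough to serve that batch); when only
        # 'limit' is given, b_size falls back to that limit.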

        if rs is None:
            # None of the indexes found anything to do with the query
            # We take this to mean that the query was empty (an empty filter)
            # and so we return everything in the catalog
            warnings.warn('Your query %s produced no query restriction. '
                          'Currently the entire catalog content is returned. '
                          'In Zope 2.14 this will result in an empty LazyCat '
                          'to be returned.' % repr(cr.make_key(query)),
                          DeprecationWarning, stacklevel=3)

            rlen = len(self)
            if sort_index is None:
                sequence, slen = self._limit_sequence(self.data.items(), rlen,
                                                      b_start, b_size)
                result = LazyMap(self.instantiate, sequence, slen,
                                 actual_result_count=rlen)
            else:
                cr.start_split('sort_on')
                result = self.sortResults(
                    self.data, sort_index, reverse, limit, merge,
                    actual_result_count=rlen, b_start=b_start,
                    b_size=b_size)
                cr.stop_split('sort_on', None)
        elif rs:
            # We got some results from the indexes.
            # Sort and convert to sequences.
            # XXX: The check for 'values' is really stupid since we call
            # items() and *not* values()
            rlen = len(rs)
            if sort_index is None and hasattr(rs, 'items'):
                # having an 'items' means we have a data structure with
                # scores. Build a new result set, sort it by score, reverse
                # it, compute the normalized score, and Lazify it.

                if not merge:
                    # Don't bother to sort here, return a list of
                    # three tuples to be passed later to mergeResults.
                    # Note that data_record_normalized_score_ cannot be
                    # calculated and will always be 1 in this case.
                    getitem = self.__getitem__
                    result = [(score, (1, score, rid), getitem)
                              for rid, score in rs.items()]
                else:
                    cr.start_split('sort_on')

                    rs = rs.byValue(0) # sort it by score
                    max = float(rs[0][0])

                    # Here we define our getter function inline so that
                    # we can conveniently store the max value as a default arg
                    # and make the normalized score computation lazy
                    def getScoredResult(item, max=max, self=self):
                        """
                        Returns instances of self._v_brains, or whatever is
                        passed into self.useBrains.
                        """
                        score, key = item
                        r = (self._v_result_class(self.data[key])
                             .__of__(aq_parent(self)))
                        r.data_record_id_ = key
                        r.data_record_score_ = score
                        r.data_record_normalized_score_ = int(100. * score / max)
                        return r

                    sequence, slen = self._limit_sequence(rs, rlen, b_start,
                                                          b_size)
                    result = LazyMap(getScoredResult, sequence, slen,
                                     actual_result_count=rlen)
                    cr.stop_split('sort_on', None)

            elif sort_index is None and not hasattr(rs, 'values'):
                # no scores
                if hasattr(rs, 'keys'):
                    rs = rs.keys()
                sequence, slen = self._limit_sequence(rs, rlen, b_start,
                                                      b_size)
                result = LazyMap(self.__getitem__, sequence, slen,
                                 actual_result_count=rlen)
            else:
                # sort. If there are scores, then this block is not
                # reached, therefore 'sort-on' does not happen in the
                # context of a text index query. This should probably
                # sort by relevance first, then the 'sort-on' attribute.
                cr.start_split('sort_on')
                result = self.sortResults(rs, sort_index, reverse, limit,
                                          merge, actual_result_count=rlen,
                                          b_start=b_start, b_size=b_size)
                cr.stop_split('sort_on', None)
        else:
            # Empty result set
            result = LazyCat([])
        cr.stop()
        return result
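
# Illustrative usage sketch (nothing below is defined in this module; the
# portal object and its content ids are hypothetical). Queries reach
# DelegatedCatalog.search through the standard ZCatalog machinery, e.g.:
#
#   results = portal.portal_catalog(SearchableText='budget',
#                                   sort_on='Title', b_start=0, b_size=20)
#
# b_start/b_size feed the sort-limit deduction in search() above, and the
# delegated indexes (Title, Description, SearchableText) are the same ones
# that solrAdd pushes to Solr.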