365544a431aa5d33c311634a33ede5a6f14bba61
[Plinn.git] / catalog.py
1 # -*- coding: utf-8 -*-
2 from App.class_init import InitializeClass
3 from AccessControl import ClassSecurityInfo
4 from Products.CMFCore.interfaces import IIndexableObject
5 from Products.CMFCore.CatalogTool import CatalogTool as BaseCatalogTool
6 from Products.CMFCore.CatalogTool import IndexableObjectWrapper
7 from Products.PageTemplates.PageTemplateFile import PageTemplateFile
8 from Products.CMFCore.permissions import ModifyPortalContent
9 from zope.component import queryMultiAdapter
10 from Products.ZCatalog.Catalog import Catalog
11 import transaction
12 from solr import *
13
14 # imports for Catalog class
15 from Products.PluginIndexes.interfaces import ILimitedResultIndex
16 from Products.ZCatalog.Lazy import LazyMap, LazyCat, LazyValues
17 from BTrees.IIBTree import intersection, IISet
18 from BTrees.IIBTree import weightedIntersection
19 import warnings
20
class SolrTransactionHook:
    """After-commit hook coupling the Solr commit to the ZODB commit.

    An instance wraps one Solr connection that holds pending changes and
    is called with the outcome of the ZODB transaction.
    """

    def __init__(self, connection):
        """Remember the connection whose pending changes are finalized."""
        self.connection = connection

    def __call__(self, status):
        """Finalize the pending Solr changes.

        status -- true when the ZODB transaction committed; the pending
                  Solr changes are then committed as well.  When false,
                  nothing is committed.

        The connection is closed in every case, including when the Solr
        commit itself raises (the exception still propagates).
        """
        try:
            if status:
                self.connection.commit()
        finally:
            # Always release the connection, whatever happened above.
            self.connection.close()
32
class CatalogTool(BaseCatalogTool):
    """Portal catalog that also sends a set of index values to Solr.

    Each indexing, reindexing or unindexing operation is first applied
    through the base class, then sent to the Solr server at ``solr_url``.
    The Solr side is committed by a SolrTransactionHook registered as an
    after-commit hook on the current transaction.
    """

    # NOTE(review): ``self.__url`` is not defined in this class.  Inside the
    # class body the name mangles to ``_CatalogTool__url``; presumably this
    # resolves to the private helper of the CMFCore base class, which works
    # only because both classes are named ``CatalogTool`` -- confirm before
    # renaming this class.
    #
    # Methods that had no docstring are documented with comments on purpose:
    # in Zope 2 the presence of a docstring takes part in deciding whether an
    # attribute may be published.

    meta_type = 'Legivoc Catalog'
    security = ClassSecurityInfo()
    # Insert a "Solr" management tab after the first five inherited tabs.
    manage_options = (BaseCatalogTool.manage_options[:5] +
                      ({'label' : 'Solr', 'action' : 'manage_solr'},) +
                      BaseCatalogTool.manage_options[5:])
    manage_solr = PageTemplateFile('www/manage_solr', globals())

    def __init__(self, idxs=[]):
        # ``idxs`` is accepted but not used by this constructor.
        super(CatalogTool, self).__init__()
        self._catalog = DelegatedCatalog()
        self.solr_url = 'http://localhost:8983/solr'
        # Names of the attributes whose values are sent to Solr.
        self.delegatedIndexes = ('Title', 'Description', 'SearchableText')

    security.declarePrivate('solrAdd')
    def solrAdd(self, object, idxs=[], uid=None):
        # Send the values of ``idxs`` for ``object`` to Solr.
        #
        # object -- the content object, or an IIndexableObject providing it
        # idxs   -- names of the attributes to send; an empty list means
        #           every name in self.delegatedIndexes
        # uid    -- value stored under the 'id' key of the Solr document;
        #           when false, self.__url(object) is used instead
        #
        # The Solr commit is deferred to an after-commit hook on the current
        # transaction.  If anything fails before the hook is registered, the
        # connection is closed here because nothing else would close it.
        if IIndexableObject.providedBy(object):
            w = object
        else:
            w = queryMultiAdapter((object, self), IIndexableObject)
            if w is None:
                # BBB
                w = IndexableObjectWrapper(object, self)

        uid = uid if uid else self.__url(object)
        idxs = idxs if idxs != [] else self.delegatedIndexes
        data = {'id': uid}
        for name in idxs:
            # Missing attributes are sent as an empty string; callables are
            # called to obtain the value.
            attr = getattr(w, name, '')
            data[name] = attr() if callable(attr) else attr
        c = SolrConnection(self.solr_url)
        try:
            c.add(**data)
            txn = transaction.get()
            txn.addAfterCommitHook(SolrTransactionHook(c))
        except Exception:
            # No hook owns the connection yet: release it before re-raising.
            c.close()
            raise

    # PortalCatalog api overloads
    security.declareProtected(ModifyPortalContent, 'indexObject')
    def indexObject(self, object):
        """ Add to catalog and send to Solr """
        super(CatalogTool, self).indexObject(object)
        self.solrAdd(object)

    security.declarePrivate('reindexObject')
    def reindexObject(self, object, idxs=[], update_metadata=1, uid=None):
        # Reindex ``object`` through the base class, then resend to Solr the
        # delegated indexes concerned by the update.
        #
        # With an empty ``idxs`` every delegated index is resent.  Otherwise
        # only the names that are both existing catalog indexes and
        # delegated indexes are resent; if none remain, Solr is not
        # contacted at all.
        #
        # NOTE(review): solrAdd sends only the listed fields.  If the Solr
        # ``add`` replaces the whole document stored under that id, a
        # partial reindex would drop the other delegated fields -- confirm.
        super(CatalogTool, self).reindexObject(object,
                                               idxs=idxs,
                                               update_metadata=update_metadata,
                                               uid=uid)
        if idxs != []:
            # Filter out invalid indexes.
            valid_indexes = self._catalog.indexes.keys()
            idxs = [i for i in idxs
                    if i in valid_indexes and i in self.delegatedIndexes]
        else:
            idxs = self.delegatedIndexes

        if idxs:
            self.solrAdd(object, idxs=idxs, uid=uid)

    security.declarePrivate('unindexObject')
    def unindexObject(self, object):
        """Remove from catalog and from Solr.
        """
        super(CatalogTool, self).unindexObject(object)
        c = SolrConnection(self.solr_url)
        try:
            url = self.__url(object)
            c.delete(id=url)
            txn = transaction.get()
            txn.addAfterCommitHook(SolrTransactionHook(c))
        except Exception:
            # No hook owns the connection yet: release it before re-raising.
            c.close()
            raise

# Zope class initialization; this is what processes the ``security``
# declarations made in the class body.
InitializeClass(CatalogTool)
105
106
class DelegatedCatalog(Catalog):
    """Catalog meant to be the place where searching is delegated to Solr.

    NOTE(review): the search() below contains no Solr call; it applies the
    query to the local indexes only -- confirm whether the delegation is
    still to be written.
    """

    def search(self, query, sort_index=None, reverse=0, limit=None, merge=1):
        """Iterate through the indexes, applying the query to each one. If
        merge is true then return a lazy result set (sorted if appropriate)
        otherwise return the raw (possibly scored) results for later merging.
        Limit is used in conjunction with sorting or scored results to inform
        the catalog how many results you are really interested in. The catalog
        can then use optimizations to save time and memory. The number of
        results is not guaranteed to fall within the limit however, you should
        still slice or batch the results as usual."""

        # getScoredResult (defined further down) needs aq_parent, which is
        # not imported at module level in this file.  Import it here so the
        # scored-result path does not fail with a NameError.
        from Acquisition import aq_parent

        rs = None # resultset

        # Indexes fulfill a fairly large contract here. We hand each
        # index the query mapping we are given (which may be composed
        # of some combination of web request, kw mappings or plain old dicts)
        # and the index decides what to do with it. If the index finds work
        # for itself in the query, it returns the results and a tuple of
        # the attributes that were used. If the index finds nothing for it
        # to do then it returns None.

        # Canonicalize the request into a sensible query before passing it on
        query = self.make_query(query)

        cr = self.getCatalogPlan(query)
        cr.start()

        plan = cr.plan()
        if not plan:
            plan = self._sorted_search_indexes(query)

        indexes = self.indexes.keys()
        for i in plan:
            if i not in indexes:
                # We can have bogus keys or the plan can contain index names
                # that have been removed in the meantime
                continue

            index = self.getIndex(i)
            _apply_index = getattr(index, "_apply_index", None)
            if _apply_index is None:
                continue

            cr.start_split(i)
            # Indexes providing ILimitedResultIndex also receive the result
            # set accumulated so far.
            limit_result = ILimitedResultIndex.providedBy(index)
            if limit_result:
                r = _apply_index(query, rs)
            else:
                r = _apply_index(query)

            if r is not None:
                r, u = r
                # Short circuit if empty result
                # BBB: We can remove the "r is not None" check in Zope 2.14
                # once we don't need to support the "return everything" case
                # anymore
                if r is not None and not r:
                    cr.stop_split(i, result=None, limit=limit_result)
                    return LazyCat([])

                # provide detailed info about the pure intersection time
                intersect_id = i + '#intersection'
                cr.start_split(intersect_id)
                # weightedIntersection preserves the values from any mappings
                # we get, as some indexes don't return simple sets
                if hasattr(rs, 'items') or hasattr(r, 'items'):
                    _, rs = weightedIntersection(rs, r)
                else:
                    rs = intersection(rs, r)

                cr.stop_split(intersect_id)

                # consider the time it takes to intersect the index result with
                # the total resultset to be part of the index time
                cr.stop_split(i, result=r, limit=limit_result)
                if not rs:
                    break
            else:
                cr.stop_split(i, result=None, limit=limit_result)

        # Try to deduce the sort limit from batching arguments
        b_start = int(query.get('b_start', 0))
        b_size = query.get('b_size', None)
        if b_size is not None:
            b_size = int(b_size)

        if b_size is not None:
            limit = b_start + b_size
        elif limit and b_size is None:
            b_size = limit

        if rs is None:
            # None of the indexes found anything to do with the query
            # We take this to mean that the query was empty (an empty filter)
            # and so we return everything in the catalog
            warnings.warn('Your query %s produced no query restriction. '
                          'Currently the entire catalog content is returned. '
                          'In Zope 2.14 this will result in an empty LazyCat '
                          'to be returned.' % repr(cr.make_key(query)),
                          DeprecationWarning, stacklevel=3)

            rlen = len(self)
            if sort_index is None:
                sequence, slen = self._limit_sequence(self.data.items(), rlen,
                                                      b_start, b_size)
                result = LazyMap(self.instantiate, sequence, slen,
                                 actual_result_count=rlen)
            else:
                cr.start_split('sort_on')
                result = self.sortResults(
                    self.data, sort_index, reverse, limit, merge,
                    actual_result_count=rlen, b_start=b_start,
                    b_size=b_size)
                cr.stop_split('sort_on', None)
        elif rs:
            # We got some results from the indexes.
            # Sort and convert to sequences.
            # XXX: The check for 'values' is really stupid since we call
            # items() and *not* values()
            rlen = len(rs)
            if sort_index is None and hasattr(rs, 'items'):
                # having a 'items' means we have a data structure with
                # scores. Build a new result set, sort it by score, reverse
                # it, compute the normalized score, and Lazify it.

                if not merge:
                    # Don't bother to sort here, return a list of
                    # three tuples to be passed later to mergeResults
                    # note that data_record_normalized_score_ cannot be
                    # calculated and will always be 1 in this case
                    getitem = self.__getitem__
                    result = [(score, (1, score, rid), getitem)
                              for rid, score in rs.items()]
                else:
                    cr.start_split('sort_on')

                    rs = rs.byValue(0) # sort it by score
                    max = float(rs[0][0])

                    # Here we define our getter function inline so that
                    # we can conveniently store the max value as a default arg
                    # and make the normalized score computation lazy
                    def getScoredResult(item, max=max, self=self):
                        """
                        Returns instances of self._v_brains, or whatever is
                        passed into self.useBrains.
                        """
                        score, key = item
                        r = self._v_result_class(self.data[key])\
                            .__of__(aq_parent(self))
                        r.data_record_id_ = key
                        r.data_record_score_ = score
                        r.data_record_normalized_score_ = int(100. * score / max)
                        return r

                    sequence, slen = self._limit_sequence(rs, rlen, b_start,
                                                          b_size)
                    result = LazyMap(getScoredResult, sequence, slen,
                                     actual_result_count=rlen)
                    cr.stop_split('sort_on', None)

            elif sort_index is None and not hasattr(rs, 'values'):
                # no scores
                if hasattr(rs, 'keys'):
                    rs = rs.keys()
                sequence, slen = self._limit_sequence(rs, rlen, b_start,
                                                      b_size)
                result = LazyMap(self.__getitem__, sequence, slen,
                                 actual_result_count=rlen)
            else:
                # sort. If there are scores, then this block is not
                # reached, therefore 'sort-on' does not happen in the
                # context of a text index query. This should probably
                # sort by relevance first, then the 'sort-on' attribute.
                cr.start_split('sort_on')
                result = self.sortResults(rs, sort_index, reverse, limit,
                                          merge, actual_result_count=rlen,
                                          b_start=b_start, b_size=b_size)
                cr.stop_split('sort_on', None)
        else:
            # Empty result set
            result = LazyCat([])
        cr.stop()
        return result