1 # -*- coding: utf-8 -*-
2 from App
.class_init
import InitializeClass
3 from AccessControl
import ClassSecurityInfo
4 from Products
.CMFCore
.interfaces
import IIndexableObject
5 from Products
.CMFCore
.CatalogTool
import CatalogTool
as BaseCatalogTool
6 from Products
.CMFCore
.CatalogTool
import IndexableObjectWrapper
7 from Products
.PageTemplates
.PageTemplateFile
import PageTemplateFile
8 from Products
.CMFCore
.permissions
import ModifyPortalContent
, ManagePortal
9 from zope
.component
import queryMultiAdapter
10 from Products
.ZCatalog
.Catalog
import Catalog
14 # imports for Catalog class
15 from Products
.PluginIndexes
.interfaces
import ILimitedResultIndex
16 from Products
.ZCatalog
.Lazy
import LazyMap
, LazyCat
, LazyValues
17 from BTrees
.IIBTree
import intersection
, IISet
18 from BTrees
.IIBTree
import weightedIntersection
# Name of the attribute under which the Solr connection is cached on the
# catalog tool.  The `_v_` prefix marks it as a ZODB volatile attribute
# (never persisted, dropped on connection/transaction boundaries).
_VOLATILE_SOLR_NAME = '_v_solrConnection'
class SolrTransactionHook :
    '''Solr commit coupled to the ZODB commit (after-commit hook).'''

    def __init__(self, context, con) :
        # context: the object carrying the cached volatile Solr connection
        # (see _VOLATILE_SOLR_NAME), cleaned up when the hook runs.
        self.context = context
        # NOTE(review): `con` is not stored in the lines visible here; the
        # __call__ hook presumably needs it to commit/rollback Solr —
        # confirm against the complete file.
29 def __call__(self
, status
) :
36 delattr(self
.context
, _VOLATILE_SOLR_NAME
)
37 except AttributeError :
class CatalogTool(BaseCatalogTool) :
    # CMFCore catalog tool variant that delegates selected (full-text style)
    # indexes to a Solr server.
    meta_type = 'Plinn Catalog'
    security = ClassSecurityInfo()
    # Insert a "Solr" management tab among the inherited ZMI tabs.
    manage_options = (BaseCatalogTool.manage_options[:5] +
                      ({'label' : 'Solr', 'action' : 'manage_solr'},) +
                      BaseCatalogTool.manage_options[5:])
    # ZMI page backing the "Solr" tab above.
    manage_solr = PageTemplateFile('www/manage_solr.pt', globals(), __name__='manage_solr')
50 def __init__(self
, idxs
=[]) :
51 super(CatalogTool
, self
).__init
__()
52 self
._catalog
= DelegatedCatalog(self
)
53 self
.solr_url
= 'http://localhost:8983/solr'
54 self
.delegatedIndexes
= ('Title', 'Description', 'SearchableText')
    # Anyone may read the list of delegated index names.
    security.declarePublic('getDelegatedIndexes')
57 def getDelegatedIndexes(self
) :
58 """ read the method name """
59 return self
.delegatedIndexes
    # Changing the delegated index list is a portal-management operation.
    security.declareProtected(ManagePortal, 'setDelegatedIndexes')
    def setDelegatedIndexes(self, indexes, REQUEST=None) :
        """Set the names of the indexes delegated to Solr.

        ``indexes`` is an iterable of names; entries are stripped and
        blank entries discarded before being stored as a tuple.
        """
        self.delegatedIndexes = tuple([i.strip() for i in indexes if i.strip()])
        # NOTE(review): this unconditional redirect raises AttributeError
        # when REQUEST is None — an `if REQUEST is not None` guard appears
        # to be missing from this view; confirm against the complete file.
        REQUEST.RESPONSE.redirect(self.absolute_url() + '/manage_solr?manage_tabs_message=Saved changes.')
68 def _getSolrConnection(self
) :
69 if not hasattr(self
, _VOLATILE_SOLR_NAME
) :
70 con
= SolrConnection(self
.solr_url
)
71 setattr(self
, _VOLATILE_SOLR_NAME
, con
)
72 txn
= transaction
.get()
73 txn
.addAfterCommitHook(SolrTransactionHook(self
, con
))
74 return getattr(self
, _VOLATILE_SOLR_NAME
)
    # Internal: only trusted code may push documents to Solr.
    security.declarePrivate('solrAdd')
    def solrAdd(self, w, uid, idxs) :
        """Index the wrapped object ``w`` under ``uid`` in Solr.

        Only index names present in ``self.delegatedIndexes`` are
        considered; an empty ``idxs`` means all delegated indexes.
        """
        idxs = idxs if idxs else self.delegatedIndexes
        # Keep only the delegated indexes (the original comment said
        # "filter out", but the test retains names in delegatedIndexes).
        idxs = [i for i in idxs if i in self.delegatedIndexes]
        # NOTE(review): the loop header over the index names and the
        # initialisation of `data` are missing from this view — `name`
        # and `data` below are otherwise undefined; confirm against the
        # complete file.
        attr = getattr(w, name, '')
        data[name] = attr() if callable(attr) else attr
        c = self._getSolrConnection()
        # NOTE(review): the call submitting `data` on the connection is
        # not visible in this view.
88 # PortalCatalog api overloads
89 def catalog_object(self
, obj
, uid
=None, idxs
=None, update_metadata
=1,
91 # Wraps the object with workflow and accessibility
92 # information just before cataloging.
93 if IIndexableObject
.providedBy(obj
):
96 w
= queryMultiAdapter( (obj
, self
), IIndexableObject
)
99 w
= IndexableObjectWrapper(obj
, self
)
103 # Filter out invalid indexes.
104 valid_indexes
= self
._catalog
.indexes
.keys()
105 idxs_
= [i
for i
in idxs
if i
in valid_indexes
]
107 super(CatalogTool
, self
).catalog_object(w
, uid
, idxs_
, update_metadata
, pghandler
)
108 self
.solrAdd(w
, uid
, idxs
)
    # Internal: reindexing is driven by CMF machinery, not called by users.
    security.declarePrivate('reindexObject')
    def reindexObject(self, object, idxs=[], update_metadata=1, uid=None):
        """Update catalog after object data has changed.

        The optional idxs argument is a list of specific indexes
        to update (all of them by default).

        The update_metadata flag controls whether the object's
        metadata record is updated as well.

        If a non-None uid is passed, it will be used as the catalog uid
        for the object instead of its physical path.
        """
        # NOTE(review): an `if uid is None :` guard appears to be missing
        # from this view — as written, a caller-supplied uid is always
        # overridden by the object's URL; confirm against the complete file.
        uid = self.__url(object)
        self.catalog_object(object, uid, idxs, update_metadata)
    # Internal: uncataloging is driven by CMF machinery.
    security.declarePrivate('unindexObject')
    def unindexObject(self, object):
        """Remove from catalog (ZCatalog and Solr)."""
        super(CatalogTool, self).unindexObject(object)
        c = self._getSolrConnection()
        url = self.__url(object)
        # NOTE(review): the statement deleting `url` from Solr via `c` is
        # not visible in this view; confirm against the complete file.
# Activate the ClassSecurityInfo declarations made on CatalogTool.
InitializeClass(CatalogTool)
class DelegatedCatalog(Catalog) :
    '''This is where queries are actually delegated to Solr.'''

    def __init__(self, zcat, brains=None) :
        Catalog.__init__(self, brains=brains)
        # NOTE(review): delegateSearch reads self.zcat, but its assignment
        # is not visible in this view; confirm against the complete file.
147 def delegateSearch(self
, query
, plan
) :
150 None signifie : pas de délégation, il faut continuer à interroger les autres index.
151 IISet() vide : pas de résultat lors de la délégation, on peut arrêter la recherche.
153 indexes
= set(query
.keys()).intersection(set(self
.zcat
.delegatedIndexes
))
158 delegatedQuery
[i
] = query
.pop(i
)
160 except ValueError : pass
161 c
= SolrConnection(self
.zcat
.solr_url
)
162 q
=' AND '.join(['%s:"%s"' % item
for item
in delegatedQuery
.items()])
163 resp
= c
.query(q
, fields
='id', rows
=len(self
))
165 return IISet(filter(None, [self
.uids
.get(r
['id']) for r
in resp
.results
]))
167 def search(self
, query
, sort_index
=None, reverse
=0, limit
=None, merge
=1):
168 """Iterate through the indexes, applying the query to each one. If
169 merge is true then return a lazy result set (sorted if appropriate)
170 otherwise return the raw (possibly scored) results for later merging.
171 Limit is used in conjuntion with sorting or scored results to inform
172 the catalog how many results you are really interested in. The catalog
173 can then use optimizations to save time and memory. The number of
174 results is not guaranteed to fall within the limit however, you should
175 still slice or batch the results as usual."""
177 rs
= None # resultset
179 # Indexes fulfill a fairly large contract here. We hand each
180 # index the query mapping we are given (which may be composed
181 # of some combination of web request, kw mappings or plain old dicts)
182 # and the index decides what to do with it. If the index finds work
183 # for itself in the query, it returns the results and a tuple of
184 # the attributes that were used. If the index finds nothing for it
185 # to do then it returns None.
187 # Canonicalize the request into a sensible query before passing it on
188 query
= self
.make_query(query
)
190 cr
= self
.getCatalogPlan(query
)
195 plan
= self
._sorted
_search
_indexes
(query
)
198 rs
= self
.delegateSearch(query
, plan
)
199 if rs
is not None and not rs
:
202 indexes
= self
.indexes
.keys()
205 # We can have bogus keys or the plan can contain index names
206 # that have been removed in the meantime
209 index
= self
.getIndex(i
)
210 _apply_index
= getattr(index
, "_apply_index", None)
211 if _apply_index
is None:
215 limit_result
= ILimitedResultIndex
.providedBy(index
)
217 r
= _apply_index(query
, rs
)
219 r
= _apply_index(query
)
223 # Short circuit if empty result
224 # BBB: We can remove the "r is not None" check in Zope 2.14
225 # once we don't need to support the "return everything" case
227 if r
is not None and not r
:
228 cr
.stop_split(i
, result
=None, limit
=limit_result
)
231 # provide detailed info about the pure intersection time
232 intersect_id
= i
+ '#intersection'
233 cr
.start_split(intersect_id
)
234 # weightedIntersection preserves the values from any mappings
235 # we get, as some indexes don't return simple sets
236 if hasattr(rs
, 'items') or hasattr(r
, 'items'):
237 _
, rs
= weightedIntersection(rs
, r
)
239 rs
= intersection(rs
, r
)
241 cr
.stop_split(intersect_id
)
243 # consider the time it takes to intersect the index result with
244 # the total resultset to be part of the index time
245 cr
.stop_split(i
, result
=r
, limit
=limit_result
)
249 cr
.stop_split(i
, result
=None, limit
=limit_result
)
251 # Try to deduce the sort limit from batching arguments
252 b_start
= int(query
.get('b_start', 0))
253 b_size
= query
.get('b_size', None)
254 if b_size
is not None:
257 if b_size
is not None:
258 limit
= b_start
+ b_size
259 elif limit
and b_size
is None:
263 # None of the indexes found anything to do with the query
264 # We take this to mean that the query was empty (an empty filter)
265 # and so we return everything in the catalog
266 warnings
.warn('Your query %s produced no query restriction. '
267 'Currently the entire catalog content is returned. '
268 'In Zope 2.14 this will result in an empty LazyCat '
269 'to be returned.' % repr(cr
.make_key(query
)),
270 DeprecationWarning, stacklevel
=3)
273 if sort_index
is None:
274 sequence
, slen
= self
._limit
_sequence
(self
.data
.items(), rlen
,
276 result
= LazyMap(self
.instantiate
, sequence
, slen
,
277 actual_result_count
=rlen
)
279 cr
.start_split('sort_on')
280 result
= self
.sortResults(
281 self
.data
, sort_index
, reverse
, limit
, merge
,
282 actual_result_count
=rlen
, b_start
=b_start
,
284 cr
.stop_split('sort_on', None)
286 # We got some results from the indexes.
287 # Sort and convert to sequences.
288 # XXX: The check for 'values' is really stupid since we call
289 # items() and *not* values()
291 if sort_index
is None and hasattr(rs
, 'items'):
292 # having a 'items' means we have a data structure with
293 # scores. Build a new result set, sort it by score, reverse
294 # it, compute the normalized score, and Lazify it.
297 # Don't bother to sort here, return a list of
298 # three tuples to be passed later to mergeResults
299 # note that data_record_normalized_score_ cannot be
300 # calculated and will always be 1 in this case
301 getitem
= self
.__getitem
__
302 result
= [(score
, (1, score
, rid
), getitem
)
303 for rid
, score
in rs
.items()]
305 cr
.start_split('sort_on')
307 rs
= rs
.byValue(0) # sort it by score
308 max = float(rs
[0][0])
310 # Here we define our getter function inline so that
311 # we can conveniently store the max value as a default arg
312 # and make the normalized score computation lazy
313 def getScoredResult(item
, max=max, self
=self
):
315 Returns instances of self._v_brains, or whatever is
316 passed into self.useBrains.
319 r
=self
._v
_result
_class
(self
.data
[key
])\
320 .__of
__(aq_parent(self
))
321 r
.data_record_id_
= key
322 r
.data_record_score_
= score
323 r
.data_record_normalized_score_
= int(100. * score
/ max)
326 sequence
, slen
= self
._limit
_sequence
(rs
, rlen
, b_start
,
328 result
= LazyMap(getScoredResult
, sequence
, slen
,
329 actual_result_count
=rlen
)
330 cr
.stop_split('sort_on', None)
332 elif sort_index
is None and not hasattr(rs
, 'values'):
334 if hasattr(rs
, 'keys'):
336 sequence
, slen
= self
._limit
_sequence
(rs
, rlen
, b_start
,
338 result
= LazyMap(self
.__getitem
__, sequence
, slen
,
339 actual_result_count
=rlen
)
341 # sort. If there are scores, then this block is not
342 # reached, therefore 'sort-on' does not happen in the
343 # context of a text index query. This should probably
344 # sort by relevance first, then the 'sort-on' attribute.
345 cr
.start_split('sort_on')
346 result
= self
.sortResults(rs
, sort_index
, reverse
, limit
,
347 merge
, actual_result_count
=rlen
, b_start
=b_start
,
349 cr
.stop_split('sort_on', None)