1 # -*- coding: utf-8 -*-
2 from App
.class_init
import InitializeClass
3 from AccessControl
import ClassSecurityInfo
4 from Products
.CMFCore
.interfaces
import IIndexableObject
5 from Products
.CMFCore
.CatalogTool
import CatalogTool
as BaseCatalogTool
6 from Products
.CMFCore
.CatalogTool
import IndexableObjectWrapper
7 from Products
.PageTemplates
.PageTemplateFile
import PageTemplateFile
8 from Products
.CMFCore
.permissions
import ModifyPortalContent
, ManagePortal
9 from zope
.component
import queryMultiAdapter
10 from Products
.ZCatalog
.Catalog
import Catalog
14 # imports for Catalog class
15 from Products
.PluginIndexes
.interfaces
import ILimitedResultIndex
16 from Products
.ZCatalog
.Lazy
import LazyMap
, LazyCat
, LazyValues
17 from BTrees
.IIBTree
import intersection
, IISet
18 from BTrees
.IIBTree
import weightedIntersection
# Name of the volatile attribute used to cache the Solr connection on the
# tool. The '_v_' prefix marks it as a ZODB volatile attribute: it is never
# persisted and disappears at transaction/connection boundaries.
_VOLATILE_SOLR_NAME = '_v_solrConnection'
class SolrTransactionHook :
    '''Solr commit coupled to the ZODB commit (registered as an
    after-commit hook so Solr and the ZODB stay in sync).'''
25 def __init__(self
, context
, con
) :
26 self
.context
= context
29 def __call__(self
, status
) :
36 delattr(self
.context
, _VOLATILE_SOLR_NAME
)
37 except AttributeError :
class CatalogTool(BaseCatalogTool) :
    """CMF catalog tool that delegates queries on selected indexes to Solr."""
    meta_type = 'Plinn Catalog'
    security = ClassSecurityInfo()
    # Insert a 'Solr' management tab between the base tool's ZMI tabs.
    manage_options = (BaseCatalogTool.manage_options[:5] +
                      ({'label' : 'Solr', 'action' : 'manage_solr'},) +
                      BaseCatalogTool.manage_options[5:])
    # Page template backing the 'Solr' ZMI tab.
    manage_solr = PageTemplateFile('www/manage_solr.pt', globals(), __name__='manage_solr')
50 def __init__(self
, idxs
=[]) :
51 super(CatalogTool
, self
).__init
__()
52 self
._catalog
= DelegatedCatalog(self
)
53 self
.solr_url
= 'http://localhost:8983/solr'
54 self
.delegatedIndexes
= ('Title', 'Description', 'SearchableText')
    security.declarePublic('getDelegatedIndexes')
    def getDelegatedIndexes(self) :
        """Return the tuple of index names whose queries are delegated to Solr."""
        return self.delegatedIndexes
61 security
.declareProtected(ManagePortal
, 'setSolrProperties')
62 def setSolrProperties(self
, url
, indexes
, REQUEST
=None) :
63 """ set Solr server url and delegated indexes """
65 self
.delegatedIndexes
= tuple([i
.strip() for i
in indexes
if i
.strip()])
67 REQUEST
.RESPONSE
.redirect(self
.absolute_url() + '/manage_solr?manage_tabs_message=Saved changes.')
69 def _getSolrConnection(self
) :
70 if not hasattr(self
, _VOLATILE_SOLR_NAME
) :
71 con
= SolrConnection(self
.solr_url
)
72 setattr(self
, _VOLATILE_SOLR_NAME
, con
)
73 txn
= transaction
.get()
74 txn
.addAfterCommitHook(SolrTransactionHook(self
, con
))
75 return getattr(self
, _VOLATILE_SOLR_NAME
)
77 security
.declarePrivate('solrAdd')
78 def solrAdd(self
, w
, uid
, idxs
) :
79 idxs
= idxs
if idxs
else self
.delegatedIndexes
80 # Filter out delegated indexes
81 idxs
= [i
for i
in idxs
if i
in self
.delegatedIndexes
]
84 attr
= getattr(w
, name
, '')
85 data
[name
] = attr() if callable(attr
) else attr
86 c
= self
._getSolrConnection
()
89 # PortalCatalog api overloads
90 def catalog_object(self
, obj
, uid
=None, idxs
=None, update_metadata
=1,
92 # Wraps the object with workflow and accessibility
93 # information just before cataloging.
94 if IIndexableObject
.providedBy(obj
):
97 w
= queryMultiAdapter( (obj
, self
), IIndexableObject
)
100 w
= IndexableObjectWrapper(obj
, self
)
104 # Filter out invalid indexes.
105 valid_indexes
= self
._catalog
.indexes
.keys()
106 idxs_
= [i
for i
in idxs
if i
in valid_indexes
]
108 super(CatalogTool
, self
).catalog_object(w
, uid
, idxs_
, update_metadata
, pghandler
)
109 self
.solrAdd(w
, uid
, idxs
)
111 security
.declarePrivate('reindexObject')
112 def reindexObject(self
, object, idxs
=[], update_metadata
=1, uid
=None):
113 """Update catalog after object data has changed.
115 The optional idxs argument is a list of specific indexes
116 to update (all of them by default).
118 The update_metadata flag controls whether the object's
119 metadata record is updated as well.
121 If a non-None uid is passed, it will be used as the catalog uid
122 for the object instead of its physical path.
125 uid
= self
.__url
(object)
127 self
.catalog_object(object, uid
, idxs
, update_metadata
)
129 security
.declarePrivate('unindexObject')
130 def unindexObject(self
, object):
131 """Remove from catalog.
133 super(CatalogTool
, self
).unindexObject(object)
134 c
= self
._getSolrConnection
()
135 url
= self
.__url
(object)
# Apply the ClassSecurityInfo declarations made on CatalogTool.
InitializeClass(CatalogTool)
class DelegatedCatalog(Catalog) :
    '''This is where queries are actually delegated to Solr.'''
144 def __init__(self
, zcat
, brains
=None) :
145 Catalog
.__init
__(self
, brains
=brains
)
148 def delegateSearch(self
, query
, plan
) :
151 None signifie : pas de délégation, il faut continuer à interroger les autres index.
152 IISet() vide : pas de résultat lors de la délégation, on peut arrêter la recherche.
154 indexes
= set(query
.keys()).intersection(set(self
.zcat
.delegatedIndexes
))
159 delegatedQuery
[i
] = query
.pop(i
)
161 except ValueError : pass
162 c
= SolrConnection(self
.zcat
.solr_url
)
163 q
=' AND '.join(['%s:"%s"' % item
for item
in delegatedQuery
.items()])
164 resp
= c
.query(q
, fields
='id', rows
=len(self
))
166 return IISet(filter(None, [self
.uids
.get(r
['id']) for r
in resp
.results
]))
168 def search(self
, query
, sort_index
=None, reverse
=0, limit
=None, merge
=1):
169 """Iterate through the indexes, applying the query to each one. If
170 merge is true then return a lazy result set (sorted if appropriate)
171 otherwise return the raw (possibly scored) results for later merging.
172 Limit is used in conjuntion with sorting or scored results to inform
173 the catalog how many results you are really interested in. The catalog
174 can then use optimizations to save time and memory. The number of
175 results is not guaranteed to fall within the limit however, you should
176 still slice or batch the results as usual."""
178 rs
= None # resultset
180 # Indexes fulfill a fairly large contract here. We hand each
181 # index the query mapping we are given (which may be composed
182 # of some combination of web request, kw mappings or plain old dicts)
183 # and the index decides what to do with it. If the index finds work
184 # for itself in the query, it returns the results and a tuple of
185 # the attributes that were used. If the index finds nothing for it
186 # to do then it returns None.
188 # Canonicalize the request into a sensible query before passing it on
189 query
= self
.make_query(query
)
191 cr
= self
.getCatalogPlan(query
)
196 plan
= self
._sorted
_search
_indexes
(query
)
199 rs
= self
.delegateSearch(query
, plan
)
200 if rs
is not None and not rs
:
203 indexes
= self
.indexes
.keys()
206 # We can have bogus keys or the plan can contain index names
207 # that have been removed in the meantime
210 index
= self
.getIndex(i
)
211 _apply_index
= getattr(index
, "_apply_index", None)
212 if _apply_index
is None:
216 limit_result
= ILimitedResultIndex
.providedBy(index
)
218 r
= _apply_index(query
, rs
)
220 r
= _apply_index(query
)
224 # Short circuit if empty result
225 # BBB: We can remove the "r is not None" check in Zope 2.14
226 # once we don't need to support the "return everything" case
228 if r
is not None and not r
:
229 cr
.stop_split(i
, result
=None, limit
=limit_result
)
232 # provide detailed info about the pure intersection time
233 intersect_id
= i
+ '#intersection'
234 cr
.start_split(intersect_id
)
235 # weightedIntersection preserves the values from any mappings
236 # we get, as some indexes don't return simple sets
237 if hasattr(rs
, 'items') or hasattr(r
, 'items'):
238 _
, rs
= weightedIntersection(rs
, r
)
240 rs
= intersection(rs
, r
)
242 cr
.stop_split(intersect_id
)
244 # consider the time it takes to intersect the index result with
245 # the total resultset to be part of the index time
246 cr
.stop_split(i
, result
=r
, limit
=limit_result
)
250 cr
.stop_split(i
, result
=None, limit
=limit_result
)
252 # Try to deduce the sort limit from batching arguments
253 b_start
= int(query
.get('b_start', 0))
254 b_size
= query
.get('b_size', None)
255 if b_size
is not None:
258 if b_size
is not None:
259 limit
= b_start
+ b_size
260 elif limit
and b_size
is None:
264 # None of the indexes found anything to do with the query
265 # We take this to mean that the query was empty (an empty filter)
266 # and so we return everything in the catalog
267 warnings
.warn('Your query %s produced no query restriction. '
268 'Currently the entire catalog content is returned. '
269 'In Zope 2.14 this will result in an empty LazyCat '
270 'to be returned.' % repr(cr
.make_key(query
)),
271 DeprecationWarning, stacklevel
=3)
274 if sort_index
is None:
275 sequence
, slen
= self
._limit
_sequence
(self
.data
.items(), rlen
,
277 result
= LazyMap(self
.instantiate
, sequence
, slen
,
278 actual_result_count
=rlen
)
280 cr
.start_split('sort_on')
281 result
= self
.sortResults(
282 self
.data
, sort_index
, reverse
, limit
, merge
,
283 actual_result_count
=rlen
, b_start
=b_start
,
285 cr
.stop_split('sort_on', None)
287 # We got some results from the indexes.
288 # Sort and convert to sequences.
289 # XXX: The check for 'values' is really stupid since we call
290 # items() and *not* values()
292 if sort_index
is None and hasattr(rs
, 'items'):
293 # having a 'items' means we have a data structure with
294 # scores. Build a new result set, sort it by score, reverse
295 # it, compute the normalized score, and Lazify it.
298 # Don't bother to sort here, return a list of
299 # three tuples to be passed later to mergeResults
300 # note that data_record_normalized_score_ cannot be
301 # calculated and will always be 1 in this case
302 getitem
= self
.__getitem
__
303 result
= [(score
, (1, score
, rid
), getitem
)
304 for rid
, score
in rs
.items()]
306 cr
.start_split('sort_on')
308 rs
= rs
.byValue(0) # sort it by score
309 max = float(rs
[0][0])
311 # Here we define our getter function inline so that
312 # we can conveniently store the max value as a default arg
313 # and make the normalized score computation lazy
314 def getScoredResult(item
, max=max, self
=self
):
316 Returns instances of self._v_brains, or whatever is
317 passed into self.useBrains.
320 r
=self
._v
_result
_class
(self
.data
[key
])\
321 .__of
__(aq_parent(self
))
322 r
.data_record_id_
= key
323 r
.data_record_score_
= score
324 r
.data_record_normalized_score_
= int(100. * score
/ max)
327 sequence
, slen
= self
._limit
_sequence
(rs
, rlen
, b_start
,
329 result
= LazyMap(getScoredResult
, sequence
, slen
,
330 actual_result_count
=rlen
)
331 cr
.stop_split('sort_on', None)
333 elif sort_index
is None and not hasattr(rs
, 'values'):
335 if hasattr(rs
, 'keys'):
337 sequence
, slen
= self
._limit
_sequence
(rs
, rlen
, b_start
,
339 result
= LazyMap(self
.__getitem
__, sequence
, slen
,
340 actual_result_count
=rlen
)
342 # sort. If there are scores, then this block is not
343 # reached, therefore 'sort-on' does not happen in the
344 # context of a text index query. This should probably
345 # sort by relevance first, then the 'sort-on' attribute.
346 cr
.start_split('sort_on')
347 result
= self
.sortResults(rs
, sort_index
, reverse
, limit
,
348 merge
, actual_result_count
=rlen
, b_start
=b_start
,
350 cr
.stop_split('sort_on', None)