[Plinn.git] / catalog.py
# -*- coding: utf-8 -*-
from App.class_init import InitializeClass
from AccessControl import ClassSecurityInfo
from Products.CMFCore.interfaces import IIndexableObject
from Products.CMFCore.CatalogTool import CatalogTool as BaseCatalogTool
from Products.CMFCore.CatalogTool import IndexableObjectWrapper
from Products.PageTemplates.PageTemplateFile import PageTemplateFile
from Products.CMFCore.permissions import ModifyPortalContent
from zope.component import queryMultiAdapter
from Products.ZCatalog.Catalog import Catalog
# The following imports are needed by DelegatedCatalog.search() below,
# which is adapted from Products.ZCatalog.Catalog.Catalog.search.
from Products.ZCatalog.Lazy import LazyCat, LazyMap
from Products.PluginIndexes.interfaces import ILimitedResultIndex
from BTrees.IIBTree import intersection, weightedIntersection
from Acquisition import aq_parent
import transaction
import warnings
from solr import SolrConnection

class SolrTransactionHook :
    ''' Commit Solr in step with the ZODB transaction commit. '''
    def __init__(self, connection) :
        self.connection = connection

    def __call__(self, status) :
        # 'status' is True when the ZODB transaction committed successfully.
        if status :
            self.connection.commit()
            self.connection.close()
        else :
            self.connection.close()

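# Illustrative sketch, not part of the Plinn API: how the hook above is meant
# to be used. The document id and field are hypothetical; the URL matches the
# default configured on CatalogTool below.
def _example_solr_hook_usage(solr_url='http://localhost:8983/solr') :
    c = SolrConnection(solr_url)
    c.add(id='/portal/front-page', Title='Front page')   # queue one document
    txn = transaction.get()
    # When the ZODB transaction commits successfully, the hook is called with
    # status=True: it commits the queued Solr documents and closes the
    # connection; on abort it just closes the connection.
    txn.addAfterCommitHook(SolrTransactionHook(c))
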
class CatalogTool(BaseCatalogTool) :
    meta_type = 'Legivoc Catalog'
    security = ClassSecurityInfo()
    manage_options = (BaseCatalogTool.manage_options[:5] +
                      ({'label' : 'Solr', 'action' : 'manage_solr'},) +
                      BaseCatalogTool.manage_options[5:])
    manage_solr = PageTemplateFile('www/manage_solr', globals())


    def __init__(self, idxs=[]) :
        super(CatalogTool, self).__init__()
        self._catalog = DelegatedCatalog()
        self.solr_url = 'http://localhost:8983/solr'
        self.delegatedIndexes = ('Title', 'Description', 'SearchableText')

    security.declarePrivate('solrAdd')
    def solrAdd(self, object, idxs=[], uid=None) :
        if IIndexableObject.providedBy(object):
            w = object
        else:
            w = queryMultiAdapter( (object, self), IIndexableObject )
            if w is None:
                # BBB
                w = IndexableObjectWrapper(object, self)

        # self.__url resolves to CMFCore's CatalogTool._CatalogTool__url
        # (the object's physical path) because this class shares its name
        # with the base class, so the name mangling matches.
        uid = uid if uid else self.__url(object)
        idxs = idxs if idxs != [] else self.delegatedIndexes
        data = {'id' : uid}
        for name in idxs :
            attr = getattr(w, name, '')
            data[name] = attr() if callable(attr) else attr
        c = SolrConnection(self.solr_url)
        c.add(**data)
        txn = transaction.get()
        txn.addAfterCommitHook(SolrTransactionHook(c))


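    # Illustrative example (values are hypothetical): for a document whose
    # physical path is /portal/news/item, indexed with the default delegated
    # indexes, solrAdd amounts to
    #   SolrConnection(self.solr_url).add(id='/portal/news/item',
    #                                     Title='...', Description='...',
    #                                     SearchableText='...')
    # with the Solr commit deferred to SolrTransactionHook.
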
    # PortalCatalog api overloads
    security.declareProtected(ModifyPortalContent, 'indexObject')
    def indexObject(self, object) :
        """ Add to catalog and send to Solr """
        super(CatalogTool, self).indexObject(object)
        self.solrAdd(object)

    security.declarePrivate('reindexObject')
    def reindexObject(self, object, idxs=[], update_metadata=1, uid=None):
        """ Update catalog and Solr """
        super(CatalogTool, self).reindexObject(object,
                                               idxs=idxs,
                                               update_metadata=update_metadata,
                                               uid=uid)
        if idxs != []:
            # Filter out invalid indexes and keep only those delegated to Solr.
            valid_indexes = self._catalog.indexes.keys()
            idxs = [i for i in idxs if i in valid_indexes and i in self.delegatedIndexes]
        else :
            idxs = self.delegatedIndexes

        if idxs :
            self.solrAdd(object, idxs=idxs, uid=uid)

    security.declarePrivate('unindexObject')
    def unindexObject(self, object):
        """Remove from catalog and from Solr.
        """
        super(CatalogTool, self).unindexObject(object)
        c = SolrConnection(self.solr_url)
        url = self.__url(object)
        c.delete(id=url)
        txn = transaction.get()
        txn.addAfterCommitHook(SolrTransactionHook(c))

InitializeClass(CatalogTool)


class DelegatedCatalog(Catalog) :
    '''This is where the actual delegation to Solr takes place. '''

    def search(self, query, sort_index=None, reverse=0, limit=None, merge=1):
        """Iterate through the indexes, applying the query to each one. If
        merge is true then return a lazy result set (sorted if appropriate)
        otherwise return the raw (possibly scored) results for later merging.
        Limit is used in conjunction with sorting or scored results to inform
        the catalog how many results you are really interested in. The catalog
        can then use optimizations to save time and memory. The number of
        results is not guaranteed to fall within the limit, however; you should
        still slice or batch the results as usual."""

        rs = None # resultset

        # Indexes fulfill a fairly large contract here. We hand each
        # index the query mapping we are given (which may be composed
        # of some combination of web request, kw mappings or plain old dicts)
        # and the index decides what to do with it. If the index finds work
        # for itself in the query, it returns the results and a tuple of
        # the attributes that were used. If the index finds nothing for it
        # to do then it returns None.
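        # For illustration (hypothetical index and values): a FieldIndex named
        # 'Title' given {'Title': 'foo'} would typically return
        # (IISet([rid1, rid2]), ('Title',)), while an index that finds nothing
        # relevant in the query returns None.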

        # Canonicalize the request into a sensible query before passing it on
        query = self.make_query(query)

        cr = self.getCatalogPlan(query)
        cr.start()

        plan = cr.plan()
        if not plan:
            plan = self._sorted_search_indexes(query)

        indexes = self.indexes.keys()
        for i in plan:
            if i not in indexes:
                # We can have bogus keys or the plan can contain index names
                # that have been removed in the meantime
                continue

            index = self.getIndex(i)
            _apply_index = getattr(index, "_apply_index", None)
            if _apply_index is None:
                continue

            cr.start_split(i)
            limit_result = ILimitedResultIndex.providedBy(index)
            if limit_result:
                r = _apply_index(query, rs)
            else:
                r = _apply_index(query)

            if r is not None:
                r, u = r
                # Short circuit if empty result
                # BBB: We can remove the "r is not None" check in Zope 2.14
                # once we don't need to support the "return everything" case
                # anymore
                if r is not None and not r:
                    cr.stop_split(i, result=None, limit=limit_result)
                    return LazyCat([])

                # provide detailed info about the pure intersection time
                intersect_id = i + '#intersection'
                cr.start_split(intersect_id)
                # weightedIntersection preserves the values from any mappings
                # we get, as some indexes don't return simple sets
                if hasattr(rs, 'items') or hasattr(r, 'items'):
                    _, rs = weightedIntersection(rs, r)
                else:
                    rs = intersection(rs, r)

                cr.stop_split(intersect_id)

                # consider the time it takes to intersect the index result with
                # the total resultset to be part of the index time
                cr.stop_split(i, result=r, limit=limit_result)
                if not rs:
                    break
            else:
                cr.stop_split(i, result=None, limit=limit_result)

        # Try to deduce the sort limit from batching arguments
        b_start = int(query.get('b_start', 0))
        b_size = query.get('b_size', None)
        if b_size is not None:
            b_size = int(b_size)

        if b_size is not None:
            limit = b_start + b_size
        elif limit and b_size is None:
            b_size = limit
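        # For example, query arguments b_start=20 and b_size=10 yield a sort
        # limit of 30 (records 0..29 are enough to serve that batch); when only
        # 'limit' is given, b_size falls back to that limit.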

        if rs is None:
            # None of the indexes found anything to do with the query
            # We take this to mean that the query was empty (an empty filter)
            # and so we return everything in the catalog
            warnings.warn('Your query %s produced no query restriction. '
                          'Currently the entire catalog content is returned. '
                          'In Zope 2.14 this will result in an empty LazyCat '
                          'to be returned.' % repr(cr.make_key(query)),
                          DeprecationWarning, stacklevel=3)

            rlen = len(self)
            if sort_index is None:
                sequence, slen = self._limit_sequence(self.data.items(), rlen,
                                                      b_start, b_size)
                result = LazyMap(self.instantiate, sequence, slen,
                                 actual_result_count=rlen)
            else:
                cr.start_split('sort_on')
                result = self.sortResults(
                    self.data, sort_index, reverse, limit, merge,
                    actual_result_count=rlen, b_start=b_start,
                    b_size=b_size)
                cr.stop_split('sort_on', None)
        elif rs:
            # We got some results from the indexes.
            # Sort and convert to sequences.
            # XXX: The check for 'values' is really stupid since we call
            # items() and *not* values()
            rlen = len(rs)
            if sort_index is None and hasattr(rs, 'items'):
                # having an 'items' means we have a data structure with
                # scores. Build a new result set, sort it by score, reverse
                # it, compute the normalized score, and Lazify it.

                if not merge:
                    # Don't bother to sort here, return a list of
                    # three tuples to be passed later to mergeResults.
                    # Note that data_record_normalized_score_ cannot be
                    # calculated and will always be 1 in this case.
                    getitem = self.__getitem__
                    result = [(score, (1, score, rid), getitem)
                              for rid, score in rs.items()]
                else:
                    cr.start_split('sort_on')

                    rs = rs.byValue(0) # sort it by score
                    max = float(rs[0][0])

                    # Here we define our getter function inline so that
                    # we can conveniently store the max value as a default arg
                    # and make the normalized score computation lazy
                    def getScoredResult(item, max=max, self=self):
                        """
                        Returns instances of self._v_brains, or whatever is
                        passed into self.useBrains.
                        """
                        score, key = item
                        r = (self._v_result_class(self.data[key])
                             .__of__(aq_parent(self)))
                        r.data_record_id_ = key
                        r.data_record_score_ = score
                        r.data_record_normalized_score_ = int(100. * score / max)
                        return r

                    sequence, slen = self._limit_sequence(rs, rlen, b_start,
                                                          b_size)
                    result = LazyMap(getScoredResult, sequence, slen,
                                     actual_result_count=rlen)
                    cr.stop_split('sort_on', None)

            elif sort_index is None and not hasattr(rs, 'values'):
                # no scores
                if hasattr(rs, 'keys'):
                    rs = rs.keys()
                sequence, slen = self._limit_sequence(rs, rlen, b_start,
                                                      b_size)
                result = LazyMap(self.__getitem__, sequence, slen,
                                 actual_result_count=rlen)
            else:
                # sort. If there are scores, then this block is not
                # reached, therefore 'sort-on' does not happen in the
                # context of a text index query. This should probably
                # sort by relevance first, then the 'sort-on' attribute.
                cr.start_split('sort_on')
                result = self.sortResults(rs, sort_index, reverse, limit,
                                          merge, actual_result_count=rlen,
                                          b_start=b_start, b_size=b_size)
                cr.stop_split('sort_on', None)
        else:
            # Empty result set
            result = LazyCat([])
        cr.stop()
        return result
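
# Illustrative usage sketch (nothing below is defined in this module; the
# portal object and its content ids are hypothetical). Queries reach
# DelegatedCatalog.search through the standard ZCatalog machinery, e.g.:
#
#   results = portal.portal_catalog(SearchableText='budget',
#                                   sort_on='Title', b_start=0, b_size=20)
#
# b_start/b_size feed the sort-limit deduction in search() above, and the
# delegated indexes (Title, Description, SearchableText) are the same ones
# that solrAdd pushes to Solr.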