Reprise du code pour support des blobs.
author Benoît Pin <benoit.pin@gmail.com>
Mon, 25 Oct 2010 13:00:45 +0000 (15:00 +0200)
committer Benoît Pin <benoit.pin@gmail.com>
Mon, 25 Oct 2010 13:00:45 +0000 (15:00 +0200)
blobbases.py

index 0da0f8b..c0084c3 100755 (executable)
@@ -52,6 +52,11 @@ from zope.event import notify
 from zope.lifecycleevent import ObjectModifiedEvent
 from zope.lifecycleevent import ObjectCreatedEvent
 
+from ZODB.blob import Blob
+
+CHUNK_SIZE = 1 << 16  # 65536 bytes; read size used when streaming blob content to RESPONSE
+
+
 manage_addFileForm = DTMLFile('dtml/imageAdd',
                               globals(),
                               Kind='File',
@@ -71,19 +76,9 @@ def manage_addFile(self, id, file='', title='', precondition='',
     id, title = cookId(id, title, file)
     
     self=self.this()
+    self._setObject(id, File(id,title,file,content_type, precondition))
 
-    # First, we create the file without data:
-    self._setObject(id, File(id,title,'',content_type, precondition))
-    
     newFile = self._getOb(id)
-    
-    # Now we "upload" the data.  By doing this in two steps, we
-    # can use a database trick to make the upload more efficient.
-    if file:
-        newFile.manage_upload(file)
-    if content_type:
-        newFile.content_type=content_type
-    
     notify(ObjectCreatedEvent(newFile))
     
     if REQUEST is not None:
@@ -103,7 +98,7 @@ class File(Persistent, Implicit, PropertyManager,
                IWriteLock,
                HTTPRangeSupport.HTTPRangeInterface,
               )
-    meta_type='File'
+    meta_type='Blob File'
 
     security = ClassSecurityInfo()
     security.declareObjectProtected(View)
@@ -141,10 +136,62 @@ class File(Persistent, Implicit, PropertyManager,
         self.__name__=id
         self.title=title
         self.precondition=precondition
-
-        data, size = self._read_data(file)
-        content_type=self._get_content_type(file, data, id, content_type)
-        self.update_data(data, content_type, size)
+        self.uploaded_filename = cookId('', '', file)[0]
+        self.bdata = Blob()
+
+        content_type=self._get_content_type(file, id, content_type)
+        self.update_data(file, content_type)
+
+    security.declarePrivate('save')
+    def save(self, file):  # copy the content of a file-like object into the blob and record its size
+        bf = self.bdata.open('w')  # writable handle on the Blob stored in self.bdata
+        bf.write(file.read())  # NOTE(review): whole upload is read into memory in one call; not chunked
+        self.size = bf.tell()  # offset after the write is stored as the size in bytes
+        bf.close()  # NOTE(review): not reached if write() raises (no try/finally)
+    
+    security.declarePrivate('open')
+    def open(self, mode='r'):  # open the underlying blob in the given mode (default: read)
+        bf = self.bdata.open(mode)
+        return bf  # the handle is returned open; it is not closed here
+    
+    security.declarePrivate('updateSize')
+    def updateSize(self, size=None):  # set self.size; measure the blob when no size is given
+        if size is None :
+            bf = self.open('r')
+            bf.seek(0,2)  # whence=2: position at the end of the blob
+            self.size = bf.tell()  # end offset is the length in bytes
+            bf.close()
+        else :
+            self.size = size  # caller-supplied value is stored as-is, without checking the blob
+
+    def _getLegacyData(self) :  # getter of the legacy 'data' property; NOTE(review): no import of `warn` is visible in this diff - confirm
+        warn("Accessing 'data' attribute may be inefficient with "
+             "this blob based file. You should refactor your product "
+             "by accessing data like: "
+             "f = self.open('r') "
+             "data = f.read()",
+            DeprecationWarning, stacklevel=2)
+        f = self.open()  # default mode 'r'
+        data = f.read()  # entire blob content is loaded into memory
+        f.close()
+        return data
+    
+    def _setLegacyData(self, data) :  # setter of the legacy 'data' property; NOTE(review): no import of `warn` is visible in this diff - confirm
+        warn("Accessing 'data' attribute may be inefficient with "
+             "this blob based file. You should refactor your product "
+             "by accessing data like: "
+             "f = self.save(data)",
+            DeprecationWarning, stacklevel=2)
+        if isinstance(data, str) :  # save() calls .read(), so a raw str is wrapped in a StringIO
+            sio = StringIO()
+            sio.write(data)
+            sio.seek(0)  # rewind so the read in save() starts at the beginning
+            data = sio
+        self.save(data)  # non-str values are passed through unchanged and must provide .read()
+        
+    data = property(_getLegacyData, _setLegacyData,
+                    "Data Legacy attribute to ensure compatibility "
+                    "with derived classes that access data by this way.")
 
     def id(self):
         return self.__name__
@@ -251,36 +298,11 @@ class File(Persistent, Implicit, PropertyManager,
                     RESPONSE.setHeader('Content-Range',
                         'bytes %d-%d/%d' % (start, end - 1, self.size))
                     RESPONSE.setStatus(206) # Partial content
-
-                    data = self.data
-                    if isinstance(data, str):
-                        RESPONSE.write(data[start:end])
-                        return True
-
-                    # Linked Pdata objects. Urgh.
-                    pos = 0
-                    while data is not None:
-                        l = len(data.data)
-                        pos = pos + l
-                        if pos > start:
-                            # We are within the range
-                            lstart = l - (pos - start)
-
-                            if lstart < 0: lstart = 0
-
-                            # find the endpoint
-                            if end <= pos:
-                                lend = l - (pos - end)
-
-                                # Send and end transmission
-                                RESPONSE.write(data[lstart:lend])
-                                break
-
-                            # Not yet at the end, transmit what we have.
-                            RESPONSE.write(data[lstart:])
-
-                        data = data.next
-
+                    
+                    bf = self.open('r')
+                    bf.seek(start)
+                    RESPONSE.write(bf.read(size))
+                    bf.close()
                     return True
 
                 else:
@@ -310,11 +332,13 @@ class File(Persistent, Implicit, PropertyManager,
                             draftprefix, boundary))
                     RESPONSE.setStatus(206) # Partial content
 
-                    data = self.data
-                    # The Pdata map allows us to jump into the Pdata chain
-                    # arbitrarily during out-of-order range searching.
-                    pdata_map = {}
-                    pdata_map[0] = data
+
+                    bf = self.open('r')
+#                    data = self.data
+#                    # The Pdata map allows us to jump into the Pdata chain
+#                    # arbitrarily during out-of-order range searching.
+#                    pdata_map = {}
+#                    pdata_map[0] = data
 
                     for start, end in ranges:
                         RESPONSE.write('\r\n--%s\r\n' % boundary)
@@ -324,51 +348,12 @@ class File(Persistent, Implicit, PropertyManager,
                             'Content-Range: bytes %d-%d/%d\r\n\r\n' % (
                                 start, end - 1, self.size))
 
-                        if isinstance(data, str):
-                            RESPONSE.write(data[start:end])
-
-                        else:
-                            # Yippee. Linked Pdata objects. The following
-                            # calculations allow us to fast-forward through the
-                            # Pdata chain without a lot of dereferencing if we
-                            # did the work already.
-                            first_size = len(pdata_map[0].data)
-                            if start < first_size:
-                                closest_pos = 0
-                            else:
-                                closest_pos = (
-                                    ((start - first_size) >> 16 << 16) +
-                                    first_size)
-                            pos = min(closest_pos, max(pdata_map.keys()))
-                            data = pdata_map[pos]
-
-                            while data is not None:
-                                l = len(data.data)
-                                pos = pos + l
-                                if pos > start:
-                                    # We are within the range
-                                    lstart = l - (pos - start)
-
-                                    if lstart < 0: lstart = 0
-
-                                    # find the endpoint
-                                    if end <= pos:
-                                        lend = l - (pos - end)
-
-                                        # Send and loop to next range
-                                        RESPONSE.write(data[lstart:lend])
-                                        break
-
-                                    # Not yet at the end, transmit what we have.
-                                    RESPONSE.write(data[lstart:])
-
-                                data = data.next
-                                # Store a reference to a Pdata chain link so we
-                                # don't have to deref during this request again.
-                                pdata_map[pos] = data
-
-                    # Do not keep the link references around.
-                    del pdata_map
+                        
+                        size = end - start
+                        bf.seek(start)
+                        RESPONSE.write(bf.read(size))
+                    
+                    bf.close()
 
                     RESPONSE.write('\r\n--%s--\r\n' % boundary)
                     return True
@@ -422,16 +407,13 @@ class File(Persistent, Implicit, PropertyManager,
 
         self.ZCacheable_set(None)
 
-        data=self.data
-        if isinstance(data, str):
-            RESPONSE.setBase(None)
-            return data
-
-        while data is not None:
-            RESPONSE.write(data.data)
-            data=data.next
-
-        return ''
+        bf = self.open('r')
+        chunk = bf.read(CHUNK_SIZE)
+        while chunk :
+            RESPONSE.write(chunk)
+            chunk = bf.read(CHUNK_SIZE)
+        bf.close()
+        return ''       
 
     security.declareProtected(View, 'view_image_or_file')
     def view_image_or_file(self, URL1):
@@ -445,19 +427,25 @@ class File(Persistent, Implicit, PropertyManager,
         """ Allow file objects to be searched.
         """
         if self.content_type.startswith('text/'):
-            return str(self.data)
+            bf = self.open('r')
+            data = bf.read()
+            bf.close()
+            return data
         return ''
 
     security.declarePrivate('update_data')
-    def update_data(self, data, content_type=None, size=None):
-        if isinstance(data, unicode):
+    def update_data(self, file, content_type=None):
+        if isinstance(file, unicode):
             raise TypeError('Data can only be str or file-like.  '
                             'Unicode objects are expressly forbidden.')
+        elif isinstance(file, str) :
+            sio = StringIO()
+            sio.write(file)
+            sio.seek(0)
+            file = sio
 
         if content_type is not None: self.content_type=content_type
-        if size is None: size=len(data)
-        self.size=size
-        self.data=data
+        self.save(file)
         self.ZCacheable_invalidate()
         self.ZCacheable_set(None)
         self.http__refreshEtag()
@@ -476,7 +464,7 @@ class File(Persistent, Implicit, PropertyManager,
         if precondition: self.precondition=str(precondition)
         elif self.precondition: del self.precondition
         if filedata is not None:
-            self.update_data(filedata, content_type, len(filedata))
+            self.update_data(filedata, content_type)
         else:
             self.ZCacheable_invalidate()
         
@@ -496,95 +484,23 @@ class File(Persistent, Implicit, PropertyManager,
         if self.wl_isLocked():
             raise ResourceLockedError, "File is locked via WebDAV"
 
-        data, size = self._read_data(file)
-        content_type=self._get_content_type(file, data, self.__name__,
+        content_type=self._get_content_type(file, self.__name__,
                                             'application/octet-stream')
-        self.update_data(data, content_type, size)
-        
-        notify(ObjectModifiedEvent(self))
-        
+        self.update_data(file, content_type)
+
         if REQUEST:
             message="Saved changes."
             return self.manage_main(self,REQUEST,manage_tabs_message=message)
 
-    def _get_content_type(self, file, body, id, content_type=None):
+    def _get_content_type(self, file, id, content_type=None):
         headers=getattr(file, 'headers', None)
         if headers and headers.has_key('content-type'):
             content_type=headers['content-type']
         else:
-            if not isinstance(body, str): body=body.data
-            content_type, enc=guess_content_type(
-                getattr(file, 'filename',id), body, content_type)
+            name = getattr(file, 'filename', self.uploaded_filename) or id
+            content_type, enc=guess_content_type(name, '', content_type)
         return content_type
 
-    def _read_data(self, file):
-        import transaction
-
-        n=1 << 16
-
-        if isinstance(file, str):
-            size=len(file)
-            if size < n: return file, size
-            # Big string: cut it into smaller chunks
-            file = StringIO(file)
-
-        if isinstance(file, FileUpload) and not file:
-            raise ValueError, 'File not specified'
-
-        if hasattr(file, '__class__') and file.__class__ is Pdata:
-            size=len(file)
-            return file, size
-
-        seek=file.seek
-        read=file.read
-
-        seek(0,2)
-        size=end=file.tell()
-
-        if size <= 2*n:
-            seek(0)
-            if size < n: return read(size), size
-            return Pdata(read(size)), size
-
-        # Make sure we have an _p_jar, even if we are a new object, by
-        # doing a sub-transaction commit.
-        transaction.savepoint(optimistic=True)
-
-        if self._p_jar is None:
-            # Ugh
-            seek(0)
-            return Pdata(read(size)), size
-
-        # Now we're going to build a linked list from back
-        # to front to minimize the number of database updates
-        # and to allow us to get things out of memory as soon as
-        # possible.
-        next = None
-        while end > 0:
-            pos = end-n
-            if pos < n:
-                pos = 0 # we always want at least n bytes
-            seek(pos)
-
-            # Create the object and assign it a next pointer
-            # in the same transaction, so that there is only
-            # a single database update for it.
-            data = Pdata(read(end-pos))
-            self._p_jar.add(data)
-            data.next = next
-
-            # Save the object so that we can release its memory.
-            transaction.savepoint(optimistic=True)
-            data._p_deactivate()
-            # The object should be assigned an oid and be a ghost.
-            assert data._p_oid is not None
-            assert data._p_state == -1
-
-            next = data
-            end = pos
-
-        return next, size
-
     security.declareProtected(delete_objects, 'DELETE')
 
     security.declareProtected(change_images_and_files, 'PUT')
@@ -596,10 +512,9 @@ class File(Persistent, Implicit, PropertyManager,
 
         file=REQUEST['BODYFILE']
 
-        data, size = self._read_data(file)
-        content_type=self._get_content_type(file, data, self.__name__,
-                                            type or self.content_type)
-        self.update_data(data, content_type, size)
+        content_type = self._get_content_type(file, self.__name__,
+                                              type or self.content_type)
+        self.update_data(file, content_type)
 
         RESPONSE.setStatus(204)
         return RESPONSE
@@ -611,7 +526,11 @@ class File(Persistent, Implicit, PropertyManager,
         Returns the size of the file or image.
         """
         size=self.size
-        if size is None: size=len(self.data)
+        if size is None :
+            bf = self.open('r')
+            bf.seek(0,2)
+            self.size = size = bf.tell()
+            bf.close()
         return size
 
     # deprecated; use get_size!
@@ -648,16 +567,11 @@ class File(Persistent, Implicit, PropertyManager,
                 RESPONSE.setHeader('Content-Length', self.size)
                 return result
 
-        data = self.data
-        if isinstance(data, str):
-            RESPONSE.setBase(None)
-            return data
-
-        while data is not None:
-            RESPONSE.write(data.data)
-            data = data.next
-
-        return ''
+        bf = self.open('r')
+        data = bf.read()
+        bf.close()
+        RESPONSE.setBase(None)
+        return data
 
 manage_addImageForm=DTMLFile('dtml/imageAdd',globals(),
                              Kind='Image',kind='image')
@@ -677,19 +591,9 @@ def manage_addImage(self, id, file, title='', precondition='', content_type='',
     id, title = cookId(id, title, file)
 
     self=self.this()
+    self._setObject(id, Image(id,title,file,content_type, precondition))
 
-    # First, we create the image without data:
-    self._setObject(id, Image(id,title,'',content_type, precondition))
-    
     newFile = self._getOb(id)
-    
-    # Now we "upload" the data.  By doing this in two steps, we
-    # can use a database trick to make the upload more efficient.
-    if file:
-        newFile.manage_upload(file)
-    if content_type:
-        newFile.content_type=content_type
-    
     notify(ObjectCreatedEvent(newFile))
     
     if REQUEST is not None:
@@ -699,14 +603,14 @@ def manage_addImage(self, id, file, title='', precondition='', content_type='',
     return id
 
 
-def getImageInfo(data):
-    data = str(data)
-    size = len(data)
+def getImageInfo(file):
     height = -1
     width = -1
     content_type = ''
 
     # handle GIFs
+    data = file.read(24)
+    size = len(data)
     if (size >= 10) and data[:6] in ('GIF87a', 'GIF89a'):
         # Check to see if content_type is correct
         content_type = 'image/gif'
@@ -735,7 +639,8 @@ def getImageInfo(data):
     # handle JPEGs
     elif (size >= 2) and (data[:2] == '\377\330'):
         content_type = 'image/jpeg'
-        jpeg = StringIO(data)
+        jpeg = file
+        jpeg.seek(0)
         jpeg.read(2)
         b = jpeg.read(1)
         try:
@@ -761,7 +666,7 @@ class Image(File):
     as File objects.  Images also have a string representation that
     renders an HTML 'IMG' tag.
     """
-    meta_type='Image'
+    meta_type='Blob Image'
 
     security = ClassSecurityInfo()
     security.declareObjectProtected(View)
@@ -813,29 +718,29 @@ class Image(File):
     manage_uploadForm=manage_editForm
 
     security.declarePrivate('update_data')
-    def update_data(self, data, content_type=None, size=None):
-        if isinstance(data, unicode):
-            raise TypeError('Data can only be str or file-like.  '
-                            'Unicode objects are expressly forbidden.')
+    def update_data(self, file, content_type=None):  # store the data via File.update_data, then refresh image metadata
+        super(Image, self).update_data(file, content_type)
+        self.updateFormat(size=self.size, content_type=content_type)  # dimensions is not passed, so updateFormat inspects the blob
         
-        if size is None: size=len(data)
-
-        self.size=size
-        self.data=data
-
-        ct, width, height = getImageInfo(data)
-        if ct:
-            content_type = ct
-        if width >= 0 and height >= 0:
-            self.width = width
-            self.height = height
-
-        # Now we should have the correct content type, or still None
-        if content_type is not None: self.content_type = content_type
-
-        self.ZCacheable_invalidate()
-        self.ZCacheable_set(None)
-        self.http__refreshEtag()
+    security.declarePrivate('updateFormat')
+    def updateFormat(self, size=None, dimensions=None, content_type=None):  # refresh size, width/height and content_type
+        self.updateSize(size=size)  # size=None makes updateSize measure the blob
+
+        if dimensions is None or content_type is None :  # either one missing: inspect the blob; a supplied dimensions value is not used in this branch
+            bf = self.open('r')
+            ct, width, height = getImageInfo(bf)
+            bf.close()
+            if ct:  # a detected content type takes precedence over the argument
+                content_type = ct
+            if width >= 0 and height >= 0:  # getImageInfo initialises both to -1; only store non-negative values
+                self.width = width
+                self.height = height
+
+            # Now we should have the correct content type, or still None
+            if content_type is not None: self.content_type = content_type
+        else :  # both supplied by the caller: store them without reading the blob
+            self.width, self.height = dimensions
+            self.content_type = content_type
 
     def __str__(self):
         return self.tag()
@@ -910,30 +815,3 @@ def cookId(id, title, file):
                         filename.rfind(':'),
                         )+1:]
     return id, title
-
-class Pdata(Persistent, Implicit):
-    # Wrapper for possibly large data
-
-    next=None
-
-    def __init__(self, data):
-        self.data=data
-
-    def __getslice__(self, i, j):
-        return self.data[i:j]
-
-    def __len__(self):
-        data = str(self)
-        return len(data)
-
-    def __str__(self):
-        next=self.next
-        if next is None: return self.data
-
-        r=[self.data]
-        while next is not None:
-            self=next
-            r.append(self.data)
-            next=self.next
-
-        return ''.join(r)