__all__ = ['memmap']
import mmap
import warnings
from numeric import uint8,ndarray,dtype
dtypedescr = dtype
valid_filemodes = ["r", "c", "r+", "w+"]
writeable_filemodes = ["r+","w+"]
mode_equivalents = {
"readonly":"r",
"copyonwrite":"c",
"readwrite":"r+",
"write":"w+"
}
class memmap(ndarray):
"""Create a memory-map to an array stored in a file on disk.
Memory-mapped files are used for accessing small segments of large files
on disk, without reading the entire file into memory. Numpy's memmaps are
array-like objects. This differs from python's mmap module which are
file-like objects.
Parameters
----------
filename : string or file-like object
The file name or file object to be used as the array data
buffer.
dtype : data-type, optional
The data-type used to interpret the file contents.
Default is uint8
mode : {'r', 'r+', 'w+', 'c'}, optional
The mode to open the file.
'r', open existing file for read-only
'r+', open existing file for read-write
'w+', create or overwrite existing file and open for read-write
'c', copy-on-write, assignments effect data in memory, but changes
are not saved to disk. File on disk is read-only.
Default is 'r+'
offset : integer, optional
Byte offset into the file to start the array data. Should be a
multiple of the data-type of the data. Requires shape=None.
Default is 0
shape : tuple, optional
The desired shape of the array. If None, the returned array will be 1-D
with the number of elements determined by file size and data-type.
Default is None
order : {'C', 'F'}, optional
Specify the order of the N-D array, C or Fortran ordered. This only
has an effect if the shape is greater than 2-D.
Default is 'C'
Methods
-------
close : close the memmap file
flush : flush any changes in memory to file on disk
When you delete a memmap object, flush is called first to write
changes to disk before removing the object.
Returns
-------
memmap : array-like memmap object
The memmap object can be used anywhere an ndarray is accepted.
If fp is a memmap, isinstance(fp, numpy.ndarray) will return True.
Examples
--------
>>> import numpy as np
>>> data = np.arange(12, dtype='float32')
>>> data.resize((3,4))
>>> # Using a tempfile so doctest doesn't write files to your directory.
>>> # You would use a 'normal' filename.
>>> from tempfile import mkdtemp
>>> import os.path as path
>>> filename = path.join(mkdtemp(), 'newfile.dat')
>>> # Create a memmap with dtype and shape that matches our data
>>> fp = np.memmap(filename, dtype='float32', mode='w+', shape=(3,4))
>>> fp
memmap([[ 0., 0., 0., 0.],
[ 0., 0., 0., 0.],
[ 0., 0., 0., 0.]], dtype=float32)
>>> # Write data to memmap array
>>> fp[:] = data[:]
>>> fp
memmap([[ 0., 1., 2., 3.],
[ 4., 5., 6., 7.],
[ 8., 9., 10., 11.]], dtype=float32)
>>> # Deletion flushes memory changes to disk before removing the object.
>>> del fp
>>> # Load the memmap and verify data was stored
>>> newfp = np.memmap(filename, dtype='float32', mode='r', shape=(3,4))
>>> newfp
memmap([[ 0., 1., 2., 3.],
[ 4., 5., 6., 7.],
[ 8., 9., 10., 11.]], dtype=float32)
>>> # read-only memmap
>>> fpr = np.memmap(filename, dtype='float32', mode='r', shape=(3,4))
>>> fpr.flags.writeable
False
>>> # Cannot assign to read-only, obviously
>>> fpr[0, 3] = 56
Traceback (most recent call last):
...
RuntimeError: array is not writeable
>>> # copy-on-write memmap
>>> fpc = np.memmap(filename, dtype='float32', mode='c', shape=(3,4))
>>> fpc.flags.writeable
True
>>> # Can assign to copy-on-write array, but values are only written
>>> # into the memory copy of the array, and not written to disk.
>>> fpc
memmap([[ 0., 1., 2., 3.],
[ 4., 5., 6., 7.],
[ 8., 9., 10., 11.]], dtype=float32)
>>> fpc[0,:] = 0
>>> fpc
memmap([[ 0., 0., 0., 0.],
[ 4., 5., 6., 7.],
[ 8., 9., 10., 11.]], dtype=float32)
>>> # file on disk is unchanged
>>> fpr
memmap([[ 0., 1., 2., 3.],
[ 4., 5., 6., 7.],
[ 8., 9., 10., 11.]], dtype=float32)
>>> # offset into a memmap
>>> fpo = np.memmap(filename, dtype='float32', mode='r', offset=16)
>>> fpo
memmap([ 4., 5., 6., 7., 8., 9., 10., 11.], dtype=float32)
"""
__array_priority__ = -100.0
def __new__(subtype, filename, dtype=uint8, mode='r+', offset=0,
shape=None, order='C'):
try:
mode = mode_equivalents[mode]
except KeyError:
if mode not in valid_filemodes:
raise ValueError("mode must be one of %s" % \
(valid_filemodes + mode_equivalents.keys()))
if hasattr(filename,'read'):
fid = filename
else:
fid = file(filename, (mode == 'c' and 'r' or mode)+'b')
if (mode == 'w+') and shape is None:
raise ValueError, "shape must be given"
fid.seek(0,2)
flen = fid.tell()
descr = dtypedescr(dtype)
_dbytes = descr.itemsize
if shape is None:
bytes = flen-offset
if (bytes % _dbytes):
fid.close()
raise ValueError, "Size of available data is not a "\
"multiple of data-type size."
size = bytes // _dbytes
shape = (size,)
else:
if not isinstance(shape, tuple):
shape = (shape,)
size = 1
for k in shape:
size *= k
bytes = long(offset + size*_dbytes)
if mode == 'w+' or (mode == 'r+' and flen < bytes):
fid.seek(bytes-1,0)
fid.write(chr(0))
fid.flush()
if mode == 'c':
acc = mmap.ACCESS_COPY
elif mode == 'r':
acc = mmap.ACCESS_READ
else:
acc = mmap.ACCESS_WRITE
mm = mmap.mmap(fid.fileno(), bytes, access=acc)
self = ndarray.__new__(subtype, shape, dtype=descr, buffer=mm,
offset=offset, order=order)
self._mmap = mm
self._offset = offset
self._mode = mode
self._size = size
self._name = filename
return self
def __array_finalize__(self, obj):
if hasattr(obj, '_mmap'):
self._mmap = obj._mmap
else:
self._mmap = None
def flush(self):
"""Flush any changes in the array to the file on disk."""
if self._mmap is not None:
self._mmap.flush()
def sync(self):
"""Flush any changes in the array to the file on disk."""
warnings.warn("Use ``flush``.", DeprecationWarning)
self.flush()
def _close(self):
"""Close the memmap file. Only do this when deleting the object."""
if self.base is self._mmap:
self._mmap.close()
self._mmap = None
# DEV NOTE: This error is raised on the deletion of each row
# in a view of this memmap. Python traps exceptions in
# __del__ and prints them to stderr. Suppressing this for now
# until memmap code is cleaned up and and better tested for
# numpy v1.1 Objects that do not have a python mmap instance
# as their base data array, should not do anything in the
# close anyway.
#elif self._mmap is not None:
#raise ValueError, "Cannot close a memmap that is being used " \
# "by another object."
def close(self):
"""Close the memmap file. Does nothing."""
warnings.warn("``close`` is deprecated on memmap arrays. Use del",
DeprecationWarning)
def __del__(self):
if self._mmap is not None:
try:
# First run tell() to see whether file is open
self._mmap.tell()
except ValueError:
pass
else:
# flush any changes to disk, even if it's a view
self.flush()
self._close()
|