# -*- coding: utf-8 -*-
"""
These functions might be PR quality for numpy.
"""
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
from six import next
from six.moves import zip, range
def atleast_nd(arr, n, tofront=False):
    r"""
    View the input as an array with at least ``n`` dimensions.

    Missing dimensions are added as length-1 axes, either all in front of
    the existing axes or all behind them.

    Args:
        arr (array_like): One array-like object. Non-array inputs are
            converted with ``np.asanyarray``. Arrays that already have ``n``
            or more dimensions are returned unchanged.
        n (int): number of dimensions to ensure. If None, no dimensions
            are added.
        tofront (bool): if True new dimensions are added to the front of the
            array, otherwise they are added to the back.

    CommandLine:
        python -m vtool.numpy_utils atleast_nd

    Returns:
        ndarray: An array with ``ndim >= n`` (when ``n`` is not None).
            New axes are inserted by indexing with ``None``, so the result
            is produced by indexing the converted input rather than by
            explicitly copying it. For example, with ``n=3`` a 1-D array of
            shape ``(N,)`` becomes shape ``(N, 1, 1)``, or ``(1, 1, N)``
            when ``tofront=True``.

    See Also:
        ensure_shape, np.atleast_1d, np.atleast_2d, np.atleast_3d

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.numpy_utils import *  # NOQA
        >>> arr = np.array([1, 1, 1])
        >>> atleast_nd(arr, 2).shape
        (3, 1)
        >>> atleast_nd(arr, 2, tofront=True).shape
        (1, 3)

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.numpy_utils import *  # NOQA
        >>> n = 4
        >>> atleast_nd([1, 1, 1], n).shape
        (3, 1, 1, 1)
        >>> atleast_nd(np.array(0), n).shape
        (1, 1, 1, 1)
        >>> atleast_nd(np.array([[[[[1]]]]]), n).shape
        (1, 1, 1, 1, 1)
    """
    arr_ = np.asanyarray(arr)
    if n is None:
        return arr_
    num_missing = n - arr_.ndim
    if num_missing <= 0:
        # Already has enough dimensions
        return arr_
    # Indexing with None inserts a length-1 axis; Ellipsis stands in for all
    # of the existing axes.
    new_axes = (None,) * num_missing
    if tofront:
        index = new_axes + (Ellipsis,)
    else:
        index = (Ellipsis,) + new_axes
    return arr_[index]
def ensure_shape(arr, dimshape):
    """
    Ensure an array has a minimum number of dimensions and, optionally,
    fixed sizes along some of them.

    Args:
        arr (array_like): input data; converted by ``atleast_nd``.
        dimshape (tuple | list | int): either the number of dimensions to
            ensure, or a sequence giving the requested size of each
            dimension. A ``None`` entry keeps whatever size the array
            already has along that dimension.

    Returns:
        ndarray: the array with the requested shape. The input is never
            reshaped in place; ``reshape`` is used, which returns a view
            when possible.

    Raises:
        ValueError: if the requested shape is incompatible with the number
            of elements in ``arr``.

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.numpy_utils import *  # NOQA
        >>> ensure_shape(np.array([[1, 2]]), (None, 2)).shape
        (1, 2)
        >>> ensure_shape(np.array([]), (None, 2)).shape
        (0, 2)
    """
    if isinstance(dimshape, (tuple, list)):
        n = len(dimshape)
        dimshape = tuple(dimshape)
    else:
        # A bare integer only constrains the number of dimensions
        n = dimshape
        dimshape = None
    arr_ = atleast_nd(arr, n)
    if dimshape is not None:
        newshape = tuple(
            have if want is None else want
            for have, want in zip(arr_.shape, dimshape)
        )
        if newshape != arr_.shape:
            # Use reshape instead of assigning to ``.shape``: when the input
            # already has enough dimensions ``arr_`` is the caller's own
            # array, and assigning its shape would mutate it in place.
            arr_ = arr_.reshape(newshape)
    return arr_
def fromiter_nd(iter_, shape, dtype):
    r"""
    Like np.fromiter but handles iterators that generate n-dimensional
    arrays.

    The output buffer is allocated once and filled one row at a time, so
    the generated items never need to be held in memory together.

    Note:
        Benchmarks recorded against an earlier implementation of this
        function showed ``np.vstack(list_)`` to be faster when the arrays
        already exist in a list. This function is meant for lazily
        generated items.

    Args:
        iter\_ (iterable): an iterable that generates homogeneous
            array-likes, one per output row.
        shape (tuple): the expected output shape. ``shape[0]`` is the number
            of items to consume and ``shape[1:]`` is the shape of each item.
        dtype (dtype): the numpy datatype of the output. Items are cast to
            this dtype on assignment.

    Returns:
        ndarray: array with the requested ``shape`` and ``dtype``.

    Raises:
        ValueError: if the iterable yields fewer than ``shape[0]`` items, or
            if an item does not contain exactly ``prod(shape[1:])``
            elements.

    Note:
        At most ``shape[0]`` items are consumed; any further items are left
        in the iterator.

    CommandLine:
        python -m vtool.numpy_utils fromiter_nd

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.numpy_utils import *  # NOQA
        >>> dtype = np.float64
        >>> total = 11
        >>> rng = np.random.RandomState(0)
        >>> iter_ = (rng.rand(5, 7, 3) for _ in range(total))
        >>> shape = (total, 5, 7, 3)
        >>> result = fromiter_nd(iter_, shape, dtype)
        >>> assert result.shape == shape

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.numpy_utils import *  # NOQA
        >>> import itertools as it
        >>> dtype = np.int64
        >>> qfxs = np.array([1, 2, 3])
        >>> dfxs = np.array([4, 5, 6])
        >>> iter_ = (np.array(x) for x in it.product(qfxs, dfxs))
        >>> total = len(qfxs) * len(dfxs)
        >>> shape = (total, 2)
        >>> result = fromiter_nd(iter_, shape, dtype)
        >>> assert result.shape == shape
    """
    shape = tuple(shape)
    num_rows = shape[0]
    row_shape = shape[1:]
    out = np.empty(shape, dtype=dtype)
    num_filled = 0
    # ``range`` is listed first so that zip stops as soon as the rows are
    # full, without pulling an extra item out of ``iter_``.
    for rowx, item in zip(range(num_rows), iter_):
        # Assignment casts values to ``dtype``. The reshape raises a
        # ValueError when the item has the wrong number of elements instead
        # of silently truncating or padding raw bytes.
        out[rowx] = np.asarray(item).reshape(row_shape)
        num_filled += 1
    if num_filled != num_rows:
        raise ValueError(
            'iterator too short: expected %d items but got %d' % (
                num_rows, num_filled))
    return out
def index_to_boolmask(index_list, maxval=None, isflat=True):
    r"""
    Transform a list of indices into a boolean mask.

    Args:
        index_list (ndarray | list): indices to mark as True. When
            ``isflat`` is True these index the first axis of the mask
            directly. When ``isflat`` is False the last axis holds one
            coordinate per mask dimension (e.g. an ``(N, 2)`` array of
            ``(row, col)`` pairs).
        maxval (int | tuple | None): shape of the output mask.
            If None (default) the smallest mask able to hold every index
            is used: ``max + 1`` in the flat case, or the per-coordinate
            ``max + 1`` in the non-flat case.
        isflat (bool): whether ``index_list`` holds flat indices
            (default = True).

    Returns:
        ndarray: boolean mask that is True at every given index.

    CommandLine:
        python -m vtool.numpy_utils index_to_boolmask

    Example:
        >>> # DISABLE_DOCTEST
        >>> from vtool.numpy_utils import *  # NOQA
        >>> index_list = np.array([(0, 0), (1, 1), (2, 1)])
        >>> maxval = (3, 3)
        >>> mask = index_to_boolmask(index_list, maxval, isflat=False)
        >>> result = ('mask =\n%s' % (str(mask.astype(np.uint8)),))
        >>> print(result)
        mask =
        [[1 0 0]
         [0 1 0]
         [0 1 0]]

    Example:
        >>> # DISABLE_DOCTEST
        >>> from vtool.numpy_utils import *  # NOQA
        >>> index_list = np.array([0, 1, 4])
        >>> maxval = 5
        >>> mask = index_to_boolmask(index_list, maxval, isflat=True)
        >>> result = ('mask = %s' % (str(mask.astype(np.uint8)),))
        >>> print(result)
        mask = [1 1 0 0 1]
    """
    index_arr = np.asarray(index_list)
    is_empty = index_arr.size == 0
    if maxval is None:
        if is_empty:
            # Nothing to index: produce an empty mask of matching rank
            num_axes = index_arr.shape[-1] if index_arr.ndim > 1 else 1
            maxval = 0 if isflat else (0,) * max(num_axes, 1)
        elif isflat:
            # The mask must be one longer than the largest index
            maxval = int(index_arr.max()) + 1
        else:
            # One mask dimension per coordinate column
            coords = index_arr.reshape(-1, index_arr.shape[-1])
            maxval = tuple(int(v) + 1 for v in coords.max(axis=0))
    mask = np.zeros(maxval, dtype=np.bool_)
    if is_empty:
        # An empty list converts to a float array, which is not a valid
        # index, so skip the assignment entirely.
        return mask
    if isflat:
        mask[index_arr] = True
    else:
        # Transposing gives one index array per mask dimension
        mask[tuple(index_arr.T)] = True
    return mask
def multiaxis_reduce(ufunc, arr, startaxis=0):
    """
    Reduce an array over every axis from ``startaxis`` onwards.

    Used to get e.g. the max/min over all axes at or after ``startaxis``.

    Args:
        ufunc (callable): reduction accepting ``(array, axis=int)`` and
            returning an array with that axis removed (e.g. ``np.amax``).
        arr (ndarray): input array
        startaxis (int): first axis to reduce (default = 0). Negative values
            count from the last axis.

    Returns:
        ndarray: array whose shape is ``arr.shape[:startaxis]``; a scalar
            when ``startaxis`` is 0.

    CommandLine:
        python -m vtool.numpy_utils --test-multiaxis_reduce

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.numpy_utils import *  # NOQA
        >>> rng = np.random.RandomState(0)
        >>> arr = (rng.rand(4, 3, 2, 1) * 255).astype(np.uint8)
        >>> ufunc = np.amax
        >>> startaxis = 1
        >>> out_ = multiaxis_reduce(ufunc, arr, startaxis)
        >>> result = out_
        >>> print(result)
        [182 245 236 249]
    """
    ndim = np.ndim(arr)
    if startaxis < 0:
        startaxis += ndim
    num_iters = ndim - startaxis
    out_ = ufunc(arr, axis=startaxis)
    for _ in range(num_iters - 1):
        # Each reduction removes axis ``startaxis``, so the next axis to
        # reduce slides into that same position.
        out_ = ufunc(out_, axis=startaxis)
    return out_
def iter_reduce_ufunc(ufunc, arr_iter, out=None):
    """
    Constant memory iteration and reduction.

    Applies ``ufunc`` from left to right over the input arrays, accumulating
    into a single output buffer.

    Args:
        ufunc (callable): binary function called as
            ``ufunc(out, arr, out=out)`` (e.g. ``np.maximum``).
        arr_iter (iterable): iterable or iterator of arrays to reduce.
        out (ndarray | None): optional preallocated output buffer. It is
            overwritten with the first array before reduction starts. If
            None, a copy of the first array is used.

    Returns:
        ndarray | None: the reduced array (``out`` itself when it was
            given), or None if ``arr_iter`` is empty.

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.numpy_utils import *  # NOQA
        >>> arr_list = [
        ...     np.array([0, 1, 2, 3, 8, 9]),
        ...     np.array([4, 1, 2, 3, 4, 5]),
        ...     np.array([0, 5, 2, 3, 4, 5]),
        ...     np.array([1, 1, 6, 3, 4, 5]),
        ...     np.array([0, 1, 2, 7, 4, 5])
        ... ]
        >>> memory = np.array([9, 9, 9, 9, 9, 9])
        >>> gen_memory = memory.copy()
        >>> def arr_gen(arr_list, gen_memory):
        ...     for arr in arr_list:
        ...         gen_memory[:] = arr
        ...         yield gen_memory
        >>> ufunc = np.maximum
        >>> res1 = iter_reduce_ufunc(ufunc, iter(arr_list), out=None)
        >>> res2 = iter_reduce_ufunc(ufunc, iter(arr_list), out=memory)
        >>> res3 = iter_reduce_ufunc(ufunc, arr_gen(arr_list, gen_memory), out=memory)
        >>> assert np.all(res1 == res2)
        >>> assert np.all(res2 == res3)
        >>> print(res1.tolist())
        [4, 5, 6, 7, 8, 9]
    """
    # Accept any iterable (e.g. a list), not only iterators
    arr_iter = iter(arr_iter)
    # Get first item in iterator
    try:
        initial = next(arr_iter)
    except StopIteration:
        return None
    # Populate the output buffer if one was given, otherwise copy the first
    # item so the caller's array is never used as the accumulator.
    if out is None:
        out = initial.copy()
    else:
        out[:] = initial
    # Iterate and reduce in place
    for arr in arr_iter:
        ufunc(out, arr, out=out)
    return out
def unique_row_indexes(arr):
    """
    Find the index of the first occurrence of each unique row (np.unique on
    rows).

    Each row is viewed as a single opaque ``np.void`` item so that
    ``np.unique`` can treat whole rows as scalars. Rows are therefore
    compared by their raw bytes; values that compare equal but have
    different byte patterns (e.g. ``0.0`` and ``-0.0``) count as distinct.

    Args:
        arr (ndarray | array_like): 2d array

    Returns:
        ndarray: unique_rowx - sorted indices of the first occurrence of
            each distinct row

    References:
        http://stackoverflow.com/questions/16970982/find-unique-rows-in-numpy-array

    CommandLine:
        python -m vtool.numpy_utils --test-unique_row_indexes

    Example:
        >>> # DISABLE_DOCTEST
        >>> from vtool.numpy_utils import *  # NOQA
        >>> arr = np.array([[0, 0], [0, 1], [1, 0], [1, 1], [0, 0], [.534, .432], [.534, .432], [1, 0], [0, 1]])
        >>> unique_rowx = unique_row_indexes(arr)
        >>> print(unique_rowx.tolist())
        [0, 1, 2, 3, 5]
    """
    # Convert first so that array-likes work and the memory layout is one
    # contiguous run of bytes per row.
    rows = np.ascontiguousarray(arr)
    num_rows, row_len = rows.shape[0], rows.shape[1]
    if row_len == 0:
        # Zero-width rows are all identical; a zero-size void dtype cannot
        # represent them, so answer directly.
        return np.arange(min(num_rows, 1), dtype=np.intp)
    void_dtype = np.dtype((np.void, rows.dtype.itemsize * row_len))
    arr_void_view = rows.view(void_dtype)
    _, unique_rowx = np.unique(arr_void_view, return_index=True)
    # np.unique orders the indices by row value; restore input order
    unique_rowx.sort()
    return unique_rowx
if __name__ == '__main__':
    """
    CommandLine:
        xdoctest -m vtool.numpy_utils
    """
    # When run as a script, hand this file to xdoctest so that the doctests
    # in the module are executed.
    from xdoctest import doctest_module
    doctest_module(__file__)