# Source code for vtool.numpy_utils

# -*- coding: utf-8 -*-
"""
These functions might be PR quality for numpy.
"""
from __future__ import absolute_import, division, print_function, unicode_literals
import numpy as np
from six import next
from six.moves import zip, range


def atleast_nd(arr, n, tofront=False):
    r"""
    View the input as an array with at least ``n`` dimensions.

    Missing dimensions are inserted as length-1 axes, either all in front of
    the existing axes or all behind them.

    Args:
        arr (array_like): a single array-like object.  Non-array inputs are
            converted with ``np.asanyarray``.  Inputs that already have ``n``
            or more dimensions are returned without any new axes.
        n (int | None): number of dimensions to ensure.  ``None`` disables
            the expansion entirely.
        tofront (bool): if True the new length-1 axes are placed before the
            existing axes, otherwise (default) they are placed after them.

    Returns:
        ndarray: an array with ``ndim >= n`` (when ``n`` is not None).  For
            example, with ``n=3`` a 1-D input of shape ``(N,)`` becomes
            ``(N, 1, 1)``, or ``(1, 1, N)`` when ``tofront=True``.

    CommandLine:
        python -m vtool.numpy_utils atleast_nd

    See Also:
        ensure_shape, np.atleast_1d, np.atleast_2d, np.atleast_3d

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.numpy_utils import *  # NOQA
        >>> arr = np.array([1, 1, 1])
        >>> print(atleast_nd(arr, 2).tolist())
        [[1], [1], [1]]
        >>> print(atleast_nd(arr, 2, tofront=True).tolist())
        [[1, 1, 1]]

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.numpy_utils import *  # NOQA
        >>> n = 4
        >>> print(atleast_nd([1, 1, 1], n).shape)
        (3, 1, 1, 1)
        >>> print(atleast_nd(np.array(0), n).shape)
        (1, 1, 1, 1)
        >>> print(atleast_nd(np.array([[[[[1]]]]]), n).shape)
        (1, 1, 1, 1, 1)
    """
    result = np.asanyarray(arr)
    if n is None:
        return result
    num_missing = n - result.ndim
    if num_missing <= 0:
        # Already has enough dimensions; hand back the converted input as is
        return result
    # Indexing with None inserts a length-1 axis; Ellipsis stands for all of
    # the axes that already exist.
    padding = (None,) * num_missing
    if tofront:
        selector = padding + (Ellipsis,)
    else:
        selector = (Ellipsis,) + padding
    return result[selector]


def ensure_shape(arr, dimshape):
    """
    Ensure an array has a given number of dimensions and, optionally, a
    given shape.

    The input is first expanded with ``atleast_nd``.  If a shape template is
    given, the result is then reshaped so that every axis with a non-None
    entry in the template has exactly that size.

    Args:
        arr (array_like): input data.
        dimshape (tuple | list | int): either the number of dimensions to
            ensure, or a shape template.  In a template, ``None`` means
            "keep whatever size the array already has along this axis".

    Returns:
        ndarray: the reshaped array.  The caller's array is never modified;
            its ``shape`` attribute is left untouched even when a different
            shape is requested.

    Raises:
        ValueError: if the data cannot be reshaped to the requested shape.

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.numpy_utils import *  # NOQA
        >>> print(ensure_shape(np.array([[1, 2]]), (None, 2)).shape)
        (1, 2)
        >>> print(ensure_shape(np.array([]), (None, 2)).shape)
        (0, 2)
    """
    if isinstance(dimshape, (tuple, list)):
        template = tuple(dimshape)
        n = len(template)
    else:
        template = None
        n = dimshape
    arr_ = atleast_nd(arr, n)
    if template is not None:
        newshape = tuple(
            have if want is None else want
            for have, want in zip(arr_.shape, template)
        )
        if newshape != arr_.shape:
            # reshape (rather than assigning to ``.shape``) so that the
            # caller's array is not mutated when ``arr_`` is the input itself
            arr_ = arr_.reshape(newshape)
    return arr_


[docs]def fromiter_nd(iter_, shape, dtype):
    r"""
    Like np.fromiter, but for iterators that generate n-dimensional arrays.

    Each generated array is treated as one opaque "item" of
    ``prod(shape[1:]) * itemsize`` bytes, ``shape[0]`` such items are
    collected with ``np.fromiter``, and the resulting buffer is then viewed
    as ``dtype`` and given the requested ``shape``.

    Note:
        np.vstack(list\_) is still faster than
        vt.fromiter_nd(ut.iflatten(list\_))

    Args:
        iter\_ (iter): an iterable that generates homogeneous ndarrays
        shape (tuple): the expected output shape; ``shape[0]`` is the number
            of items read from ``iter_`` and ``shape[1:]`` is the shape of
            each item
        dtype (dtype): the numpy datatype of the generated ndarrays

    Returns:
        ndarray: array of the given ``shape`` and ``dtype``

    Note:
        The iterable must yield a numpy array. It cannot yield a Python list.

        NOTE(review): because the collected bytes are reinterpreted with
        ``.view(dtype)``, each yielded array presumably has to already have
        dtype ``dtype`` (and be C-contiguous) - confirm against callers.

        NOTE(review): ``np.prod`` of an empty tuple is a float, so a 1-D
        ``shape`` looks like it would produce a non-integer item size here -
        confirm before relying on 1-D output.

    CommandLine:
        python -m vtool.numpy_utils fromiter_nd

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.numpy_utils import *  # NOQA
        >>> dtype = float  # the ``np.float`` alias was removed from numpy
        >>> total = 11
        >>> rng = np.random.RandomState(0)
        >>> iter_ = (rng.rand(5, 7, 3) for _ in range(total))
        >>> shape = (total, 5, 7, 3)
        >>> result = fromiter_nd(iter_, shape, dtype)
        >>> assert result.shape == shape

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.numpy_utils import *  # NOQA
        >>> import utool as ut
        >>> dtype = int  # the ``np.int`` alias was removed from numpy
        >>> qfxs = np.array([1, 2, 3])
        >>> dfxs = np.array([4, 5, 6])
        >>> iter_ = (np.array(x) for x in ut.product(qfxs, dfxs))
        >>> total = len(qfxs) * len(dfxs)
        >>> shape = (total, 2)
        >>> result = fromiter_nd(iter_, shape, dtype)
        >>> assert result.shape == shape

    Ignore:
        >>> dtype = np.uint8
        >>> feat_dim = 128
        >>> mu = 1000
        >>> sigma = 500
        >>> n_data = 1000
        >>> rng = np.random.RandomState(42)
        >>> n_feat_list = np.clip(rng.randn(n_data) * sigma + mu, 0, np.inf).astype(int)
        >>> # Make a large list of vectors of various sizes
        >>> print('Making random vectors')
        >>> vecs_list = [(rng.rand(num, feat_dim) * 255).astype(dtype) for num in n_feat_list]
        >>> mega_bytes = sum([x.nbytes for x in vecs_list]) / 2 ** 20
        >>> print('mega_bytes = %r' % (mega_bytes,))
        >>> import itertools as it
        >>> import vtool as vt
        >>> n_total = n_feat_list.sum()
        >>> target1 = np.vstack(vecs_list)
        >>> iter_ = it.chain.from_iterable(vecs_list)
        >>> shape = (n_total, feat_dim)
        >>> target2 = vt.fromiter_nd(it.chain.from_iterable(vecs_list), shape, dtype=dtype)
        >>> assert np.all(target1 == target2)
        >>>
        >>> %timeit np.vstack(vecs_list)
        >>> 20.4ms
        >>> %timeit vt.fromiter_nd(it.chain.from_iterable(vecs_list), shape, dtype)
        >>> 102ms
        >>>
        >>> iter_ = it.chain.from_iterable(vecs_list)
        >>> %time vt.fromiter_nd(iter_, shape, dtype)
        >>> %time np.vstack(vecs_list)
    """
    # Number of items to pull from the iterator
    num_rows = shape[0]
    # Number of scalar elements in each generated array
    chunksize = np.prod(shape[1:])
    itemsize = np.dtype(dtype).itemsize
    # Create dtype that makes an entire ndarray appear as a single item
    chunk_dtype = np.dtype((np.void, itemsize * chunksize))
    arr = np.fromiter(iter_, count=num_rows, dtype=chunk_dtype)
    # Convert back to original dtype and shape
    arr = arr.view(dtype)
    arr.shape = shape
    return arr


def index_to_boolmask(index_list, maxval=None, isflat=True):
    r"""
    Transform a list of indices into a boolean mask.

    Args:
        index_list (array_like): the positions to mark as True.  In flat
            mode this is a 1-D sequence of integer indices.  In non-flat
            mode it is an ``(N, D)`` array in which every row is one
            D-dimensional index.
        maxval (int | tuple | None): shape of the output mask (an int for
            flat mode, a tuple for non-flat mode).  If None (default) the
            smallest shape that contains every index is used, i.e. the
            largest index plus one along each axis.
        isflat (bool): if True (default) ``index_list`` holds flat indices,
            otherwise it holds one multi-dimensional index per row.

    Returns:
        ndarray: boolean mask that is True exactly at the given indices.
            An empty ``index_list`` yields an all-False mask (of size zero
            when ``maxval`` is None).

    Raises:
        IndexError: if an index does not fit inside an explicit ``maxval``.

    CommandLine:
        python -m vtool.numpy_utils index_to_boolmask

    Example:
        >>> # DISABLE_DOCTEST
        >>> from vtool.numpy_utils import *  # NOQA
        >>> import vtool as vt
        >>> index_list = np.array([(0, 0), (1, 1), (2, 1)])
        >>> maxval = (3, 3)
        >>> mask = vt.index_to_boolmask(index_list, maxval, isflat=False)
        >>> result = ('mask =\n%s' % (str(mask.astype(np.uint8)),))
        >>> print(result)
        mask =
        [[1 0 0]
         [0 1 0]
         [0 1 0]]

    Example:
        >>> # DISABLE_DOCTEST
        >>> from vtool.numpy_utils import *  # NOQA
        >>> import vtool as vt
        >>> index_list = np.array([0, 1, 4])
        >>> maxval = 5
        >>> mask = vt.index_to_boolmask(index_list, maxval, isflat=True)
        >>> result = ('mask = %s' % (str(mask.astype(np.uint8)),))
        >>> print(result)
        mask = [1 1 0 0 1]

    """
    index_arr = np.asarray(index_list)
    is_empty = index_arr.size == 0
    if maxval is None:
        if is_empty:
            # No index to infer an extent from: fall back to a size-0 mask
            # with the appropriate number of dimensions.
            if isflat or index_arr.ndim != 2:
                maxval = 0
            else:
                maxval = (0,) * index_arr.shape[1]
        elif isflat:
            # The mask must be one longer than the largest index so that the
            # largest index itself is addressable.
            maxval = int(index_arr.max()) + 1
        else:
            # One extent per column of the (N, D) index array
            maxval = tuple(int(v) + 1 for v in index_arr.max(axis=0))
    mask = np.zeros(maxval, dtype=np.bool_)
    if is_empty:
        # Nothing to mark (also avoids indexing with an empty float array)
        return mask
    if isflat:
        mask[index_arr] = True
    else:
        # Transposing turns rows of indices into one index array per axis
        mask[tuple(index_arr.T)] = True
    return mask


def multiaxis_reduce(ufunc, arr, startaxis=0):
    """
    Reduce an array over ``startaxis`` and every axis after it.

    Used to get e.g. the max/min over all trailing axes of an array.

    Args:
        ufunc (callable): a reduction such as ``np.amax`` or ``np.amin``
            that is called as ``ufunc(array, axis=<int>)``.
        arr (ndarray): the array to reduce.
        startaxis (int): first axis to reduce over; all later axes are
            reduced as well.  Negative values count from the last axis.
            (default = 0)

    Returns:
        ndarray: the reduced array, whose shape is ``arr.shape[:startaxis]``
            (a scalar when ``startaxis`` is 0).

    CommandLine:
        python -m vtool.numpy_utils --test-multiaxis_reduce

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.numpy_utils import *  # NOQA
        >>> rng = np.random.RandomState(0)
        >>> arr = (rng.rand(4, 3, 2, 1) * 255).astype(np.uint8)
        >>> ufunc = np.amax
        >>> startaxis = 1
        >>> out_ = multiaxis_reduce(ufunc, arr, startaxis)
        >>> result = out_
        >>> print(result)
        [182 245 236 249]

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.numpy_utils import *  # NOQA
        >>> arr = np.arange(24).reshape(4, 3, 2, 1)
        >>> print(multiaxis_reduce(np.amax, arr, startaxis=2).shape)
        (4, 3)
        >>> print(multiaxis_reduce(np.amax, arr, startaxis=0))
        23
    """
    ndim = len(arr.shape)
    if startaxis < 0:
        # Normalize so that the iteration count below is correct
        startaxis += ndim
    num_iters = ndim - startaxis
    out_ = ufunc(arr, axis=startaxis)
    for _ in range(num_iters - 1):
        # Each reduction removes one axis, so the next axis to reduce has
        # moved down into position ``startaxis``.
        out_ = ufunc(out_, axis=startaxis)
    return out_


def iter_reduce_ufunc(ufunc, arr_iter, out=None):
    """
    Constant memory iteration and reduction.

    Applies a binary ufunc from left to right over the input arrays,
    accumulating into a single output buffer.

    Args:
        ufunc (callable): binary ufunc (e.g. ``np.maximum``) called as
            ``ufunc(out, arr, out=out)``.
        arr_iter (iterable): iterable or iterator of ndarrays.  The items
            may reuse the same memory between iterations because each one is
            consumed before the next is requested.
        out (ndarray | None): optional preallocated output.  If given it is
            overwritten with the first item and then updated in place;
            otherwise a copy of the first item is used, so the first item is
            never modified.

    Returns:
        ndarray | None: the accumulated result (``out`` itself when it was
            given), or None if ``arr_iter`` is empty, in which case ``out``
            is left untouched.

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.numpy_utils import *  # NOQA
        >>> arr_list = [
        ...     np.array([0, 1, 2, 3, 8, 9]),
        ...     np.array([4, 1, 2, 3, 4, 5]),
        ...     np.array([0, 5, 2, 3, 4, 5]),
        ...     np.array([1, 1, 6, 3, 4, 5]),
        ...     np.array([0, 1, 2, 7, 4, 5])
        ... ]
        >>> memory = np.array([9, 9, 9, 9, 9, 9])
        >>> gen_memory = memory.copy()
        >>> def arr_gen(arr_list, gen_memory):
        ...     for arr in arr_list:
        ...         gen_memory[:] = arr
        ...         yield gen_memory
        >>> ufunc = np.maximum
        >>> res1 = iter_reduce_ufunc(ufunc, arr_list, out=None)
        >>> res2 = iter_reduce_ufunc(ufunc, iter(arr_list), out=memory)
        >>> res3 = iter_reduce_ufunc(ufunc, arr_gen(arr_list, gen_memory), out=memory)
        >>> print(res1.tolist())
        [4, 5, 6, 7, 8, 9]
        >>> assert np.all(res1 == res2)
        >>> assert np.all(res2 == res3)
    """
    # Accept any iterable (e.g. a list), not only iterators
    arr_iter = iter(arr_iter)
    # Get first item in iterator
    try:
        initial = next(arr_iter)
    except StopIteration:
        return None
    # Populate the out variable if specified, otherwise make a copy of the
    # first item to be the output memory
    if out is not None:
        out[:] = initial
    else:
        out = initial.copy()
    # Iterate and reduce in place
    for arr in arr_iter:
        ufunc(out, arr, out=out)
    return out


def unique_row_indexes(arr):
    """
    Find the indexes of the unique rows of a 2d array (np.unique on rows).

    Each row is viewed as one opaque block of bytes so that ``np.unique``
    can treat whole rows as single items.  Rows are therefore considered
    equal when their underlying bytes are equal.

    Args:
        arr (array_like): 2d array; non-array input is converted first.

    Returns:
        ndarray: unique_rowx - sorted indexes of the first occurrence of
            each distinct row.

    References:
        http://stackoverflow.com/questions/16970982/find-unique-rows-in-numpy-array

    CommandLine:
        python -m vtool.numpy_utils --test-unique_row_indexes

    Example:
        >>> # ENABLE_DOCTEST
        >>> from vtool.numpy_utils import *  # NOQA
        >>> arr = np.array([[0, 0], [0, 1], [1, 0], [1, 1], [0, 0], [.534, .432], [.534, .432], [1, 0], [0, 1]])
        >>> unique_rowx = unique_row_indexes(arr)
        >>> print(unique_rowx.tolist())
        [0, 1, 2, 3, 5]

    Ignore:
        %timeit unique_row_indexes(arr)
        %timeit compute_unique_data_ids(arr)
        %timeit compute_unique_integer_data_ids(arr)

    """
    # Convert first so that array-likes work and the byte view below is taken
    # over contiguous rows
    arr = np.ascontiguousarray(arr)
    # A void dtype as wide as one full row makes each row a single item
    void_dtype = np.dtype((np.void, arr.dtype.itemsize * arr.shape[1]))
    arr_void_view = arr.view(void_dtype)
    _, unique_rowx = np.unique(arr_void_view, return_index=True)
    # np.unique orders the indexes by row content; put them back in the
    # order in which the rows appear in the input
    unique_rowx.sort()
    return unique_rowx


if __name__ == '__main__':
    """
    CommandLine:
        xdoctest -m vtool.numpy_utils
    """
    # Script entry point: hand this file to xdoctest so that the doctests
    # embedded in the docstrings above can be run from the command line.
    import xdoctest

    xdoctest.doctest_module(__file__)