Numpy: reduceatコーナーケース（Trac＃236）

docstring

In [1]:import numpy

In [2]:numpy.version.version
Out[2]:'1.0b1'

In [3]:a = numpy.arange (5)

In [4]:numpy.add.reduceat (a, (1,1))
Out[4]:array([ 1, 10])

n = 10000
arr = np.random.random(n)
inds = np.random.randint(0, n, n//10)
inds.sort()

%timeit out = np.add.reduceat(arr, inds)
10000 loops, best of 3: 42.1 µs per loop

%timeit out = piecewise_reduce(np.add, arr, inds)
100 loops, best of 3: 6.03 ms per loop

def reducebins(func, arr, start=None, stop=None, axis=-1, out=None):
    """
    Compute (in the 1d case) `out[i] = func.reduce(arr[start[i]:stop[i]])`

    If only `start` is specified, this computes the same reduce at `reduceat` did:

        `out[i]  = func.reduce(arr[start[i]:start[i+1]])`
        `out[-1] = func.reduce(arr[start[-1]:])`

    If only `stop` is specified, this computes:

        `out[0] = func.reduce(arr[:stop[0]])`
        `out[i] = func.reduce(arr[stop[i-1]:stop[i]])`

    """
    # convert to 1d arrays
    if start is not None:
        start = np.array(start, copy=False, ndmin=1, dtype=np.intp)
        assert start.ndim == 1
    if stop is not None:
        stop = np.array(stop, copy=False, ndmin=1, dtype=np.intp)
        assert stop.ndim == 1

    # default arguments that do useful things
    if start is None and stop is None:
        raise ValueError('At least one of start and stop must be specified')
    elif stop is None:
        # start only means reduce from one index to the next, and the last to the end
        stop = np.empty_like(start)
        stop[:-1] = start[1:]
        stop[-1] = arr.shape[axis]
    elif start is None:
        # stop only means reduce from the start to the first index, and one index to the next
        start = np.empty_like(stop)
        start[1:] = stop[:-1]
        start[0] = 0
    else:
        # TODO: possibly confusing?
        start, stop = np.broadcast_arrays(start, stop)

    # allocate output - not clear how to do this safely for subclasses
    if not out:
        sh = list(arr.shape)
        sh[axis] = len(stop)
        sh = tuple(sh)
        out = np.empty(shape=sh)

    # below assumes axis=0 for brevity here
    for i, (si, ei) in enumerate(zip(start, stop)):
        func.reduce(arr[si:ei,...], out=out[i, ...], axis=axis)
    return out

if not out:
    sh = list(arr.shape)
    sh[axis] = len(end_indices)
    out = np.empty(shape=sh)

# below assumes axis=0 for brevity here
for i, (si, ei) in enumerate(zip(start_indices, end_indices)):
    func.reduce(arr[si:ei,...], out=alloc[i, ...], axis=axis)
return out

n = 10000
arr = np.random.random(n)
inds = np.random.randint(0, n, n//10)
inds.sort()

%timeit out = np.add.reduceat(arr, inds)
10000 loops, best of 3: 42.1 µs per loop

%timeit out = piecewise_reduce(np.add, arr, inds)
100 loops, best of 3: 6.03 ms per loop

    for i, (si, ei) in enumerate(zip(start, stop)):
        if si >= ei:
            func.reduce(arr[si:ei,...], out=out[i, ...], axis=axis)
        else:
            func.reduce(arr[ei:si,...], out=out[i, ...], axis=axis)
            func.inverse(func.identity, out[i, ...], out=out[i, ...])

func.reduce(func.reduceat(x, inds_from_0)) == func.reduce(x))

a = [1, 2, 3, 4]
inds = [0, 3, 1]
result = np.add.reduceat(a, inds) # [6, -5, 9] == [(1 + 2 + 3), -(3 + 2), (2 + 3 + 4)]

add.reduce(array, slice=slice(indices[:-1], indices[1:])

For i in ``range(len(indices))``, `reduceat` computes
``ufunc.reduce(a[indices[i]:indices[i+1]])``, which becomes the i-th
generalized "row" parallel to `axis` in the final result (i.e., in a
2-D array, for example, if `axis = 0`, it becomes the i-th row, but if
`axis = 1`, it becomes the i-th column).  There are three exceptions to this:

* when ``i = len(indices) - 1`` (so for the last index),
  ``indices[i+1] = a.shape[axis]``.
* if ``indices[i] >= indices[i + 1]``, the i-th generalized "row" is
  simply ``a[indices[i]]``.
* if ``indices[i] >= len(a)`` or ``indices[i] < 0``, an error is raised.

def reduceat(func, arr, inds):
    deprecation_warning()
    start = inds
    stops = zeros(inds.shape)
    stops[:-1] = start[1:]
    stops[-1] = len(arr)
    np.add(stops, 1, where=ends == starts, out=stops)  # reintroduce the "bug" that we would have to keep
    return reducebins(func, arr, starts, stops)

a = np.arange(10)
reducebins(np.add, start=[2, 4, 6]) == [2 + 3, 4 + 5, 6 + 7 + 8 + 9]  # what `reduceat` does
reducebins(np.add, stop=[2, 4, 6])  == [0 + 1, 2 + 3, 4 + 5]          # also useful

In [2]: A = np.random.random((4000, 4000))
In [3]: B = sparse.csr_matrix((A > 0.8) * A)
In [9]: %timeit np.add.reduceat(B.data, B.indptr[:-1]) * (np.diff(B.indptr) > 1)
1000 loops, best of 3: 1.81 ms per loop
In [12]: %timeit B.sum(axis=1).A
100 loops, best of 3: 1.95 ms per loop
In [16]: %timeit np.maximum.reduceat(B.data, B.indptr[:-1]) * (np.diff(B.indptr) > 0)
1000 loops, best of 3: 1.8 ms per loop
In [20]: %timeit B.max(axis=1).A
100 loops, best of 3: 2.12 ms per loop

np.add.reduce_at(arr, inds)
reduce_bins(np.add, arr, inds)
reduce_bins(np.add, arr, start=inds)

a = np.arange(10)
inds = [2, 4, 6]
reduce_bins(a, start=inds[:-1], stop=inds[1:])  #  [2 + 3, 4 + 5]

# or less efficiently:
reduce_at(a, inds)[:-1}
reduce_bins(a, start=inds)[:-1]
reduce_bins(a, stop=inds)[1:]

Numpy: reduceatコーナーケース（Trac＃236）

最も参考になるコメント

全てのコメント49件

関連する問題