How to perform computations with NumPy#

Awkward Array’s integration with NumPy allows you to use NumPy’s array functions on data with complex structures, including ragged and heterogeneous arrays.

import awkward as ak
import numpy as np

Universal functions (ufuncs)#

NumPy’s universal functions (ufuncs) are functions that operate elementwise on arrays. They are broadcasting-aware, so they can naturally handle data structures like ragged arrays that are common in Awkward Arrays.

Here’s an example of applying np.sqrt, a NumPy ufunc, to an Awkward Array:

data = ak.Array([[1, 4, 9], [], [16, 25]])

np.sqrt(data)
[[1, 2, 3],
 [],
 [4, 5]]
-----------------------
type: 3 * var * float64

Notice that the ufunc applies to the numeric data, passing through all dimensions of nested lists, even if those lists have variable length. This also applies to heterogeneous data, in which the data are not all of the same type.

data = ak.Array([[1, 4, 9], [], 16, [[[25]]]])

np.sqrt(data)
[[1, 2, 3],
 [],
 4,
 [[[5]]]]
---------------------------
type: 4 * union[
    var * union[
        float64,
        var * var * float64
    ],
    float64
]

Unary and binary operations on Awkward Arrays, such as +, -, >, and ==, actually call NumPy ufuncs. For instance, +:

array1 = ak.Array([[1, 2, 3], [], [4, 5]])
array2 = ak.Array([[10, 20, 30], [], [40, 50]])

array1 + array2
[[11, 22, 33],
 [],
 [44, 55]]
---------------------
type: 3 * var * int64

is actually np.add:

np.add(array1, array2)
[[11, 22, 33],
 [],
 [44, 55]]
---------------------
type: 3 * var * int64

Arrays with record fields#

Ufuncs can be applied to numerical data nested in lists, but not to records.

records = ak.Array([{"x": 4, "y": 9}, {"x": 16, "y": 25}])
np.sqrt(records)
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[7], line 1
----> 1 np.sqrt(records)

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/highlevel.py:1511, in Array.__array_ufunc__(self, ufunc, method, *inputs, **kwargs)
   1509 name = f"{type(ufunc).__module__}.{ufunc.__name__}.{method!s}"
   1510 with ak._errors.OperationErrorContext(name, inputs, kwargs):
-> 1511     return ak._connect.numpy.array_ufunc(ufunc, method, inputs, kwargs)

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_connect/numpy.py:466, in array_ufunc(ufunc, method, inputs, kwargs)
    458         raise TypeError(
    459             "no {}.{} overloads for custom types: {}".format(
    460                 type(ufunc).__module__, ufunc.__name__, ", ".join(error_message)
    461             )
    462         )
    464     return None
--> 466 out = ak._broadcasting.broadcast_and_apply(
    467     inputs, action, allow_records=False, function_name=ufunc.__name__
    468 )
    470 if len(out) == 1:
    471     return wrap_layout(out[0], behavior=behavior, attrs=attrs)

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_broadcasting.py:1140, in broadcast_and_apply(inputs, action, depth_context, lateral_context, allow_records, left_broadcast, right_broadcast, numpy_to_regular, regular_to_jagged, function_name, broadcast_parameters_rule)
   1138 backend = backend_of(*inputs, coerce_to_common=False)
   1139 isscalar = []
-> 1140 out = apply_step(
   1141     backend,
   1142     broadcast_pack(inputs, isscalar),
   1143     action,
   1144     0,
   1145     depth_context,
   1146     lateral_context,
   1147     {
   1148         "allow_records": allow_records,
   1149         "left_broadcast": left_broadcast,
   1150         "right_broadcast": right_broadcast,
   1151         "numpy_to_regular": numpy_to_regular,
   1152         "regular_to_jagged": regular_to_jagged,
   1153         "function_name": function_name,
   1154         "broadcast_parameters_rule": broadcast_parameters_rule,
   1155     },
   1156 )
   1157 assert isinstance(out, tuple)
   1158 return tuple(broadcast_unpack(x, isscalar) for x in out)

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_broadcasting.py:1118, in apply_step(backend, inputs, action, depth, depth_context, lateral_context, options)
   1116     return result
   1117 elif result is None:
-> 1118     return continuation()
   1119 else:
   1120     raise AssertionError(result)

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_broadcasting.py:1087, in apply_step.<locals>.continuation()
   1085 # Any non-string list-types?
   1086 elif any(x.is_list and not is_string_like(x) for x in contents):
-> 1087     return broadcast_any_list()
   1089 # Any RecordArrays?
   1090 elif any(x.is_record for x in contents):

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_broadcasting.py:623, in apply_step.<locals>.broadcast_any_list()
    620         nextinputs.append(x)
    621         nextparameters.append(NO_PARAMETERS)
--> 623 outcontent = apply_step(
    624     backend,
    625     nextinputs,
    626     action,
    627     depth + 1,
    628     copy.copy(depth_context),
    629     lateral_context,
    630     options,
    631 )
    632 assert isinstance(outcontent, tuple)
    633 parameters = parameters_factory(nextparameters, len(outcontent))

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_broadcasting.py:1118, in apply_step(backend, inputs, action, depth, depth_context, lateral_context, options)
   1116     return result
   1117 elif result is None:
-> 1118     return continuation()
   1119 else:
   1120     raise AssertionError(result)

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_broadcasting.py:1091, in apply_step.<locals>.continuation()
   1089 # Any RecordArrays?
   1090 elif any(x.is_record for x in contents):
-> 1091     return broadcast_any_record()
   1093 else:
   1094     raise ValueError(
   1095         "cannot broadcast: {}{}".format(
   1096             ", ".join(repr(type(x)) for x in inputs), in_function(options)
   1097         )
   1098     )

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_broadcasting.py:468, in apply_step.<locals>.broadcast_any_record()
    466 def broadcast_any_record():
    467     if not options["allow_records"]:
--> 468         raise ValueError(f"cannot broadcast records{in_function(options)}")
    470     frozen_record_fields: frozenset[str] | None = UNSET
    471     first_record = next(c for c in contents if c.is_record)

ValueError: cannot broadcast records in sqrt

This error occurred while calling

    numpy.sqrt.__call__(
        <Array [{x: 4, y: 9}, {x: 16, ...}] type='2 * {x: int64, y: int64}'>
    )

However, you can pull each field out of a record and apply the ufunc to it.

np.sqrt(records.x)
[2,
 4]
-----------------
type: 2 * float64
np.sqrt(records.y)
[3,
 5]
-----------------
type: 2 * float64

If you want the result wrapped up in a new array of records, you can use ak.zip() to do that.

ak.zip({"x": np.sqrt(records.x), "y": np.sqrt(records.y)})
[{x: 2, y: 3},
 {x: 4, y: 5}]
---------------
type: 2 * {
    x: float64,
    y: float64
}

Here’s an idiom that would apply a ufunc to every field individually, and then wrap up the result as a new record with the same fields (using ak.fields(), ak.unzip(), and ak.zip()):

ak.zip({key: np.sqrt(value) for key, value in zip(ak.fields(records), ak.unzip(records))})
[{x: 2, y: 3},
 {x: 4, y: 5}]
---------------
type: 2 * {
    x: float64,
    y: float64
}

The reason that Awkward Array does not do this automatically is to prevent mistakes: it’s common for records to represent coordinates of data points, and if the coordinates are not Cartesian, applying the function to each field separately is not correct.

Using non-NumPy ufuncs#

NumPy-compatible ufuncs exist in other libraries, like SciPy, and can be applied in the same way. Here’s how you can apply scipy.special.gamma and scipy.special.erf:

import scipy.special

data = ak.Array([[0.1, 0.2, 0.3], [], [0.4, 0.5]])
scipy.special.gamma(data)
[[9.51, 4.59, 2.99],
 [],
 [2.22, 1.77]]
-----------------------
type: 3 * var * float64
scipy.special.erf(data)
[[0.112, 0.223, 0.329],
 [],
 [0.428, 0.52]]
-----------------------
type: 3 * var * float64

You can even create your own ufuncs using Numba’s @nb.vectorize:

import numba as nb

@nb.vectorize
def gcd_euclid(x, y):
    """Compute the greatest common divisor of x and y with Euclid's algorithm."""
    # An iterative computation like this cannot be expressed as a single
    # array formula, which is why it is compiled into a ufunc instead.
    while y != 0:
        remainder = x % y
        x = y
        y = remainder
    return x
x = ak.Array([[10, 20, 30], [], [40, 50]])
y = ak.Array([[5, 40, 15], [], [24, 255]])
gcd_euclid(x, y)
[[5, 20, 15],
 [],
 [8, 5]]
---------------------
type: 3 * var * int64

Since Numba has JIT-compiled this function, it would run much faster on large arrays than custom Python code.

Non-ufunc NumPy functions#

Some NumPy functions don’t satisfy the ufunc protocol, but have been implemented for Awkward Arrays because they are useful. You can tell that a NumPy function has an Awkward Array implementation when a function with the same name and signature exists in both libraries.

For instance, np.where works on Awkward Arrays because ak.where() exists:

np.where(y % 2 == 0, x, y) 
[[5, 20, 15],
 [],
 [40, 255]]
---------------------
type: 3 * var * int64

(The above selects elements from x when y is even and elements from y when y is odd.)

Similarly, np.concatenate works on Awkward Arrays because ak.concatenate() exists:

np.concatenate([x, y])
[[10, 20, 30],
 [],
 [40, 50],
 [5, 40, 15],
 [],
 [24, 255]]
---------------------
type: 6 * var * int64
np.concatenate([x, y], axis=1)
[[10, 20, 30, 5, 40, 15],
 [],
 [40, 50, 24, 255]]
-------------------------
type: 3 * var * int64

Other NumPy functions, without an equivalent in the Awkward Array library, will work only if the Awkward Array can be converted into a NumPy array.

Ragged arrays can’t be converted to NumPy:

np.fft.fft(ak.Array([[1.1, 2.2, 3.3], [], [7.7, 8.8, 9.9]]))
---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[21], line 1
----> 1 np.fft.fft(ak.Array([[1.1, 2.2, 3.3], [], [7.7, 8.8, 9.9]]))

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/highlevel.py:1527, in Array.__array_function__(self, func, types, args, kwargs)
   1513 def __array_function__(self, func, types, args, kwargs):
   1514     """
   1515     Intercepts attempts to pass this Array to those NumPy functions other
   1516     than universal functions that have an Awkward equivalent.
   (...)
   1525     See also #__array_ufunc__.
   1526     """
-> 1527     return ak._connect.numpy.array_function(
   1528         func, types, args, kwargs, behavior=self._behavior, attrs=self._attrs
   1529     )

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_connect/numpy.py:109, in array_function(func, types, args, kwargs, behavior, attrs)
    106 unique_backends = frozenset(_find_backends(all_arguments))
    107 backend = common_backend(unique_backends)
--> 109 rectilinear_args = tuple(_to_rectilinear(x, backend) for x in args)
    110 rectilinear_kwargs = {k: _to_rectilinear(v, backend) for k, v in kwargs.items()}
    111 result = func(*rectilinear_args, **rectilinear_kwargs)

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_connect/numpy.py:109, in <genexpr>(.0)
    106 unique_backends = frozenset(_find_backends(all_arguments))
    107 backend = common_backend(unique_backends)
--> 109 rectilinear_args = tuple(_to_rectilinear(x, backend) for x in args)
    110 rectilinear_kwargs = {k: _to_rectilinear(v, backend) for k, v in kwargs.items()}
    111 result = func(*rectilinear_args, **rectilinear_kwargs)

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_connect/numpy.py:78, in _to_rectilinear(arg, backend)
     69     # Otherwise, cast to layout and convert
     70     else:
     71         layout = ak.to_layout(
     72             arg,
     73             allow_record=False,
   (...)
     76             string_policy="error",
     77         )
---> 78         return layout.to_backend(backend).to_backend_array(allow_missing=True)
     79 elif isinstance(arg, tuple):
     80     return tuple(_to_rectilinear(x, backend) for x in arg)

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/contents/content.py:1024, in Content.to_backend_array(self, allow_missing, backend)
   1022 else:
   1023     backend = regularize_backend(backend)
-> 1024 return self._to_backend_array(allow_missing, backend)

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/contents/listoffsetarray.py:2106, in ListOffsetArray._to_backend_array(self, allow_missing, backend)
   2104     return buffer.view(np.dtype(("S", max_count)))
   2105 else:
-> 2106     return self.to_RegularArray()._to_backend_array(allow_missing, backend)

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/contents/listoffsetarray.py:284, in ListOffsetArray.to_RegularArray(self)
    279 _size = Index64.empty(1, self._backend.index_nplike)
    280 assert (
    281     _size.nplike is self._backend.index_nplike
    282     and self._offsets.nplike is self._backend.index_nplike
    283 )
--> 284 self._backend.maybe_kernel_error(
    285     self._backend[
    286         "awkward_ListOffsetArray_toRegularArray",
    287         _size.dtype.type,
    288         self._offsets.dtype.type,
    289     ](
    290         _size.data,
    291         self._offsets.data,
    292         self._offsets.length,
    293     )
    294 )
    295 size = self._backend.index_nplike.index_as_shape_item(_size[0])
    296 length = self._offsets.length - 1

File ~/micromamba/envs/awkward-docs/lib/python3.11/site-packages/awkward/_backends/backend.py:67, in Backend.maybe_kernel_error(self, error)
     65     return
     66 else:
---> 67     raise ValueError(self.format_kernel_error(error))

ValueError: cannot convert to RegularArray because subarray lengths are not regular (in compiled code: https://github.com/scikit-hep/awkward/blob/awkward-cpp-38/awkward-cpp/src/cpu-kernels/awkward_ListOffsetArray_toRegularArray.cpp#L22)

But arrays with equal-sized lists can:

np.fft.fft(ak.Array([[1.1, 2.2, 3.3], [4.4, 5.5, 6.6], [7.7, 8.8, 9.9]]))
[[6.6+0j, -1.65+0.953j, -1.65+-0.953j],
 [16.5+0j, -1.65+0.953j, -1.65+-0.953j],
 [26.4+0j, -1.65+0.953j, -1.65+-0.953j]]
----------------------------------------
type: 3 * 3 * complex128