From 41bfbf6043fc67810091bd016b88d8f16a9c3153 Mon Sep 17 00:00:00 2001 From: aljosanpedro Date: Sun, 26 Oct 2025 13:36:56 +0800 Subject: [PATCH] DOC: removed @Appender and @Substitution and replaced with docstrings --- pandas/core/groupby/generic.py | 290 ++++++++++++++++++++++++++++++++- 1 file changed, 283 insertions(+), 7 deletions(-) diff --git a/pandas/core/groupby/generic.py b/pandas/core/groupby/generic.py index d279594617235..62a1e4baf846b 100644 --- a/pandas/core/groupby/generic.py +++ b/pandas/core/groupby/generic.py @@ -32,8 +32,6 @@ SpecificationError, ) from pandas.util._decorators import ( - Appender, - Substitution, doc, set_module, ) @@ -71,7 +69,6 @@ from pandas.core.groupby.groupby import ( GroupBy, GroupByPlot, - _transform_template, ) from pandas.core.indexes.api import ( Index, @@ -675,9 +672,143 @@ def _wrap_applied_output( """ ) - @Substitution(klass="Series", example=__examples_series_doc) - @Appender(_transform_template) def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): + """ + Call function producing a same-indexed Series on each group. + + Returns a Series having the same indexes as the original object + filled with the transformed values. + + Parameters + ---------- + func : function, str + Function to apply to each group. See the Notes section below for + requirements. + + Accepted inputs are: + + - String + - Python function + - Numba JIT function with ``engine='numba'`` specified. + + Only passing a single function is supported with this engine. + If the ``'numba'`` engine is chosen, the function must be + a user defined function with ``values`` and ``index`` as the + first and second arguments respectively in the function signature. + Each group's index will be passed to the user defined function + and optionally available for use. + + If a string is chosen, then it needs to be the name + of the groupby method you want to use. + *args + Positional arguments to pass to func. 
+ engine : str, default None + * ``'cython'`` : Runs the function through C-extensions from cython. + * ``'numba'`` : Runs the function through JIT compiled code from numba. + * ``None`` : Defaults to ``'cython'`` or the global setting + ``compute.use_numba`` + + engine_kwargs : dict, default None + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` + and ``parallel`` dictionary keys. The values must either be ``True`` or + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is + ``{'nopython': True, 'nogil': False, 'parallel': False}`` and will be + applied to the function + + **kwargs + Keyword arguments to be passed into func. + + Returns + ------- + Series + Series with the same indexes as the original object filled + with transformed values. + + See Also + -------- + Series.groupby.apply : Apply function ``func`` group-wise and combine + the results together. + Series.groupby.aggregate : Aggregate using one or more operations. + Series.transform : Call ``func`` on self producing a Series with the + same axis shape as self. + + Notes + ----- + Each group is endowed with the attribute 'name' in case you need to know + which group you are working on. + + The current implementation imposes three requirements on f: + + * f must return a value that either has the same shape as the input + subframe or can be broadcast to the shape of the input subframe. + For example, if `f` returns a scalar it will be broadcast to have the + same shape as the input subframe. + * if this is a DataFrame, f must support application column-by-column + in the subframe. If f also supports application to the entire subframe, + then a fast path is used starting from the second chunk. + * f must not mutate groups. Mutation is not supported and may + produce unexpected results. See :ref:`gotchas.udf-mutation` for more details. 
+ + When using ``engine='numba'``, there will be no "fall back" behavior internally. + The group data and group index will be passed as numpy arrays to the JITed + user defined function, and no alternative execution attempts will be tried. + + .. versionchanged:: 1.3.0 + + The resulting dtype will reflect the return value of the passed ``func``, + see the examples below. + + .. versionchanged:: 2.0.0 + + When using ``.transform`` on a grouped DataFrame and the + transformation function returns a DataFrame, pandas now aligns the + result's index with the input's index. You can call ``.to_numpy()`` + on the result of the transformation function to avoid alignment. + + Examples + -------- + >>> ser = pd.Series( + ... [390.0, 350.0, 30.0, 20.0], + ... index=["Falcon", "Falcon", "Parrot", "Parrot"], + ... name="Max Speed", + ... ) + >>> grouped = ser.groupby([1, 1, 2, 2]) + >>> grouped.transform(lambda x: (x - x.mean()) / x.std()) + Falcon 0.707107 + Falcon -0.707107 + Parrot 0.707107 + Parrot -0.707107 + Name: Max Speed, dtype: float64 + + Broadcast result of the transformation + + >>> grouped.transform(lambda x: x.max() - x.min()) + Falcon 40.0 + Falcon 40.0 + Parrot 10.0 + Parrot 10.0 + Name: Max Speed, dtype: float64 + + >>> grouped.transform("mean") + Falcon 370.0 + Falcon 370.0 + Parrot 25.0 + Parrot 25.0 + Name: Max Speed, dtype: float64 + + .. 
versionchanged:: 1.3.0 + + The resulting dtype will reflect the return value of the passed ``func``, + for example: + + >>> grouped.transform(lambda x: x.astype(int).max()) + Falcon 390 + Falcon 390 + Parrot 30 + Parrot 30 + Name: Max Speed, dtype: int64 + """ return self._transform( func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs ) @@ -2298,9 +2429,154 @@ def _transform_general(self, func, engine, engine_kwargs, *args, **kwargs): """ ) - @Substitution(klass="DataFrame", example=__examples_dataframe_doc) - @Appender(_transform_template) def transform(self, func, *args, engine=None, engine_kwargs=None, **kwargs): + """ + Call function producing a same-indexed DataFrame on each group. + + Returns a DataFrame having the same indexes as the original object + filled with the transformed values. + + Parameters + ---------- + func : function, str + Function to apply to each group. See the Notes section below for + requirements. + + Accepted inputs are: + + - String + - Python function + - Numba JIT function with ``engine='numba'`` specified. + + Only passing a single function is supported with this engine. + If the ``'numba'`` engine is chosen, the function must be + a user defined function with ``values`` and ``index`` as the + first and second arguments respectively in the function signature. + Each group's index will be passed to the user defined function + and optionally available for use. + + If a string is chosen, then it needs to be the name + of the groupby method you want to use. + *args + Positional arguments to pass to func. + engine : str, default None + * ``'cython'`` : Runs the function through C-extensions from cython. + * ``'numba'`` : Runs the function through JIT compiled code from numba. 
+ * ``None`` : Defaults to ``'cython'`` or the global setting + ``compute.use_numba`` + + engine_kwargs : dict, default None + * For ``'cython'`` engine, there are no accepted ``engine_kwargs`` + * For ``'numba'`` engine, the engine can accept ``nopython``, ``nogil`` + and ``parallel`` dictionary keys. The values must either be ``True`` or + ``False``. The default ``engine_kwargs`` for the ``'numba'`` engine is + ``{'nopython': True, 'nogil': False, 'parallel': False}`` and will be + applied to the function + + **kwargs + Keyword arguments to be passed into func. + + Returns + ------- + DataFrame + DataFrame with the same indexes as the original object filled + with transformed values. + + See Also + -------- + DataFrame.groupby.apply : Apply function ``func`` group-wise and combine + the results together. + DataFrame.groupby.aggregate : Aggregate using one or more operations. + DataFrame.transform : Call ``func`` on self producing a DataFrame with the + same axis shape as self. + + Notes + ----- + Each group is endowed with the attribute 'name' in case you need to know + which group you are working on. + + The current implementation imposes three requirements on f: + + * f must return a value that either has the same shape as the input + subframe or can be broadcast to the shape of the input subframe. + For example, if `f` returns a scalar it will be broadcast to have the + same shape as the input subframe. + * if this is a DataFrame, f must support application column-by-column + in the subframe. If f also supports application to the entire subframe, + then a fast path is used starting from the second chunk. + * f must not mutate groups. Mutation is not supported and may + produce unexpected results. See :ref:`gotchas.udf-mutation` for more details. + + When using ``engine='numba'``, there will be no "fall back" behavior internally. 
+ The group data and group index will be passed as numpy arrays to the JITed + user defined function, and no alternative execution attempts will be tried. + + .. versionchanged:: 1.3.0 + + The resulting dtype will reflect the return value of the passed ``func``, + see the examples below. + + .. versionchanged:: 2.0.0 + + When using ``.transform`` on a grouped DataFrame and the transformation + function returns a DataFrame, pandas now aligns the result's index + with the input's index. You can call ``.to_numpy()`` on the + result of the transformation function to avoid alignment. + + Examples + -------- + >>> df = pd.DataFrame( + ... { + ... "A": ["foo", "bar", "foo", "bar", "foo", "bar"], + ... "B": ["one", "one", "two", "three", "two", "two"], + ... "C": [1, 5, 5, 2, 5, 5], + ... "D": [2.0, 5.0, 8.0, 1.0, 2.0, 9.0], + ... } + ... ) + >>> grouped = df.groupby("A")[["C", "D"]] + >>> grouped.transform(lambda x: (x - x.mean()) / x.std()) + C D + 0 -1.154701 -0.577350 + 1 0.577350 0.000000 + 2 0.577350 1.154701 + 3 -1.154701 -1.000000 + 4 0.577350 -0.577350 + 5 0.577350 1.000000 + + Broadcast result of the transformation + + >>> grouped.transform(lambda x: x.max() - x.min()) + C D + 0 4.0 6.0 + 1 3.0 8.0 + 2 4.0 6.0 + 3 3.0 8.0 + 4 4.0 6.0 + 5 3.0 8.0 + + >>> grouped.transform("mean") + C D + 0 3.666667 4.0 + 1 4.000000 5.0 + 2 3.666667 4.0 + 3 4.000000 5.0 + 4 3.666667 4.0 + 5 4.000000 5.0 + + .. versionchanged:: 1.3.0 + + The resulting dtype will reflect the return value of the passed ``func``, + for example: + + >>> grouped.transform(lambda x: x.astype(int).max()) + C D + 0 5 8 + 1 5 9 + 2 5 8 + 3 5 9 + 4 5 8 + 5 5 9 + """ return self._transform( func, *args, engine=engine, engine_kwargs=engine_kwargs, **kwargs )