Skip to content

Fly Handles

On the fly, relative pipeline stage creation.

Use drop_rows_where and keep_rows_where as handles to the future dataframe, using the [] indexing syntax to select a (single) column to apply the logic by, and regular binary operators such as >, >=, ==, !=, etc. to express the condition by which rows will be kept or dropped.

For example:

>>> import pandas as pd; import pdpipe as pdp;
>>> df = pd.DataFrame([[1,4],[4,5],[5,11]], [1,2,3], ['a','b'])
>>> df
   a   b
1  1   4
2  4   5
3  5  11
>>> pipeline = pdp.PdPipeline([
...     pdp.drop_rows_where['a'] > 4,
... ])
>>> pipeline(df)
   a  b
1  1  4
2  4  5

The resulting stages can be naturally combined with logical binary operators: & for AND, | for OR and ^ for XOR, and can also be inverted with the ~ operator.

For example:

>>> import pandas as pd; import pdpipe as pdp;
>>> df = pd.DataFrame([[1,4],[4,5],[5,11]], [1,2,3], ['a','b'])
>>> pipeline = pdp.PdPipeline([
...     ~ (pdp.drop_rows_where['a'] > 4),
... ])
>>> pipeline(df)
   a   b
3  5  11
>>> pipeline = pdp.PdPipeline([
...     (pdp.drop_rows_where['a'] > 3) & (pdp.drop_rows_where['b'] < 10),
... ])
>>> pipeline(df)
   a   b
1  1   4
3  5  11


drop_rows_where = _DropRowsByColValHandle() module-attribute

keep_rows_where = _KeepRowsByColValHandle() module-attribute



Bases: PdPipelineStage

A pipeline stage that keeps rows by a row qualifier.

All rows which the qualifier qualifies (i.e. rows for which the returned boolean series holds True in the corresponding entries) will be kept, while all other rows will be dropped from input dataframes.


Name Type Description Default
qualifier RowQualifier

An object that returns a boolean series from input dataframes. See more in pdpipe.rq.

**kwargs object

All PdPipelineStage constructor parameters are supported.



>>> import pandas as pd; import pdpipe as pdp;
>>> df = pd.DataFrame([[1,4],[4,5],[5,11]], [1,2,3], ['a','b'])
>>> q = pdp.rq.ColValGt('a', 3)
   a   b
2  4   5
3  5  11
Source code in pdpipe/
class KeepRowsByQualifier(PdPipelineStage):
    """A pipeline stage that keeps rows by a row qualifier.

    All rows which the qualifier qualifies (i.e. rows for which the boolean
    series it returns holds True in the corresponding entries) will be kept,
    while all other rows will be dropped from input dataframes.

    Stages of this type can be combined with ``&``, ``|`` and ``^`` and
    inverted with ``~``; each operator builds a new stage of the same type
    around the correspondingly combined qualifier.

    Parameters
    ----------
    qualifier : RowQualifier
        An object that returns a boolean series from input dataframes. See more
        in `pdpipe.rq`.
    **kwargs : object
        All PdPipelineStage constructor parameters are supported.

    Examples
    --------
    >>> import pandas as pd; import pdpipe as pdp;
    >>> df = pd.DataFrame([[1,4],[4,5],[5,11]], [1,2,3], ['a','b'])
    >>> q = pdp.rq.ColValGt('a', 3)
    >>> pdp.PdPipeline([KeepRowsByQualifier(q)])(df)
       a   b
    2  4   5
    3  5  11
    """

    def __init__(self, qualifier, **kwargs):
        self._keeprowsby_rq = qualifier
        super_kwargs = {
            # This stage keeps qualifying rows, so the description says so.
            "desc": f"Keep rows by qualifier {qualifier}",
        }
        # Caller-supplied keyword arguments override the defaults above.
        super_kwargs.update(**kwargs)
        super().__init__(**super_kwargs)

    def _prec(self, X: pandas.DataFrame) -> bool:
        # No precondition is checked here; every dataframe is accepted.
        return True

    def _transform(self, X, verbose=None):
        """Return the rows of X for which the qualifier yields True."""
        before_count = len(X)
        bool_ix = self._keeprowsby_rq(X)
        inter_X = X[bool_ix]
        if verbose:
            print(f"{before_count - len(inter_X)} rows dropped.")
        return inter_X

    def __and__(self, other):
        """Return a stage keeping rows qualified by both qualifiers."""
        try:
            and_rq = self._keeprowsby_rq & other._keeprowsby_rq
            return type(self)(qualifier=and_rq)
        except AttributeError:
            # `other` carries no keep-qualifier; let Python try the
            # reflected operation or raise TypeError.
            return NotImplemented

    def __or__(self, other):
        """Return a stage keeping rows qualified by either qualifier."""
        try:
            or_rq = self._keeprowsby_rq | other._keeprowsby_rq
            return type(self)(qualifier=or_rq)
        except AttributeError:
            return NotImplemented

    def __xor__(self, other):
        """Return a stage keeping rows qualified by exactly one qualifier."""
        try:
            xor_rq = self._keeprowsby_rq ^ other._keeprowsby_rq
            return type(self)(qualifier=xor_rq)
        except AttributeError:
            return NotImplemented

    def __invert__(self):
        """Return a stage of the same type built on the inverted qualifier."""
        not_rq = ~self._keeprowsby_rq
        return type(self)(qualifier=not_rq)


Bases: PdPipelineStage

A pipeline stage that drops rows by a row qualifier.

All rows which the qualifier qualifies (i.e. rows for which the returned boolean series holds True in the corresponding entries) will be dropped, while all other rows of input dataframes will be kept.


Name Type Description Default
qualifier RowQualifier

An object that returns a boolean series from input dataframes. See more in pdpipe.rq.

**kwargs object

All PdPipelineStage constructor parameters are supported.



>>> import pandas as pd; import pdpipe as pdp;
>>> df = pd.DataFrame([[1,4],[4,5],[5,11]], [1,2,3], ['a','b'])
>>> q = pdp.rq.ColValLt('a', 3)
   a   b
2  4   5
3  5  11
Source code in pdpipe/
class DropRowsByQualifier(PdPipelineStage):
    """A pipeline stage that drops rows by a row qualifier.

    All rows which the qualifier qualifies (i.e. rows for which the boolean
    series it returns holds True in the corresponding entries) will be
    dropped, while all other rows of input dataframes will be kept.

    Stages of this type can be combined with ``&``, ``|`` and ``^`` and
    inverted with ``~``; each operator builds a new stage of the same type
    around the correspondingly combined qualifier.

    Parameters
    ----------
    qualifier : RowQualifier
        An object that returns a boolean series from input dataframes. See more
        in `pdpipe.rq`.
    **kwargs : object
        All PdPipelineStage constructor parameters are supported.

    Examples
    --------
    >>> import pandas as pd; import pdpipe as pdp;
    >>> df = pd.DataFrame([[1,4],[4,5],[5,11]], [1,2,3], ['a','b'])
    >>> q = pdp.rq.ColValLt('a', 3)
    >>> pdp.PdPipeline([DropRowsByQualifier(q)])(df)
       a   b
    2  4   5
    3  5  11
    """

    def __init__(self, qualifier, **kwargs):
        self._droprowsby_rq = qualifier
        super_kwargs = {
            "desc": f"Drop rows by qualifier {qualifier}",
        }
        # Caller-supplied keyword arguments override the defaults above.
        super_kwargs.update(**kwargs)
        super().__init__(**super_kwargs)

    def _prec(self, X: pandas.DataFrame) -> bool:
        # No precondition is checked here; every dataframe is accepted.
        return True

    def _transform(self, X, verbose=None):
        """Return the rows of X for which the qualifier does not yield True."""
        before_count = len(X)
        # Invert the qualifier's result: qualified rows are the ones removed.
        bool_ix = ~self._droprowsby_rq(X)
        inter_X = X[bool_ix]
        if verbose:
            print(f"{before_count - len(inter_X)} rows dropped.")
        return inter_X

    def __and__(self, other):
        """Return a stage dropping rows qualified by both qualifiers."""
        try:
            and_rq = self._droprowsby_rq & other._droprowsby_rq
            return type(self)(qualifier=and_rq)
        except AttributeError:
            # `other` carries no drop-qualifier; let Python try the
            # reflected operation or raise TypeError.
            return NotImplemented

    def __or__(self, other):
        """Return a stage dropping rows qualified by either qualifier."""
        try:
            or_rq = self._droprowsby_rq | other._droprowsby_rq
            return type(self)(qualifier=or_rq)
        except AttributeError:
            return NotImplemented

    def __xor__(self, other):
        """Return a stage dropping rows qualified by exactly one qualifier."""
        try:
            xor_rq = self._droprowsby_rq ^ other._droprowsby_rq
            return type(self)(qualifier=xor_rq)
        except AttributeError:
            return NotImplemented

    def __invert__(self):
        """Return a stage of the same type built on the inverted qualifier."""
        not_rq = ~self._droprowsby_rq
        return type(self)(qualifier=not_rq)

Last update: 2022-01-21