By looking into the pandas implementation and utilizing pandas
capabilities, one can patch the DataFrame object to achieve this behavior.
I implemented a function named make_dataframe_immutable(dataframe)
to solve this problem.
It was written for pandas==0.25.3.
EDIT: added a solution for pandas==1.0.5 and pandas==1.1.4.
Newer pandas versions will probably require adjustments - hopefully these won't be too hard to make with the help of the tests below.
This solution is new and not thoroughly tested - any feedback will be appreciated.
It would be nice if someone could post an inverse make_dataframe_mutable()
function here.
import functools
import numpy as np
import pandas as pd
from pandas.core.indexing import _NDFrameIndexer
def make_dataframe_immutable(df: pd.DataFrame):
    """
    Freeze ``df`` in place so that the pandas interface can no longer modify it.

    The instance's private setters, ``update``, ``insert`` and the block
    manager's ``delete`` are replaced by a callable that raises
    ``ImmutablePandas``. Columns fetched through the item cache are made
    immutable as well, arrays returned by the block manager are flagged
    read-only, and the listed methods reject ``inplace=True``.

    Calling this on an already immutable frame is a no-op.
    """
    if getattr(df, "_is_immutable", False):
        return
    df._is_immutable = True

    def block(owner, attribute):
        # Swap the attribute for the raising stub while keeping the original's metadata.
        original = getattr(owner, attribute)
        setattr(owner, attribute, functools.wraps(original)(_raise_immutable_exception))

    for private_setter in (
        "_set_value",
        "_setitem_slice",
        "_setitem_frame",
        "_setitem_array",
        "_set_item",
    ):
        block(df, private_setter)
    block(df._data, "delete")
    block(df, "update")
    block(df, "insert")

    # Series handed out from the column cache must be frozen too.
    df._get_item_cache = _make_result_immutable(df._get_item_cache)
    # prevent modification through numpy arrays
    df._data.as_array = _make_numpy_result_readonly(df._data.as_array)

    # This list was obtained by manual inspection +
    # [attr for attr in dir(d) if hasattr(getattr(pd.DataFrame, attr, None), '__code__') and
    #  'inplace' in getattr(pd.DataFrame, attr).__code__.co_varnames]
    inplace_capable_methods = (
        'bfill',
        'clip',
        'clip_lower',
        'clip_upper',
        'drop',
        'drop_duplicates',
        'dropna',
        'eval',
        'ffill',
        'fillna',
        'interpolate',
        'mask',
        'query',
        'replace',
        'reset_index',
        'set_axis',
        'set_index',
        'sort_index',
        'sort_values',
        'where',
        "astype",
        "assign",
        "reindex",
        "rename",
    )
    _prevent_inplace_argument_in_function_calls(df, inplace_capable_methods)
def make_series_immutable(series: pd.Series):
    """
    Freeze ``series`` in place so that the pandas interface can no longer modify it.

    The instance's setters are replaced by a callable that raises
    ``ImmutablePandas``, arrays returned by the block manager are flagged
    read-only, and the listed methods reject ``inplace=True``.

    Calling this on an already immutable series is a no-op.
    """
    if getattr(series, "_is_immutable", False):
        return
    series._is_immutable = True

    for setter in ("_set_with_engine", "_set_with", "set_value"):
        original = getattr(series, setter)
        setattr(series, setter, functools.wraps(original)(_raise_immutable_exception))

    # prevent modification through numpy arrays
    for accessor in ("external_values", "internal_values", "get_values"):
        manager = series._data
        setattr(manager, accessor, _make_numpy_result_readonly(getattr(manager, accessor)))

    # This list was obtained by manual inspection +
    # [attr for attr in dir(d) if hasattr(getattr(pd.Series, attr, None), '__code__') and
    #  'inplace' in getattr(pd.Series, attr).__code__.co_varnames]
    inplace_capable_methods = (
        "astype",
        'bfill',
        'clip',
        'clip_lower',
        'clip_upper',
        'drop',
        'drop_duplicates',
        'dropna',
        'ffill',
        'fillna',
        'interpolate',
        'mask',
        'replace',
        'reset_index',
        'set_axis',
        'sort_index',
        'sort_values',
        "valid",
        'where',
        "_set_name",
    )
    _prevent_inplace_argument_in_function_calls(series, inplace_capable_methods)
class ImmutablePandas(Exception):
    """Raised when code tries to modify a pandas object that was made immutable."""
def _raise_immutable_exception(*args, **kwargs):
    """
    Stand-in for every mutating method of an immutable object.

    Accepts and ignores any arguments so it can replace callables of any
    signature.

    Raises:
        ImmutablePandas: always.
    """
    # Plain string literal: the message has no placeholders, so no f-string is needed.
    raise ImmutablePandas("Cannot modify immutable dataframe. Please use df.copy()")
def _get_df_or_series_from_args(args):
if len(args) >= 2 and (isinstance(args[1], pd.DataFrame) or isinstance(args[1], pd.Series)):
return args[1]
def _safe__init__(self, *args, **kwargs):
    """
    Replacement ``__init__`` for ``_NDFrameIndexer``.

    Runs the parent class initializer, then, when the indexed object
    (second positional argument) is flagged immutable, disables
    ``_get_setitem_indexer`` on this indexer instance so that assignments
    through it raise ``ImmutablePandas``.
    """
    super(_NDFrameIndexer, self).__init__(*args, **kwargs)
    target = _get_df_or_series_from_args(args)
    if target is None or not getattr(target, "_is_immutable", False):
        return
    blocked = functools.wraps(self._get_setitem_indexer)(_raise_immutable_exception)
    self._get_setitem_indexer = blocked
# This line is the greatest foul in this module, as it performs a global patch:
# every _NDFrameIndexer created after import goes through _safe__init__.
# Note that reloading this module overrides this attribute again and again. This is supported.
_NDFrameIndexer.__init__ = functools.wraps(_NDFrameIndexer.__init__)(_safe__init__)
def _make_numpy_result_readonly(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
res = func(*args, **kwargs)
if isinstance(res, np.ndarray):
res.flags.writeable = False
return res
return wrapper
def _make_result_immutable(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
res = func(*args, **kwargs)
if isinstance(res, pd.Series):
make_series_immutable(res)
return res
return wrapper
def _prevent_inplace_operation(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
# TODO: here we assume that in-place is not given as a positional.
# remove this assumption, either by hard-coding the position for each method or by parsing the
# function signature.
if kwargs.get("inplace", False):
_raise_immutable_exception()
return func(*args, **kwargs)
return wrapper
def _prevent_inplace_argument_in_function_calls(obj, attributes):
for attr in attributes:
member = getattr(obj, attr)
setattr(obj, attr, _prevent_inplace_operation(member))
Pytest unit tests:
import importlib
import warnings

import pandas as pd
import pytest

import immutable_pandas
from immutable_pandas import ImmutablePandas
from immutable_pandas import make_dataframe_immutable
from immutable_pandas import make_series_immutable
def create_immutable_dataframe() -> pd.DataFrame:
    """Build a fresh 4-row frame with columns ``x`` and ``y`` and make it immutable."""
    # Not a fixture, per the original author's note: pytest copies objects
    # transparently, which makes the tests flaky.
    frame = pd.DataFrame({"x": [1, 2, 3, 4], "y": [4, 5, 6, 7]})
    make_dataframe_immutable(frame)
    return frame
def test_immutable_dataframe_cannot_change_with_direct_access():
    """Attribute/item assignment and insert raise; a frame derived via query accepts assignment."""
    frozen = create_immutable_dataframe()
    derived = frozen.query("x == 2")
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # No exception expected here: the query result is assigned to freely.
        derived["moshe"] = 123
    with pytest.raises(ImmutablePandas):
        frozen.x = 2
    with pytest.raises(ImmutablePandas):
        frozen["moshe"] = 56
    with pytest.raises(ImmutablePandas):
        frozen.insert(0, "z", [1, 2, 3, 4])
def test_immutable_dataframe_cannot_change_with_inplace_operations():
    """Passing ``inplace=True`` to eval or assign raises on an immutable frame."""
    frozen = create_immutable_dataframe()
    with pytest.raises(ImmutablePandas):
        frozen.eval("y=x+1", inplace=True)
    with pytest.raises(ImmutablePandas):
        frozen.assign(y=2, inplace=True)
def test_immutable_dataframe_cannot_change_with_loc():
    """Row assignment through ``.loc`` and ``.iloc`` raises on an immutable frame."""
    frozen = create_immutable_dataframe()
    with pytest.raises(ImmutablePandas):
        frozen.loc[2] = 1
    with pytest.raises(ImmutablePandas):
        frozen.iloc[1] = 4
def test_immutable_dataframe_cannot_change_with_columns_access():
    """Writing into a column obtained from an immutable frame raises."""
    frozen = create_immutable_dataframe()
    with pytest.raises(ImmutablePandas):
        frozen["x"][2] = 123
    with pytest.raises(ImmutablePandas):
        frozen["x"].loc[2] = 123
def test_immutable_dataframe_cannot_del_column():
    """Deleting a column of an immutable frame raises."""
    frozen = create_immutable_dataframe()
    with pytest.raises(ImmutablePandas):
        del frozen["x"]
def test_immutable_dataframe_cannot_be_modified_through_values():
    """Arrays from ``.values`` and ``as_matrix()`` reject writes with a read-only ValueError."""
    frozen = create_immutable_dataframe()
    with pytest.raises(ValueError, match="read-only"):
        frozen.values[0, 0] = 1
    with pytest.raises(ValueError, match="read-only"):
        frozen.as_matrix()[0, 0] = 1
def test_immutable_series_cannot_change_with_loc():
    """Assignment through ``.loc`` and ``.iloc`` raises on an immutable series."""
    frozen = pd.Series([1, 2, 3, 4])
    make_series_immutable(frozen)
    with pytest.raises(ImmutablePandas):
        frozen.loc[0] = 1
    with pytest.raises(ImmutablePandas):
        frozen.iloc[0] = 1
def test_immutable_series_cannot_change_with_inplace_operations():
    """Passing ``inplace=True`` to sort_index, sort_values or astype raises on an immutable series."""
    frozen = pd.Series([1, 2, 3, 4])
    make_series_immutable(frozen)
    with pytest.raises(ImmutablePandas):
        frozen.sort_index(inplace=True)
    with pytest.raises(ImmutablePandas):
        frozen.sort_values(inplace=True)
    with pytest.raises(ImmutablePandas):
        frozen.astype(int, inplace=True)
def test_series_cannot_be_modeified_through_values():
    """Arrays from ``get_values()`` and ``.values`` reject writes with a read-only ValueError."""
    first = pd.Series([1, 2, 3, 4])
    make_series_immutable(first)
    with pytest.raises(ValueError, match="read-only"):
        first.get_values()[0] = 1234
    # A second, fresh series is used for the ``.values`` check.
    second = pd.Series([1, 2, 3, 4])
    make_series_immutable(second)
    with pytest.raises(ValueError, match="read-only"):
        second.values[0] = 1234
def test_reloading_module_immutable_pandas_does_not_break_immutability():
    """Frames frozen before a module reload stay frozen; frames frozen after it are frozen too."""
    # We need to test the effects of reloading the module, because the module modifies
    # the global _NDFrameIndexer.__init__ upon every reload.
    frozen_before_reload = create_immutable_dataframe()
    plain_copy = frozen_before_reload.copy()
    reloaded = importlib.reload(immutable_pandas)
    with pytest.raises(reloaded.ImmutablePandas):
        frozen_before_reload.astype(int, inplace=True)
    # The copy has not been frozen yet, so this call is expected to go through.
    plain_copy.astype(int, inplace=True)
    reloaded.make_dataframe_immutable(plain_copy)
    with pytest.raises(reloaded.ImmutablePandas):
        plain_copy.astype(int, inplace=True)
Edit: here is an update tested on pandas==1.0.5 and on pandas==1.1.4
"""
Two methods to make pandas objects immutable.
make_dataframe_immutable()
make_series_immutable()
"""
import functools
import numpy as np
import pandas as pd
from pandas.core.indexing import _iLocIndexer
from pandas.core.indexing import _LocIndexer
from pandas.core.indexing import IndexingMixin
def make_dataframe_immutable(df: pd.DataFrame):
    """
    Makes the given DataFrame immutable.

    After this call the dataframe cannot be modified through the pandas
    interface: the instance's private setters, ``update``, ``insert`` and the
    block manager's delete method raise ``ImmutablePandas``. Columns fetched
    through the item cache are made immutable, arrays returned by the block
    manager are flagged read-only, and the listed methods reject
    ``inplace=True``.

    Calling this on an already immutable dataframe is a no-op.

    Args:
        df: the dataframe to freeze (patched in place).
    """
    if getattr(df, "_is_immutable", False):
        return
    df._is_immutable = True
    df._set_value = functools.wraps(df._set_value)(_raise_immutable_exception)
    df._setitem_slice = functools.wraps(df._setitem_slice)(_raise_immutable_exception)
    df._setitem_frame = functools.wraps(df._setitem_frame)(_raise_immutable_exception)
    df._setitem_array = functools.wraps(df._setitem_array)(_raise_immutable_exception)
    df._set_item = functools.wraps(df._set_item)(_raise_immutable_exception)
    # Resolve the block manager once, so that every manager-level patch below
    # targets the same attribute regardless of the pandas version.
    if hasattr(df, "_mgr"):
        # pandas==1.1.4
        manager = df._mgr
        manager.idelete = functools.wraps(manager.idelete)(_raise_immutable_exception)
    else:
        # pandas==1.0.5
        manager = df._data
        manager.delete = functools.wraps(manager.delete)(_raise_immutable_exception)
    df.update = functools.wraps(df.update)(_raise_immutable_exception)
    df.insert = functools.wraps(df.insert)(_raise_immutable_exception)
    df._get_item_cache = _make_result_immutable(df._get_item_cache)
    # prevent modification through numpy arrays
    manager.as_array = _make_numpy_result_readonly(manager.as_array)
    _prevent_inplace_argument_in_function_calls(
        df,
        # This list was obtained by manual inspection +
        # [attr for attr in dir(d) if hasattr(getattr(pd.DataFrame, attr, None), '__code__') and
        #  'inplace' in getattr(pd.DataFrame, attr).__code__.co_varnames]
        (
            "bfill",
            "clip",
            "drop",
            "drop_duplicates",
            "dropna",
            "eval",
            "ffill",
            "fillna",
            "interpolate",
            "mask",
            "query",
            "replace",
            "reset_index",
            "set_axis",
            "set_index",
            "sort_index",
            "sort_values",
            "where",
            "astype",
            "assign",
            "reindex",
            "rename",
        ),
    )
def make_series_immutable(series: pd.Series):
    """
    Makes the given Series immutable.

    After this call the series cannot be modified through the pandas
    interface: the instance's private setters raise ``ImmutablePandas``,
    arrays returned by the block manager are flagged read-only, and the listed
    methods reject ``inplace=True``.

    Calling this on an already immutable series is a no-op.

    Args:
        series: the series to freeze (patched in place).
    """
    if getattr(series, "_is_immutable", False):
        return
    series._is_immutable = True
    series._set_with_engine = functools.wraps(series._set_with_engine)(_raise_immutable_exception)
    series._set_with = functools.wraps(series._set_with)(_raise_immutable_exception)
    # Same manager resolution as make_dataframe_immutable: prefer ``_mgr``
    # (pandas==1.1.4) and fall back to ``_data`` (pandas==1.0.5).
    manager = series._mgr if hasattr(series, "_mgr") else series._data
    # prevent modification through numpy arrays
    manager.external_values = _make_numpy_result_readonly(manager.external_values)
    manager.internal_values = _make_numpy_result_readonly(manager.internal_values)
    _prevent_inplace_argument_in_function_calls(
        series,
        # This list was obtained by manual inspection +
        # [attr for attr in dir(d) if hasattr(getattr(pd.Series, attr, None), '__code__') and
        #  'inplace' in getattr(pd.Series, attr).__code__.co_varnames]
        (
            "astype",
            "bfill",
            "clip",
            "drop",
            "drop_duplicates",
            "dropna",
            "ffill",
            "fillna",
            "interpolate",
            "mask",
            "replace",
            "reset_index",
            "set_axis",
            "sort_index",
            "sort_values",
            "where",
            "_set_name",
        ),
    )
class ImmutablePandas(Exception):
    """Raised when code tries to modify a pandas object that was made immutable."""
def _raise_immutable_exception(*args, **kwargs):
    """
    Stand-in for every mutating method of an immutable object.

    Accepts and ignores any arguments so it can replace callables of any
    signature.

    Raises:
        ImmutablePandas: always.
    """
    # Plain string literal: the message has no placeholders, so no f-string is needed.
    raise ImmutablePandas("Cannot modify immutable dataframe. Please use df.copy()")
def _get_df_or_series_from_args(args):
if len(args) >= 2 and (isinstance(args[1], pd.DataFrame) or isinstance(args[1], pd.Series)):
return args[1]
def _protect_indexer(loc_func):
def wrapper(*arg, **kwargs):
res = loc_func(*args, **kwargs)
return res
def _safe__init__(cls, self, *args, **kwargs):
    """
    Initializer that disables item assignment on indexers of immutable objects.

    Runs the initializer of the class after ``cls`` in the MRO of ``self``,
    then, when the indexed object (second positional argument) is flagged
    immutable, replaces ``_get_setitem_indexer`` on this indexer instance so
    that assignments through it raise ``ImmutablePandas``.
    """
    super(cls, self).__init__(*args, **kwargs)
    target = _get_df_or_series_from_args(args)
    if target is None or not getattr(target, "_is_immutable", False):
        return
    blocked = functools.wraps(self._get_setitem_indexer)(_raise_immutable_exception)
    self._get_setitem_indexer = blocked
@functools.wraps(IndexingMixin.loc)
def _safe_loc(self):
    """Return a ``loc`` indexer for ``self``; it refuses assignment when ``self`` is immutable."""
    indexer = _LocIndexer("loc", self)
    if not getattr(self, "_is_immutable", False):
        return indexer
    # Original author's note: "Edit also loc._setitem_with_indexer"
    # NOTE(review): presumably a reminder to patch that method as well - confirm.
    blocked = functools.wraps(indexer._get_setitem_indexer)(_raise_immutable_exception)
    indexer._get_setitem_indexer = blocked
    return indexer
@functools.wraps(IndexingMixin.iloc)
def _safe_iloc(self):
    """Return an ``iloc`` indexer for ``self``; it refuses assignment when ``self`` is immutable."""
    indexer = _iLocIndexer("iloc", self)
    if not getattr(self, "_is_immutable", False):
        return indexer
    # Original author's note: "Edit also iloc._setitem_with_indexer"
    # NOTE(review): presumably a reminder to patch that method as well - confirm.
    blocked = functools.wraps(indexer._get_setitem_indexer)(_raise_immutable_exception)
    indexer._get_setitem_indexer = blocked
    return indexer
# Global patch, applied on every import/reload of this module: the ``loc`` and
# ``iloc`` properties of DataFrame and Series are replaced by the wrappers above,
# so indexers created for immutable objects refuse assignment.
pd.DataFrame.loc = property(_safe_loc)
pd.Series.loc = property(_safe_loc)
pd.DataFrame.iloc = property(_safe_iloc)
pd.Series.iloc = property(_safe_iloc)
def _make_numpy_result_readonly(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
res = func(*args, **kwargs)
if isinstance(res, np.ndarray):
res.flags.writeable = False
return res
return wrapper
def _make_result_immutable(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
res = func(*args, **kwargs)
if isinstance(res, pd.Series):
make_series_immutable(res)
return res
return wrapper
def _prevent_inplace_operation(func):
@functools.wraps(func)
def wrapper(*args, **kwargs):
# TODO: here we assume that in-place is not given as a positional.
# remove this assumption, either by hard-coding the position for each method or by parsing the
# function signature.
if kwargs.get("inplace", False):
_raise_immutable_exception()
return func(*args, **kwargs)
return wrapper
def _prevent_inplace_argument_in_function_calls(obj, attributes):
for attr in attributes:
member = getattr(obj, attr)
setattr(obj, attr, _prevent_inplace_operation(member))
And the corresponding pytest file:
import importlib
import warnings
import pandas as pd
import pytest
import immutable_pandas
from immutable_pandas import ImmutablePandas
from immutable_pandas import make_dataframe_immutable
from immutable_pandas import make_series_immutable
def create_immutable_dataframe() -> pd.DataFrame:
    """Build a fresh 4-row frame with columns ``x`` and ``y`` and make it immutable."""
    # Not a fixture, per the original author's note: pytest copies objects
    # transparently, which makes the tests flaky.
    frame = pd.DataFrame({"x": [1, 2, 3, 4], "y": [4, 5, 6, 7]})
    make_dataframe_immutable(frame)
    return frame
def test_immutable_dataframe_cannot_change_with_direct_access():
    """Attribute/item assignment and insert raise; a frame derived via query accepts assignment."""
    frozen = create_immutable_dataframe()
    derived = frozen.query("x == 2")
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        # No exception expected here: the query result is assigned to freely.
        derived["moshe"] = 123
    with pytest.raises(ImmutablePandas):
        frozen.x = 2
    with pytest.raises(ImmutablePandas):
        frozen["moshe"] = 56
    with pytest.raises(ImmutablePandas):
        frozen.insert(0, "z", [1, 2, 3, 4])
def test_immutable_dataframe_cannot_change_with_inplace_operations():
    """Passing ``inplace=True`` to eval or assign raises on an immutable frame."""
    frozen = create_immutable_dataframe()
    with pytest.raises(ImmutablePandas):
        frozen.eval("y=x+1", inplace=True)
    with pytest.raises(ImmutablePandas):
        frozen.assign(y=2, inplace=True)
def test_immutable_dataframe_cannot_change_with_loc():
    """Row assignment through ``.loc`` and ``.iloc`` raises on an immutable frame."""
    frozen = create_immutable_dataframe()
    with pytest.raises(ImmutablePandas):
        frozen.loc[2] = 1
    with pytest.raises(ImmutablePandas):
        frozen.iloc[1] = 4
def test_immutable_dataframe_cannot_change_with_columns_access():
    """Writing into a column obtained from an immutable frame raises."""
    frozen = create_immutable_dataframe()
    with pytest.raises(ImmutablePandas):
        frozen["x"][2] = 123
    with pytest.raises(ImmutablePandas):
        frozen["x"].loc[2] = 123
def test_immutable_dataframe_cannot_del_column():
    """Deleting a column of an immutable frame raises."""
    frozen = create_immutable_dataframe()
    with pytest.raises(ImmutablePandas):
        del frozen["x"]
def test_immutable_dataframe_cannot_be_modified_through_values():
    """The array from ``.values`` rejects writes with a read-only ValueError."""
    frozen = create_immutable_dataframe()
    with pytest.raises(ValueError, match="read-only"):
        frozen.values[0, 0] = 1
    # The as_matrix() check from the pandas 0.25 version of this test is disabled here.
def test_immutable_series_cannot_change_with_loc():
    """Assignment through ``.loc`` and ``.iloc`` raises on an immutable series."""
    frozen = pd.Series([1, 2, 3, 4])
    make_series_immutable(frozen)
    with pytest.raises(ImmutablePandas):
        frozen.loc[0] = 1
    with pytest.raises(ImmutablePandas):
        frozen.iloc[0] = 1
def test_immutable_series_cannot_change_with_inplace_operations():
    """Passing ``inplace=True`` to sort_index, sort_values or astype raises on an immutable series."""
    frozen = pd.Series([1, 2, 3, 4])
    make_series_immutable(frozen)
    with pytest.raises(ImmutablePandas):
        frozen.sort_index(inplace=True)
    with pytest.raises(ImmutablePandas):
        frozen.sort_values(inplace=True)
    with pytest.raises(ImmutablePandas):
        frozen.astype(int, inplace=True)
def test_series_cannot_be_modeified_through_values():
    """The array from ``.values`` of an immutable series rejects writes with a read-only ValueError."""
    # A single series suffices: the original built and froze an identical
    # series twice and never used the first one.
    series = pd.Series([1, 2, 3, 4])
    make_series_immutable(series)
    with pytest.raises(ValueError, match="read-only"):
        series.values[0] = 1234
def test_reloading_module_immutable_pandas_does_not_break_immutability():
    """Frames frozen before a module reload stay frozen; frames frozen after it are frozen too."""
    # We need to test the effects of reloading the module, because the module modifies
    # the globals pd.DataFrame.loc, pd.DataFrame.iloc, pd.Series.loc and pd.Series.iloc
    # upon every reload.
    frozen_before_reload = create_immutable_dataframe()
    plain_copy = frozen_before_reload.copy()
    reloaded = importlib.reload(immutable_pandas)
    with pytest.raises(reloaded.ImmutablePandas):
        frozen_before_reload.astype(int, inplace=True)
    reloaded.make_dataframe_immutable(plain_copy)
    with pytest.raises(reloaded.ImmutablePandas):
        plain_copy.astype(int, inplace=True)
def test_at_and_iat_crash():
    """Scalar assignment through ``.iat`` and ``.at`` raises on an immutable frame."""
    frozen = create_immutable_dataframe()
    with pytest.raises(immutable_pandas.ImmutablePandas):
        frozen.iat[0, 0] = 1
    with pytest.raises(immutable_pandas.ImmutablePandas):
        frozen.at[0, "x"] = 1