From 07a4ee3bef5f8d1387dfc1616e110c770b0d3791 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Thu, 28 Aug 2025 22:47:41 +0200 Subject: [PATCH 1/7] Improve performance of dataclasses by caching dataclass field names --- Lib/dataclasses.py | 32 ++++++++++++++++++++------------ 1 file changed, 20 insertions(+), 12 deletions(-) diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index b98f21dcbe9220..28645313fd5bc8 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -207,6 +207,8 @@ def __repr__(self): # The name of an attribute on the class where we store the Field # objects. Also used to check if a class is a Data Class. _FIELDS = '__dataclass_fields__' +# The name of an attribute on the class where we store the field names +_FIELD_NAMES = '__dataclass_field_names__' # The name of an attribute on the class that stores the parameters to # @dataclass. @@ -1052,6 +1054,7 @@ def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen, # Remember all of the fields on our class (including bases). This # also marks this class as being a dataclass. setattr(cls, _FIELDS, fields) + setattr(cls, _FIELD_NAMES, tuple(f.name for f in fields.values() if f._field_type is _FIELD)) # Was this class defined with an explicit __hash__? Note that if # __eq__ is defined in this class, then python will automatically @@ -1196,13 +1199,13 @@ def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen, # the code instead of iterating over fields. But that can be a project for # another day, if performance becomes an issue. 
def _dataclass_getstate(self): - return [getattr(self, f.name) for f in fields(self)] + return [getattr(self, name) for name in _field_names(self)] def _dataclass_setstate(self, state): - for field, value in zip(fields(self), state): + for field_name, value in zip(_field_names(self), state): # use setattr because dataclass may be frozen - object.__setattr__(self, field.name, value) + object.__setattr__(self, field_name, value) def _get_slots(cls): @@ -1285,7 +1288,7 @@ def _add_slots(cls, is_frozen, weakref_slot, defined_fields): # Create a new dict for our new class. cls_dict = dict(cls.__dict__) - field_names = tuple(f.name for f in fields(cls)) + field_names = _field_names(cls) # Make sure slots don't overlap with those in base classes. inherited_slots = set( itertools.chain.from_iterable(map(_get_slots, cls.__mro__[1:-1])) @@ -1377,8 +1380,6 @@ def fields(class_or_instance): Accepts a dataclass or an instance of one. Tuple elements are of type Field. """ - - # Might it be worth caching this, per class? try: fields = getattr(class_or_instance, _FIELDS) except AttributeError: @@ -1388,6 +1389,13 @@ def fields(class_or_instance): # order, so the order of the tuple is as the fields were defined. return tuple(f for f in fields.values() if f._field_type is _FIELD) +def _field_names(class_or_instance): + """Return a tuple describing the field names of this dataclass. + + Accepts a dataclass or an instance of one. 
Excludes pseudo-fields + """ + + return getattr(class_or_instance, _FIELD_NAMES) def _is_dataclass_instance(obj): """Returns True if obj is an instance of a dataclass.""" @@ -1433,13 +1441,13 @@ def _asdict_inner(obj, dict_factory): # dataclass instance: fast path for the common case if dict_factory is dict: return { - f.name: _asdict_inner(getattr(obj, f.name), dict) - for f in fields(obj) + name: _asdict_inner(getattr(obj, name), dict) + for name in _field_names(obj_type) } else: return dict_factory([ - (f.name, _asdict_inner(getattr(obj, f.name), dict_factory)) - for f in fields(obj) + (name, _asdict_inner(getattr(obj, name), dict_factory)) + for name in _field_names(obj_type) ]) # handle the builtin types first for speed; subclasses handled below elif obj_type is list: @@ -1522,8 +1530,8 @@ def _astuple_inner(obj, tuple_factory): return obj elif _is_dataclass_instance(obj): return tuple_factory([ - _astuple_inner(getattr(obj, f.name), tuple_factory) - for f in fields(obj) + _astuple_inner(getattr(obj, name), tuple_factory) + for name in _field_names(obj) ]) elif isinstance(obj, tuple) and hasattr(obj, '_fields'): # obj is a namedtuple. Recurse into it, but the returned From ab5d43b24a986b76da6be6063dc0fc56bc5fe559 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Thu, 28 Aug 2025 23:38:03 +0200 Subject: [PATCH 2/7] Apply suggestions from code review MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: Bénédikt Tran <10796600+picnixz@users.noreply.github.com> --- Lib/dataclasses.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index 28645313fd5bc8..3e96bb0f766937 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -207,7 +207,7 @@ def __repr__(self): # The name of an attribute on the class where we store the Field # objects. Also used to check if a class is a Data Class. 
_FIELDS = '__dataclass_fields__' -# The name of an attribute on the class where we store the field names +# The name of an attribute on the class where we store the field names. _FIELD_NAMES = '__dataclass_field_names__' # The name of an attribute on the class that stores the parameters to @@ -1054,7 +1054,8 @@ def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen, # Remember all of the fields on our class (including bases). This # also marks this class as being a dataclass. setattr(cls, _FIELDS, fields) - setattr(cls, _FIELD_NAMES, tuple(f.name for f in fields.values() if f._field_type is _FIELD)) + setattr(cls, _FIELD_NAMES, tuple(f.name for f in fields.values() + if f._field_type is _FIELD)) # Was this class defined with an explicit __hash__? Note that if # __eq__ is defined in this class, then python will automatically From 20c603ae8b7498ee80facf00d1f0acdbf4b3bdfe Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Thu, 28 Aug 2025 23:38:35 +0200 Subject: [PATCH 3/7] Update Lib/dataclasses.py --- Lib/dataclasses.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index 3e96bb0f766937..f0b3064ea9fe21 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -1393,7 +1393,7 @@ def fields(class_or_instance): def _field_names(class_or_instance): """Return a tuple describing the field names of this dataclass. - Accepts a dataclass or an instance of one. Excludes pseudo-fields + Accepts a dataclass or an instance of one. Excludes pseudo-fields. 
""" return getattr(class_or_instance, _FIELD_NAMES) From ea105e28fab15fa970acf8837df30ba7489aba64 Mon Sep 17 00:00:00 2001 From: "blurb-it[bot]" <43283697+blurb-it[bot]@users.noreply.github.com> Date: Fri, 29 Aug 2025 17:58:43 +0000 Subject: [PATCH 4/7] =?UTF-8?q?=F0=9F=93=9C=F0=9F=A4=96=20Added=20by=20blu?= =?UTF-8?q?rb=5Fit.?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../next/Library/2025-08-29-17-58-36.gh-issue-138232.-W4iaS.rst | 1 + 1 file changed, 1 insertion(+) create mode 100644 Misc/NEWS.d/next/Library/2025-08-29-17-58-36.gh-issue-138232.-W4iaS.rst diff --git a/Misc/NEWS.d/next/Library/2025-08-29-17-58-36.gh-issue-138232.-W4iaS.rst b/Misc/NEWS.d/next/Library/2025-08-29-17-58-36.gh-issue-138232.-W4iaS.rst new file mode 100644 index 00000000000000..7aee3a29aef484 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-08-29-17-58-36.gh-issue-138232.-W4iaS.rst @@ -0,0 +1 @@ +Improve performance of :func:`dataclasses.asdict` up to 40%. From 573b18676d7b9f1db69179ceddb637c032f018d7 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Tue, 23 Sep 2025 12:09:06 +0200 Subject: [PATCH 5/7] address review comments --- Lib/dataclasses.py | 2 +- .../next/Library/2025-08-29-17-58-36.gh-issue-138232.-W4iaS.rst | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index f0b3064ea9fe21..e1790b29b7517a 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -1395,9 +1395,9 @@ def _field_names(class_or_instance): Accepts a dataclass or an instance of one. Excludes pseudo-fields. 
""" - return getattr(class_or_instance, _FIELD_NAMES) + def _is_dataclass_instance(obj): """Returns True if obj is an instance of a dataclass.""" return hasattr(type(obj), _FIELDS) diff --git a/Misc/NEWS.d/next/Library/2025-08-29-17-58-36.gh-issue-138232.-W4iaS.rst b/Misc/NEWS.d/next/Library/2025-08-29-17-58-36.gh-issue-138232.-W4iaS.rst index 7aee3a29aef484..ab63a205a4be7b 100644 --- a/Misc/NEWS.d/next/Library/2025-08-29-17-58-36.gh-issue-138232.-W4iaS.rst +++ b/Misc/NEWS.d/next/Library/2025-08-29-17-58-36.gh-issue-138232.-W4iaS.rst @@ -1 +1 @@ -Improve performance of :func:`dataclasses.asdict` up to 40%. +Improve performance of :func:`dataclasses.asdict` by up to 40% by caching the field names on dataclass classes. From 5c8892bd89a36fe6839c9884e2a678b7c42123e2 Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Tue, 23 Sep 2025 12:13:27 +0200 Subject: [PATCH 6/7] inline _field_names --- Lib/dataclasses.py | 20 +++++++------------- 1 file changed, 7 insertions(+), 13 deletions(-) diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index e1790b29b7517a..cab97a3264f27b 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -1054,6 +1054,7 @@ def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen, # Remember all of the fields on our class (including bases). This # also marks this class as being a dataclass. setattr(cls, _FIELDS, fields) + # Store field names. Excludes pseudo-fields. setattr(cls, _FIELD_NAMES, tuple(f.name for f in fields.values() if f._field_type is _FIELD)) @@ -1200,11 +1201,11 @@ def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen, # the code instead of iterating over fields. But that can be a project for # another day, if performance becomes an issue. 
def _dataclass_getstate(self): - return [getattr(self, name) for name in _field_names(self)] + return [getattr(self, name) for name in self.__dataclass_field_names__] def _dataclass_setstate(self, state): - for field_name, value in zip(_field_names(self), state): + for field_name, value in zip(self.__dataclass_field_names__, state): # use setattr because dataclass may be frozen object.__setattr__(self, field_name, value) @@ -1289,7 +1290,7 @@ def _add_slots(cls, is_frozen, weakref_slot, defined_fields): # Create a new dict for our new class. cls_dict = dict(cls.__dict__) - field_names = _field_names(cls) + field_names = cls.__dataclass_field_names__ # Make sure slots don't overlap with those in base classes. inherited_slots = set( itertools.chain.from_iterable(map(_get_slots, cls.__mro__[1:-1])) @@ -1390,13 +1391,6 @@ def fields(class_or_instance): # order, so the order of the tuple is as the fields were defined. return tuple(f for f in fields.values() if f._field_type is _FIELD) -def _field_names(class_or_instance): - """Return a tuple describing the field names of this dataclass. - - Accepts a dataclass or an instance of one. Excludes pseudo-fields. 
- """ - return getattr(class_or_instance, _FIELD_NAMES) - def _is_dataclass_instance(obj): """Returns True if obj is an instance of a dataclass.""" @@ -1443,12 +1437,12 @@ def _asdict_inner(obj, dict_factory): if dict_factory is dict: return { name: _asdict_inner(getattr(obj, name), dict) - for name in _field_names(obj_type) + for name in obj_type.__dataclass_field_names__ } else: return dict_factory([ (name, _asdict_inner(getattr(obj, name), dict_factory)) - for name in _field_names(obj_type) + for name in obj_type.__dataclass_field_names__ ]) # handle the builtin types first for speed; subclasses handled below elif obj_type is list: @@ -1532,7 +1526,7 @@ def _astuple_inner(obj, tuple_factory): elif _is_dataclass_instance(obj): return tuple_factory([ _astuple_inner(getattr(obj, name), tuple_factory) - for name in _field_names(obj) + for name in obj.__dataclass_field_names__ ]) elif isinstance(obj, tuple) and hasattr(obj, '_fields'): # obj is a namedtuple. Recurse into it, but the returned From b1419fd7b56c426b47014a792424ff136bd53c9d Mon Sep 17 00:00:00 2001 From: Pieter Eendebak Date: Tue, 23 Sep 2025 23:08:35 +0200 Subject: [PATCH 7/7] inline part 2 --- Lib/dataclasses.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/Lib/dataclasses.py b/Lib/dataclasses.py index cab97a3264f27b..86f11b009f84b5 100644 --- a/Lib/dataclasses.py +++ b/Lib/dataclasses.py @@ -207,8 +207,6 @@ def __repr__(self): # The name of an attribute on the class where we store the Field # objects. Also used to check if a class is a Data Class. _FIELDS = '__dataclass_fields__' -# The name of an attribute on the class where we store the field names. -_FIELD_NAMES = '__dataclass_field_names__' # The name of an attribute on the class that stores the parameters to # @dataclass. @@ -1055,8 +1053,8 @@ def _process_class(cls, init, repr, eq, order, unsafe_hash, frozen, # also marks this class as being a dataclass. setattr(cls, _FIELDS, fields) # Store field names. 
Excludes pseudo-fields. - setattr(cls, _FIELD_NAMES, tuple(f.name for f in fields.values() - if f._field_type is _FIELD)) + cls.__dataclass_field_names__ = tuple(f.name for f in fields.values() + if f._field_type is _FIELD) # Was this class defined with an explicit __hash__? Note that if # __eq__ is defined in this class, then python will automatically