-
Notifications
You must be signed in to change notification settings - Fork 222
/
Copy pathitem_32_use_getattr.py
251 lines (190 loc) · 8.5 KB
/
item_32_use_getattr.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
# Item_32_Use __getattr__, __getattribute__, and __setattr__ for lazy
# attributes
# Python's language hooks make it easy to write generic code for gluing
# systems together. For example, say you want to represent the rows of your
# database as Python objects. Your database has its schema set. Your code
# that uses objects corresponding to those rows must also know what your
# database looks like. However, in Python, the code that connects your Python
# object to the database doesn't need to know the schema of your rows: it can
# be generic.
# How is that possible? Plain instance attributes, @property methods, and
# descriptors can't do this because they all need to be defined in advance.
# Python makes this dynamic behavior possible with the __getattr__ special
# method. If your class defines __getattr__, that method is called every time
# an attribute can't be found in an object's instance dictionary.
class LazyDB:
    """Object whose missing attributes are created on first access.

    ``exists`` is assigned eagerly in ``__init__``; any other name is
    materialized by ``__getattr__`` the first time it is read.
    """

    def __init__(self):
        self.exists = 5

    def __getattr__(self, name):
        """Build, store, and return a placeholder value for ``name``.

        Python calls this hook only when normal attribute lookup fails.
        The generated value is stored on the instance with ``setattr``, so
        later reads of the same name are served by normal lookup instead.
        """
        generated = 'Value for %s' % name
        setattr(self, name, generated)
        return generated
# Here, I access the missing property foo. This causes Python to call the
# __getattr__ method above, which mutates the instance dictionary __dict__.
# Demo: print the instance dictionary before and after the first read of the
# missing attribute foo. The statement order matters, because reading
# data.foo is what adds the entry to data.__dict__.
data = LazyDB()
print('Before:', data.__dict__)
print('foo: ', data.foo)
print('After: ', data.__dict__)
# Before: {'exists': 5}
# foo: Value for foo
# After: {'exists': 5, 'foo': 'Value for foo'}
# Here, I add logging to LazyDB to show when __getattr__ is actually called.
# Note that I use super().__getattr__() to get the real property value in
# order to avoid infinite recursion.
class LoggingLazyDB(LazyDB):
    """LazyDB variant that reports every call to ``__getattr__``."""

    def __getattr__(self, name):
        """Log the lookup of ``name``, then defer to the parent's hook."""
        message = 'Called __getattr__(%s)' % name
        print(message)
        # Delegate through super() rather than reading the attribute on self,
        # which would re-enter this hook.
        resolved = super().__getattr__(name)
        return resolved
# Demo: read an attribute set in __init__ (exists) and then read a missing
# attribute (foo) twice to show when the __getattr__ log line is printed.
data = LoggingLazyDB()
print('exists:', data.exists)
print('foo: ', data.foo)
print('foo: ', data.foo)
# exists: 5
# Called __getattr__(foo)
# foo: Value for foo
# foo: Value for foo
# The exists attribute is present in the instance dictionary, so __getattr__
# is never called for it. The foo attribute is not in the instance dictionary
# initially, so __getattr__ is called the first time. But the call to
# __getattr__ for foo also does a setattr, which populates foo in the instance
# dictionary. This is why the second time I access foo there isn't a call to
# __getattr__.
# This behavior is especially helpful for use cases like lazily accessing
# schemaless data. __getattr__ runs once to do the hard work of loading a
# property; all subsequent accesses retrieve the existing result.
# Say you also want transactions in this database system. The next time the
# user accesses a property, you want to know whether the corresponding row in
# the database is still valid and whether the transaction is still open. The
# __getattr__ hook won't let you do this reliably because it will use the
# object's instance dictionary as the fast path for existing attributes.
# To enable this use case, Python has another language hook called
# __getattribute__. This special method is called every time an attribute is
# accessed on an object, even in cases where it does exist in the attribute
# dictionary. This enables you to do things like check global transaction
# state on every property access. Here, I define ValidatingDB to log each time
# __getattribute__ is called:
class ValidatingDB:
    """Object that intercepts every attribute read via ``__getattribute__``.

    Each read is logged. Names that normal lookup cannot find are given a
    placeholder value, which is stored on the instance.
    """

    def __init__(self):
        self.exists = 5

    def __getattribute__(self, name):
        """Log the access to ``name`` and return its (possibly new) value."""
        print('Called __getattribute__(%s)' % name)
        try:
            # Use the parent implementation so this hook is not re-entered.
            found = super().__getattribute__(name)
        except AttributeError:
            # Normal lookup failed: create the value and keep it on the
            # instance for subsequent reads.
            found = 'Value for %s' % name
            setattr(self, name, found)
        return found
# Demo: every read below, including the repeated read of foo, goes through
# __getattribute__ and prints a log line.
data = ValidatingDB()
print('exists:', data.exists)
print('foo: ', data.foo)
print('foo: ', data.foo)
# Called __getattribute__(exists)
# exists: 5
# Called __getattribute__(foo)
# foo: Value for foo
# Called __getattribute__(foo)
# foo: Value for foo
# In the event that a dynamically accessed property shouldn't exist, you can
# raise an AttributeError to cause Python's standard missing property behavior
# for both __getattr__ and __getattribute__.
class MissingPropertyDB:
    """Example of rejecting one dynamic attribute name from ``__getattr__``."""

    def __getattr__(self, name):
        """Raise ``AttributeError`` for ``'bad_name'``; return None otherwise.

        Raises:
            AttributeError: if ``name`` is ``'bad_name'``.
        """
        if name != 'bad_name':
            # Every other missing name resolves to None.
            return None
        raise AttributeError('%s is missing' % name)
data = MissingPropertyDB()
# The access below is left commented out because it raises:
# data.bad_name
# AttributeError: bad_name is missing
# Python code implementing generic functionality often relies on the hasattr
# built-in function to determine when properties exist, and the getattr
# built-in function to retrieve property values. These functions also look in
# the instance dictionary for an attribute name before calling __getattr__.
# Demo: call hasattr twice for the same missing name and print the instance
# dictionary before and after the first call.
data = LoggingLazyDB()
print('Before: ', data.__dict__)
print('foo exists: ', hasattr(data, 'foo'))
print('After: ', data.__dict__)
print('foo exists: ', hasattr(data, 'foo'))
# Before: {'exists': 5}
# Called __getattr__(foo)
# foo exists: True
# After: {'exists': 5, 'foo': 'Value for foo'}
# foo exists: True
# In the example above, __getattr__ is only called once. In contrast, classes
# that implement __getattribute__ will have that method called each time
# hasattr or getattr is run on an object.
# Demo: both hasattr calls go through __getattribute__ and print a log line.
data = ValidatingDB()
print('foo exists: ', hasattr(data, 'foo'))
print('foo exists: ', hasattr(data, 'foo'))
# Called __getattribute__(foo)
# foo exists: True
# Called __getattribute__(foo)
# foo exists: True
# Now, say you want to lazily push data back to the database when values are
# assigned to your Python object. You can do this with __setattr__, a similar
# language hook that lets you intercept arbitrary attribute assignments.
# Unlike retrieving an attribute with __getattr__ and __getattribute__,
# there's no need for two separate methods. The __setattr__ method is always
# called every time an attribute is assigned on an instance (either directly
# or through the setattr built-in function).
class SavingDB:
    """Object that intercepts every attribute assignment."""

    def __setattr__(self, name, value):
        """Save some data to the DB log, then store the attribute.

        The save step is only a placeholder in this example; the method
        currently just performs the normal assignment.
        """
        # Assign through super() so this hook is not re-entered.
        super().__setattr__(name, value)
# Here, I define a logging subclass of SavingDB. Its __setattr__ method is
# always called on each attribute assignment:
class LoggingSavingDB(SavingDB):
    """SavingDB variant that reports every attribute assignment."""

    def __setattr__(self, name, value):
        """Log the assignment of ``value`` to ``name``, then perform it."""
        message = 'Called __setattr__(%s, %r)' % (name, value)
        print(message)
        super().__setattr__(name, value)
# Demo: assign foo twice and print the instance dictionary around each
# assignment; each assignment prints a __setattr__ log line.
data = LoggingSavingDB()
print('Before: ', data.__dict__)
data.foo = 5
print('After: ', data.__dict__)
data.foo = 7
print('Finally:', data.__dict__)
# Before: {}
# Called __setattr__(foo, 5)
# After: {'foo': 5}
# Called __setattr__(foo, 7)
# Finally: {'foo': 7}
# The problem with __getattribute__ and __setattr__ is that they're called on
# every attribute access for an object, even when you may not want that to
# happen. For example, say you want attribute accesses on your object to
# actually look up keys in an associated dictionary.
class BrokenDictionaryDB:
    """Deliberately broken example: attribute reads recurse without end.

    The class tries to serve attribute reads from an associated dictionary,
    but its ``__getattribute__`` reads ``self._data``, which re-enters
    ``__getattribute__``. It is kept broken on purpose to demonstrate the
    pitfall.
    """

    def __init__(self, data):
        """Store ``data`` as the backing mapping for attribute reads."""
        # Keep the caller's mapping rather than a fresh empty dict, so the
        # constructor argument is not silently discarded.
        self._data = data

    def __getattribute__(self, name):
        """Log the access and (unsuccessfully) look ``name`` up in ``_data``.

        Raises:
            RecursionError: on any attribute read, because ``self._data``
                below triggers this method again.
        """
        print('Called __getattribute__(%s)' % name)
        # Intentionally wrong: self._data is itself an attribute read and
        # therefore calls __getattribute__ again.
        return self._data[name]
# This requires accessing self._data from the __getattribute__ method.
# However, if you actually try to do that, Python will recurse until it
# reaches its stack limit, and then it'll die.
data = BrokenDictionaryDB({'foo': 3})
# The access below is left commented out because it recurses until Python
# hits its stack limit:
# data.foo
# The problem is that __getattribute__ accesses self._data, which causes
# __getattribute__ to run again, which accesses self._data again, and so on.
# The solution is to use the super().__getattribute__ method on your instance
# to fetch values from the instance attribute dictionary. This avoids the
# recursion.
class DictionaryDB:
    """Object whose attribute reads are served from an associated mapping.

    Keys of ``data`` are exposed as attributes. Names that are not keys fall
    back to normal attribute lookup, so a missing name raises
    ``AttributeError`` and ``hasattr``/``getattr`` with a default work.
    """

    def __init__(self, data):
        """Store ``data`` as the backing mapping for attribute reads."""
        self._data = data

    def __getattribute__(self, name):
        """Return ``data[name]`` if present, else the normal attribute.

        Raises:
            AttributeError: if ``name`` is neither a key of the mapping nor
                an ordinary attribute of the instance.
        """
        # Fetch _data through super() so this hook is not re-entered.
        data_dict = super().__getattribute__('_data')
        try:
            return data_dict[name]
        except KeyError:
            pass
        # Fall back outside the except block so a resulting AttributeError is
        # not chained to the KeyError.
        return super().__getattribute__(name)
# Demo: read the key foo of the backing dictionary as an attribute.
data = DictionaryDB({'foo': 3})
print('foo: ', data.foo)
# foo: 3
# Similarly, you'll need __setattr__ methods that modify attributes on an
# object to use super().__setattr__.
# Things to remember
# 1. Use __getattr__ and __setattr__ to lazily load and save attributes for an
# object.
# 2. Understand that __getattr__ only gets called once when accessing a
# missing attribute, whereas __getattribute__ gets called every time an
# attribute is accessed.
# 3. Avoid infinite recursion in __getattribute__ and __setattr__ by using
# methods from super() (i.e., the object class) to access instance
# attributes directly.