-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmodel.py
438 lines (356 loc) · 14.6 KB
/
model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
# -*- coding: utf-8 -*-
"""
chemdataextractor.model
~~~~~~~~~~~~~~~~~~~~~~~
Data model for extracted information.
"""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals

import copy
import json
import logging
from abc import ABCMeta

try:
    # The container ABCs live in collections.abc on Python 3; the aliases in
    # ``collections`` were removed in Python 3.10.
    from collections.abc import MutableSequence
except ImportError:
    # Python 2 fallback.
    from collections import MutableSequence

import six

from .utils import python_2_unicode_compatible
log = logging.getLogger(__name__)
class BaseType(six.with_metaclass(ABCMeta)):
    """Descriptor base class for a single field declared on a Model.

    The descriptor does not hold the value itself: reads and writes go to the
    owning instance's ``_values`` dict, keyed by ``self.name``.
    """

    # Filled in by ModelMeta so that it matches the attribute name on the Model.
    name = None

    def __init__(self, default=None, null=False, required=False, contextual=False):
        """
        :param default: (Optional) The default value for this field if none is set.
        :param bool null: (Optional) Include in serialized output even if value is None. Default False.
        :param bool required: (Optional) Whether a value is required. Default False.
        :param bool contextual: (Optional) Whether this value is contextual. Default False.
        """
        self.contextual = contextual
        self.required = required
        self.null = null
        self.default = default

    def __get__(self, instance, owner):
        """Return the stored value for ``instance``, or the field itself on class access.

        The stored value is returned as-is (``None`` when nothing is stored);
        ``self.default`` is not substituted here.
        """
        if instance is not None:
            return instance._values.get(self.name)
        # Accessed through the Model class rather than an instance.
        return self

    def __set__(self, instance, value):
        """Store ``value`` on ``instance`` after passing it through ``process``."""
        processed = self.process(value)
        instance._values[self.name] = processed

    def process(self, value):
        """Convert an assigned value into the stored format (identity in the base class)."""
        return value

    def serialize(self, value, primitive=False):
        """Serialize a value of this field.

        Values exposing a ``serialize`` attribute (i.e. nested models) are
        asked to serialize themselves; anything else is returned unchanged.
        """
        if not hasattr(value, 'serialize'):
            return value
        return value.serialize(primitive=primitive)
class StringType(BaseType):
    """A text field; assigned values are coerced to unicode strings."""

    def process(self, value):
        """Return ``value`` as a unicode string, leaving ``None`` untouched.

        Useful in case lxml _ElementUnicodeResult objects are passed from a parser.
        """
        if value is None:
            return None
        return six.text_type(value)
class FloatType(BaseType):
    """A floating point number field."""

    def process(self, value):
        """Convert value to a float.

        ``None`` is passed through unchanged. BaseModel assigns the field
        default (``None`` unless one is given) to every unset field, so
        ``float(None)`` would otherwise make any model with a default-less
        FloatType field impossible to construct.

        :param value: The assigned value, or None.
        :returns: ``float(value)``, or None if ``value`` is None.
        :raises ValueError: If ``value`` is a string that is not a valid float.
        :raises TypeError: If ``value`` is of a type ``float()`` cannot convert.
        """
        return float(value) if value is not None else None
class ModelType(BaseType):
    """Field whose value is a single nested model instance."""

    def __init__(self, model, **kwargs):
        """
        :param model: The model class held by this field.
        :param kwargs: Forwarded unchanged to ``BaseType.__init__``.
        """
        self.model_class = model
        # Keep the class name alongside the class itself.
        self.model_name = model.__name__
        super(ModelType, self).__init__(**kwargs)

    def serialize(self, value, primitive=False):
        """Serialize the nested model by delegating to its own ``serialize``."""
        nested = value.serialize(primitive=primitive)
        return nested
class ListType(BaseType):
    """Field holding a list whose elements are handled by a nested field type."""

    def __init__(self, field, default=None, **kwargs):
        """
        :param field: The field instance used to process and serialize each element.
        :param default: (Optional) Default list. An empty list is used when omitted.
        :param kwargs: Forwarded unchanged to ``BaseType.__init__``.
        """
        super(ListType, self).__init__(**kwargs)
        if default is None:
            default = []
        self.field = field
        self.default = default

    def __set__(self, instance, value):
        """Store a new list built by running each element through the nested field's ``process``."""
        element_field = self.field
        processed = []
        for element in value:
            processed.append(element_field.process(element))
        instance._values[self.name] = processed

    def serialize(self, value, primitive=False):
        """Serialize every element with the nested field and return the results as a list."""
        element_field = self.field
        serialized = []
        for element in value:
            serialized.append(element_field.serialize(element, primitive=primitive))
        return serialized
class ModelMeta(ABCMeta):
    """Metaclass that collects BaseType attributes into a per-class ``fields`` dict."""

    def __new__(mcs, name, bases, attrs):
        """Create the class, naming each declared field and registering it in ``fields``."""
        declared = {}
        for attr_name, attr_value in six.iteritems(attrs):
            if not isinstance(attr_value, BaseType):
                continue
            # The field learns the attribute name it was declared under.
            attr_value.name = six.text_type(attr_name)
            declared[attr_name] = attr_value
        cls = super(ModelMeta, mcs).__new__(mcs, name, bases, attrs)
        # Work on a copy so the ``fields`` dict visible through the bases is
        # not mutated by this class's own declarations.
        combined = cls.fields.copy()
        combined.update(declared)
        cls.fields = combined
        return cls

    def __setattr__(cls, key, value):
        """Register fields that are attached to the class after it has been created."""
        if isinstance(value, BaseType):
            value.name = six.text_type(key)
            cls.fields[key] = value
        return super(ModelMeta, cls).__setattr__(key, value)
@python_2_unicode_compatible
class BaseModel(six.with_metaclass(ModelMeta)):
    """Base class for records made up of named fields.

    Fields are BaseType descriptors declared as class attributes; ModelMeta
    gathers them into the class-level ``fields`` dict. Field values live in
    the per-instance ``_values`` dict. Instances support both attribute-style
    and dictionary-style access to their fields.
    """

    fields = {}

    def __init__(self, **raw_data):
        """
        :param raw_data: Initial values, assigned with ``setattr`` by keyword name.
            Every field not mentioned is set to a shallow copy of its default.
        """
        self._values = {}
        for key, value in six.iteritems(raw_data):
            setattr(self, key, value)
        # Set defaults. Copy them so instances never share a mutable default.
        for key, field in six.iteritems(self.fields):
            if key not in raw_data:
                setattr(self, key, copy.copy(field.default))

    def __repr__(self):
        return '<%s>' % (self.__class__.__name__,)

    def __str__(self):
        return '<%s>' % (self.__class__.__name__,)

    def __eq__(self, other):
        """Two models are equal if ``other`` is an instance of this class with equal stored values."""
        # TODO: Check this actually works as expected (what about default values?)
        if isinstance(other, self.__class__):
            return self._values == other._values
        return False

    def __ne__(self, other):
        """Inverse of ``__eq__``.

        Python 2 does not derive ``!=`` from ``__eq__``; without this method
        two equal models would still compare as unequal there.
        """
        return not self == other

    def __iter__(self):
        """Iterate over the field names."""
        return iter(self.fields)

    def __delattr__(self, attr):
        """Handle deletion of field values by setting to default if specified."""
        if attr in self.fields:
            # Copy the default, as __init__ does, so a mutable default object
            # is never aliased into (and later mutated through) this instance.
            setattr(self, attr, copy.copy(self.fields[attr].default))
        else:
            super(BaseModel, self).__delattr__(attr)

    def __getitem__(self, key):
        """Redirect dictionary-style field access to attribute-style.

        :raises KeyError: If ``key`` is not a field name.
        """
        try:
            if key in self.fields:
                return getattr(self, key)
        except AttributeError:
            pass
        raise KeyError(key)

    def __setitem__(self, key, value):
        """Redirect dictionary-style field setting to attribute-style.

        :raises KeyError: If ``key`` is not a field name.
        """
        if key not in self.fields:
            raise KeyError(key)
        return setattr(self, key, value)

    def __contains__(self, name):
        """Return True if attribute ``name`` exists on this instance and is not None."""
        try:
            val = getattr(self, name)
            return val is not None
        except AttributeError:
            return False

    def keys(self):
        """Return the field names as a list."""
        return list(iter(self))

    def items(self):
        """Return a list of ``(field name, value)`` pairs."""
        return [(k, getattr(self, k)) for k in self]

    def values(self):
        """Return the field values as a list, in field iteration order."""
        return [getattr(self, k) for k in self]

    def get(self, key, default=None):
        """Return attribute ``key``, or ``default`` if there is no such attribute."""
        return getattr(self, key, default)

    @property
    def is_contextual(self):
        """True unless some non-empty field holds non-contextual information.

        Empty values (``None``, ``''``, ``[]``) are ignored. Fields holding
        models, singly or in a list, defer to the nested model's own
        ``is_contextual``; other fields are judged by their ``contextual`` flag.
        """
        for k in self:
            value = getattr(self, k)
            field = self.fields.get(k)
            # Not contextual if any contextual=False field has a value
            if value not in [None, '', []]:
                # If a ListType, it depends on the contained type
                if isinstance(field, ListType):
                    # If a list of Models, not contextual if any of them isn't
                    if isinstance(field.field, ModelType):
                        for model_value in value:
                            if not model_value.is_contextual:
                                return False
                    elif not field.field.contextual:
                        return False
                else:
                    # If a single Model type, not contextual if it isn't
                    if isinstance(field, ModelType):
                        if not value.is_contextual:
                            return False
                    elif not field.contextual:
                        return False
        return True

    def serialize(self, primitive=False):
        """Convert Model to python dictionary.

        :param bool primitive: Forwarded to each field's ``serialize``.
        :returns: Dict keyed by field name. Empty values (``None``, ``''``,
            ``[]``) are omitted unless the field was declared with ``null=True``.
        """
        data = {}
        for field_name in self:
            value = getattr(self, field_name)
            field = self.fields.get(field_name)
            if value is not None:
                value = field.serialize(value, primitive=primitive)
            # Skip empty fields unless field.null
            if not field.null and value in [None, '', []]:
                continue
            data[field.name] = value
        return data

    def to_json(self, *args, **kwargs):
        """Convert Model to JSON. Extra arguments are passed on to ``json.dumps``."""
        return json.dumps(self.serialize(primitive=True), *args, **kwargs)
@python_2_unicode_compatible
class ModelList(MutableSequence):
    """Wrapper around a list of Models objects to facilitate operations on all at once."""

    def __init__(self, *models):
        """
        :param models: The models to wrap, given as positional arguments.
        """
        self.models = list(models)

    def __getitem__(self, index):
        return self.models[index]

    def __setitem__(self, index, value):
        self.models[index] = value

    def __delitem__(self, index):
        del self.models[index]

    def __len__(self):
        return len(self.models)

    def __repr__(self):
        return self.models.__repr__()

    def __str__(self):
        return self.models.__str__()

    def insert(self, index, value):
        """Insert ``value`` before position ``index``."""
        self.models.insert(index, value)

    def serialize(self, primitive=False):
        """Serialize to a list of python dictionaries.

        :param bool primitive: Forwarded to each element's ``serialize``. It is
            accepted here so a ModelList can be serialized through the same
            ``serialize(primitive=...)`` call that models and fields use.
        """
        return [e.serialize(primitive=primitive) for e in self.models]

    def to_json(self, *args, **kwargs):
        """Convert ModelList to JSON. Extra arguments are passed on to ``json.dumps``."""
        # Request primitive output, matching BaseModel.to_json.
        return json.dumps(self.serialize(primitive=True), *args, **kwargs)
class ParticleDiameter(BaseModel):
    """Particle diameter record: a ``value`` plus contextual ``units``."""

    # Stored as text; no numeric conversion is applied.
    value = StringType()
    # Flagged contextual, so units alone do not make a record non-contextual.
    units = StringType(contextual=True)
class ParticleDensity(BaseModel):
    """Particle density record: a ``value`` plus contextual ``units``.

    The class used to be declared twice in a row with identical bodies; the
    second declaration simply rebound the name, so a single one is kept.
    """

    # Stored as text; no numeric conversion is applied.
    value = StringType()
    # Flagged contextual, so units alone do not make a record non-contextual.
    units = StringType(contextual=True)
class GasDensity(BaseModel):
    """Gas density record: a ``value`` plus ``units``."""

    # Stored as text; no numeric conversion is applied.
    value = StringType()
    # NOTE(review): unlike the other value/units models in this file, ``units``
    # is not declared contextual here - confirm whether that is intentional.
    units = StringType()
class GasViscosity(BaseModel):
    """Gas viscosity record: a ``value`` plus contextual ``units``."""

    # Stored as text; no numeric conversion is applied.
    value = StringType()
    # Flagged contextual, so units alone do not make a record non-contextual.
    units = StringType(contextual=True)
class BedVoidage(BaseModel):
    """Bed voidage record holding a single ``value`` and no units field."""

    # Stored as text; no numeric conversion is applied.
    value = StringType()
class Sphericity(BaseModel):
    """Sphericity record holding a single ``value`` and no units field."""

    # Stored as text; no numeric conversion is applied.
    value = StringType()
class Velocity(BaseModel):
    """Velocity record: a ``value`` plus contextual fields describing it.

    Only ``value`` is non-contextual; every other field is declared with
    ``contextual=True``. All fields are stored as text.
    """

    value = StringType()
    units = StringType(contextual=True)

    # Conditions attached to the value.
    temperature = StringType(contextual=True)
    temperature_units = StringType(contextual=True)
    pressure = StringType(contextual=True)
    pressure_units = StringType(contextual=True)

    # Diameter and density attached to the value.
    diameter = StringType(contextual=True)
    diameter_units = StringType(contextual=True)
    density = StringType(contextual=True)
    density_units = StringType(contextual=True)

    apparatus = StringType(contextual=True)
class TabelTemp(BaseModel):
    """Record with a ``value`` and contextual ``units``, used for ``Compound.Table_Temp``.

    The class name is spelled "Tabel"; it is kept because it is the name other
    code refers to.
    """

    # Stored as text; no numeric conversion is applied.
    value = StringType()
    # Flagged contextual, so units alone do not make a record non-contextual.
    units = StringType(contextual=True)
class TabelPress(BaseModel):
    """Record with a ``value`` and contextual ``units``, used for ``Compound.Table_Press``.

    The class name is spelled "Tabel"; it is kept because it is the name other
    code refers to.
    """

    # Stored as text; no numeric conversion is applied.
    value = StringType()
    # Flagged contextual, so units alone do not make a record non-contextual.
    units = StringType(contextual=True)
class Compound(BaseModel):
    """A compound record: identifying text lists plus lists of property models.

    Every field is a ListType, which ``merge`` and ``merge_contextual`` rely on.
    """

    # Identifier fields (plain text).
    names = ListType(StringType())
    labels = ListType(StringType())
    roles = ListType(StringType())

    # Property fields (lists of nested models).
    particle_diameter = ListType(ModelType(ParticleDiameter))
    particle_density = ListType(ModelType(ParticleDensity))
    gas_density = ListType(ModelType(GasDensity))
    gas_viscosity = ListType(ModelType(GasViscosity))
    particle_sphericity = ListType(ModelType(Sphericity))
    bed_voidage = ListType(ModelType(BedVoidage))
    Umf = ListType(ModelType(Velocity))
    Table_Temp = ListType(ModelType(TabelTemp))
    Table_Press = ListType(ModelType(TabelPress))

    def merge(self, other):
        """Merge data from another Compound into this Compound.

        Every item of every field of ``other`` is appended to the same field
        of this compound. Items are not de-duplicated and are appended by
        reference, not copied.

        :param other: The Compound to take data from.
        :returns: This compound, to allow chaining.
        """
        if other is self:
            # Appending a list's items to that same list while iterating over
            # it never terminates; merging a compound with itself is a no-op.
            return self
        # Serializing both compounds is only worth doing if the message is emitted.
        debug_enabled = log.isEnabledFor(logging.DEBUG)
        if debug_enabled:
            log.debug('Merging: %s and %s' % (self.serialize(), other.serialize()))
        for k in self.keys():
            for new_item in other[k]:
                self[k].append(new_item)
        if debug_enabled:
            log.debug('Result: %s' % self.serialize())
        return self

    def merge_contextual(self, other):
        """Merge in contextual info from a template Compound.

        For each model item in each field of this compound, empty fields are
        filled in from the items under the same key in ``other``. Text items
        of ``other`` (names, labels, roles) are never merged.

        :param other: The template Compound supplying contextual values.
        :returns: This compound, to allow chaining.
        """
        # TODO: This is currently dependent on our data model? Make more robust to schema changes
        # Currently we assume all lists at Compound level, with 1 further potential nested level of lists
        for k in self.keys():
            for item in self[k]:
                for other_item in other.get(k, []):
                    # Skip text properties (don't merge names, labels, roles)
                    if isinstance(other_item, six.text_type):
                        continue
                    for otherk in other_item.keys():
                        if isinstance(other_item[otherk], list):
                            # Nested lists: only the first template entry is
                            # used, and only when both sides are non-empty.
                            if len(other_item[otherk]) > 0 and len(item[otherk]) > 0:
                                other_nested_item = other_item[otherk][0]
                                for othernestedk in other_nested_item.keys():
                                    for nested_item in item[otherk]:
                                        if not nested_item[othernestedk]:
                                            nested_item[othernestedk] = other_nested_item[othernestedk]
                        elif not item[otherk]:
                            item[otherk] = other_item[otherk]
        # Serialization is only worth doing if the message is emitted.
        if log.isEnabledFor(logging.DEBUG):
            log.debug('Result: %s' % self.serialize())
        return self

    @property
    def is_unidentified(self):
        """Return True if the compound has neither names nor labels."""
        return not self.names and not self.labels

    @property
    def is_id_only(self):
        """Return True if identifier information only.

        That is: no field other than names, labels and roles has a truthy
        value, and at least one of names or labels is non-empty.
        """
        for key, value in self.items():
            if key not in {'names', 'labels', 'roles'} and value:
                return False
        return bool(self.names or self.labels)