Python Notes

Wednesday, October 20, 2004

MetaTemplate: Generic data templates in pure Python

I'm happy to announce that the metatemplate module is now sufficiently mature for a beta-quality release. I've been using it for my own projects over the past few weeks. The latest changes involved cleaning up the code and adding a new __attr__ private member to the template, which maps alternative names to Python names. This feature was needed so that template attributes can be stored on external resources (such as INI files) under alternative names that may include characters that are invalid in standard Python symbol names.

This module can be used to declare standard data structures containing attributes and arbitrarily nested templates. Attributes are stored in the order they are declared in the source code, which allows the template to be used to build structures that reflect this ordering. Some applications include INI-style configuration handling, data entry forms, document structure templates, and generic data records with ordered members.


"""
metatemplate.py -- template metaclass that can be used to customize any
class to store user-defined attributes in the original definition
order.

"""

import sys
import itertools
from inspect import isclass, isdatadescriptor
from types import StringType, IntType, FloatType, ListType, DictType

#----------------------------------------------------------------------
# Debug constants (for testing purposes)

# Each flag is only tested for truthiness at its point of use; 0 keeps
# the corresponding trace output switched off.
debug_auto_instantiation = 0  # traces GenericTemplate.__init__
debug_generic_attribute = 0   # traces GenericAttribute.__get__ / __set__
debug_iterator = 0            # traces GenericTemplate.iterall and arms its recursion watchdog
debug_typed_attribute = 0     # traces TypedAttribute.__set__

#----------------------------------------------------------------------
# AbstractAttribute is the ancestor of all classes that can be used
# in the metatemplate framework. Abstract attributes are named.

class AbstractAttribute(object):
    """Common ancestor of every object handled by the metatemplate framework.

    The framework recognises its own members by testing instances with
    ``isinstance`` and classes with ``issubclass`` against this class.
    """

    # Class-level default: an empty name means "not named yet".
    name = ''

#----------------------------------------------------------------------
# GenericAttribute is the ancestor of all simple elements that are
# used as attributes of templates
#
# When referred from an instance, the __get__ method returns the value
# associated with the attribute, using the default value if needed.
# If called from the class, the __get__ method returns the property
# object itself. This is used for some internal checks.

class GenericAttribute(AbstractAttribute):
    """Generic attributes for generic containers.

    This is a data descriptor (it defines both ``__get__`` and
    ``__set__``).  Read through an instance, it returns the value stored
    for that instance, or the default given at construction when nothing
    has been stored yet.  Read through the class, it returns the
    descriptor object itself.

    Values are kept per instance, in the dict stored under the
    ``'__attr__'`` key of the instance's ``__dict__``, indexed by
    ``self.name``.
    """

    def __init__(self, default=None, name=None):
        """Create the attribute.

        default -- value returned while no value has been set
        name    -- key under which the value is stored; may be None
        """
        # creation-order number, used by getfields() as the sort key
        self._seqno = next_attribute_id()
        self.value = default
        self.name = name

    def __repr__(self):
        # The published listing had an empty format string here, which
        # raised TypeError as soon as the attribute was printed.
        return "<%s '%s'>" % (self.__class__.__name__, self.name)

    def __get__(self, instance, owner):
        """Return the stored value, or the descriptor for class access."""
        if debug_generic_attribute:
            print("GET self:[%s], instance:[%s], owner:[%s]" % (self, instance, owner))
        # Class access is signalled by instance being None.  A plain
        # truth test would also catch instances that evaluate as false
        # (for example ones defining __len__) and hand them the
        # descriptor instead of their value.
        if instance is None:
            return self
        attrdict = instance.__dict__.setdefault('__attr__', {})
        return attrdict.get(self.name, self.value)

    def __set__(self, instance, value):
        """Store value for this attribute on the given instance."""
        if debug_generic_attribute:
            print("SET self:[%s], instance:[%s], value:[%s]" % (self, instance, value))
        attrdict = instance.__dict__.setdefault('__attr__', {})
        attrdict[self.name] = value

class TypedAttribute(GenericAttribute):
    """Typed attributes for generic containers.

    Behaves like GenericAttribute, but checks the type of every value
    assigned through it.  The expected type is ``mytype`` when given,
    otherwise the type of ``default``.

    NOTE: when neither ``default`` nor ``mytype`` is supplied the
    expected type is ``type(None)``.
    """

    def __init__(self, default=None, name=None, mytype=None):
        """Create the attribute.

        default -- value returned while no value has been set
        name    -- key under which the value is stored; may be None
        mytype  -- expected type/class; derived from default if omitted

        Raises TypeError when mytype is given but is not a class.
        """
        GenericAttribute.__init__(self, default, name)
        if mytype:
            if isclass(mytype):
                self.mytype = mytype
            else:
                raise TypeError("Argument expects None "
                                "or a valid type/class")
        else:
            self.mytype = type(default)

    def __repr__(self):
        # The published listing had an empty format string here, which
        # raised TypeError as soon as the attribute was printed.
        return "<%s '%s' (%s)>" % (self.__class__.__name__, self.name,
                                   self.mytype.__name__)

    def __set__(self, instance, value):
        """Store value on instance after checking/converting its type.

        Raises TypeError when value is neither of the expected type nor
        a string; a failed string conversion propagates whatever the
        target type's constructor raises.
        """
        if debug_typed_attribute:
            print("SET self:[%s], instance:[%s], value:[%s]" % (self, instance, value))
        if not isinstance(value, self.mytype):
            # str is the same object as types.StringType on Python 2
            if isinstance(value, str):
                # tries to convert a string to the correct target
                # type; needed when reading values from files.
                value = self.mytype(value)
            else:
                raise TypeError("Expected %s attribute" % self.mytype.__name__)
        attrdict = instance.__dict__.setdefault('__attr__', {})
        attrdict[self.name] = value

#----------------------------------------------------------------------
# auxiliary functions

def stripindent(docstring):
    """
    stripindent - reformats a multiline, triple-quoted string, removing
    extra leading spaces that are used for indentation purposes.

    docstring -- the text to clean up; any false value yields ''

    Returns a single string: the first line is stripped on both sides,
    the common indentation of the following lines is removed, trailing
    whitespace is dropped and leading/trailing blank lines are
    discarded.
    """
    # adapted from the trim() recipe in PEP 257:
    # http://www.python.org/peps/pep-0257.html
    if not docstring:
        return ''
    # Convert tabs to spaces (following the normal Python rules)
    # and split into a list of lines:
    lines = docstring.expandtabs().splitlines()
    # Indentation of every non-blank line (first line doesn't count):
    indents = [len(line) - len(line.lstrip())
               for line in lines[1:] if line.strip()]
    # Remove indentation (first line is special):
    trimmed = [lines[0].strip()]
    if indents:
        # No non-blank continuation line means nothing worth keeping
        # after the first line, so they are only appended in this case.
        indent = min(indents)
        for line in lines[1:]:
            trimmed.append(line[indent:].rstrip())
    # Strip off trailing and leading blank lines:
    while trimmed and not trimmed[-1]:
        trimmed.pop()
    while trimmed and not trimmed[0]:
        trimmed.pop(0)
    # Return a single string:
    return '\n'.join(trimmed)

#----------------------------------------------------------------------

# Shared counter: every call to next_attribute_id() returns the next
# integer, starting at 0.  The numbers record creation order.
_attribute_ids = itertools.count()
try:
    next_attribute_id = _attribute_ids.next
except AttributeError:
    # iterators spell this method __next__ on Python 3
    next_attribute_id = _attribute_ids.__next__

def getfields(dct):
    """
    takes a dictionary of class attributes and yields one decorated
    entry per item, suitable for sorting into declaration order.

    Each entry is a tuple (position, (name, object)):

    - GenericAttribute instances and AbstractAttribute subclasses use
      their own _seqno as position;
    - public members holding a str, int, float, list or dict are wrapped
      in a new GenericAttribute and given position sys.maxint, which
      sorts them last, in alphabetical order;
    - everything else is passed through unchanged with position 0.

    This is a generator; wrap the call in list() to sort the result.
    """
    # same objects as StringType, IntType, FloatType, ListType and
    # DictType from the types module; built once instead of per item
    basic_types = (str, int, float, list, dict)
    for fname, fobj in dct.items():
        if isinstance(fobj, GenericAttribute):
            yield (fobj._seqno, (fname, fobj))
        elif isclass(fobj) and issubclass(fobj, AbstractAttribute):
            yield (fobj._seqno, (fname, fobj))
        elif not fname.startswith('_') and isinstance(fobj, basic_types):
            # conventional attributes from basic types are just stored
            # as GenericAttributes, and put at the end of the list,
            # in alphabetical order.  startswith() is used so that an
            # empty key cannot raise IndexError.
            yield (sys.maxint, (fname, GenericAttribute(fobj)))
        else:
            # private names and members of any other kind
            yield (0, (fname, fobj))

def makefieldsdict(dct, bases):
    """
    builds the class dictionary used to create a template class.

    dct   -- the dictionary of attributes from the class statement
    bases -- the tuple of base classes

    Returns a new dict holding the members produced by getfields() plus
    two extra entries:

    '__fields__' -- list of (name, object) pairs: inherited fields
                    first, in the order of the base classes, followed
                    by the fields declared in dct, in declaration order
    '__attr__'   -- dict mapping each field's alternative name
                    (object.name) to its (name, object) pair

    Attribute instances that have no name yet are named after the
    Python name they are bound to.
    """
    # build the field list and sort it
    fields = list(getfields(dct))
    fields.sort()
    # undecorate the list and build a dict that will be returned later
    sorted_field_list = [field[1] for field in fields]
    field_dict = dict(sorted_field_list)
    # finds all nested instances and classes that are templates
    attribute_list = [field for field in sorted_field_list
                      if (isinstance(field[1], AbstractAttribute) or
                          (isclass(field[1]) and
                           issubclass(field[1], AbstractAttribute)))]
    # check baseclasses for attributes inherited but not overriden.
    # A field reachable through more than one base class is listed only
    # once, at the position given by the first base that declares it;
    # without this check it would appear twice, and an overriden field
    # would come back a second time with its stale base definition.
    # Name clashes between unrelated bases are still not recommended.
    inherited = []
    seen_names = []
    for baseclass in bases:
        base_field_list = getattr(baseclass, '__fields__', None)
        # looks for a valid __fields__ attribute in an ancestor
        # (list is the same object as types.ListType on Python 2)
        if not isinstance(base_field_list, list):
            continue
        fnames = [f[0] for f in attribute_list]
        for fname, fobj in base_field_list:
            if fname in seen_names:
                # already taken from an earlier base class
                continue
            seen_names.append(fname)
            if fname in fnames:
                # overriden - inherited list contains the new value,
                # which keeps the position of the original declaration
                newobj = field_dict[fname]
                inherited.append((fname, newobj))
                # remove attribute and quick check field names list
                attribute_list.remove((fname, newobj))
                fnames.remove(fname)
            else:
                # copy the original entry into the inherited list
                inherited.append((fname, fobj))

    #---------------------------------------------------------------
    # IMPLEMENTATION NOTE
    # Templates have two private members named __fields__ and
    # __attr__. The former stores the ordered field definitions,
    # while the latter is a dict indexed by the alternative attribute
    # name. There are situations where each structure is more
    # convenient. The ideal situation would be to have an ordered
    # dict, but this is not the case right now...

    # stores the ordered field list in the new class directory
    all_fields = inherited + attribute_list
    field_dict['__fields__'] = all_fields

    # generates a dict indexed by the alternative attribute name; for
    # each key it stores a tuple containing the field name (used for
    # binding in the template) and the object definition
    attr_dict = {}
    for field in all_fields:
        fname, fobj = field
        if isinstance(fobj, AbstractAttribute) and not fobj.name:
            # unnamed attribute instance: default to the Python name
            fobj.name = fname
        # NOTE(review): the published listing lost its indentation, so
        # it is unclear whether nested classes were meant to be
        # registered here as well; this assumes they are - confirm.
        attr_dict[fobj.name] = field
    field_dict['__attr__'] = attr_dict
    return field_dict

#----------------------------------------------------------------------
# MetaTemplate metaclass
#
# Most of the hard work is done outside the class by the auxiliary
# functions makefieldsdict() and getfields()

class MetaTemplate(type):
    """Metaclass that records the declaration order of template fields.

    Classes created through it are built from the dictionary returned
    by makefieldsdict() and additionally receive a ``_seqno`` class
    attribute (their own creation-order number) and a ``name`` class
    attribute holding the class name.
    """

    def __new__(cls, name, bases, dct):
        """Create the class ``name`` from the processed field dictionary."""
        # works out the attribute ordering; keeps inherited order
        newdct = makefieldsdict(dct, bases)
        # creates the class using only the processed field list
        newclass = type.__new__(cls, name, bases, newdct)
        # numbering the class lets getfields() order it among the
        # attributes of an enclosing template
        newclass._seqno = next_attribute_id()
        newclass.name = name
        return newclass

#----------------------------------------------------------------------
# GenericTemplate superclass

class GenericTemplate(AbstractAttribute):
    """Base class for templates with ordered fields.

    The class is built by MetaTemplate, so it carries a ``__fields__``
    list of (name, object) pairs in declaration order.  Instantiating a
    template also instantiates every nested template class.
    """

    # Python 2 spelling of the metaclass hook
    __metaclass__ = MetaTemplate

    def __init__(self):
        """ instantiates all nested classes upon creation """

        # builds a copy of the field list. this is needed to allow
        # customizations of the instance not to be reflected in the
        # original class field list.
        self.__fields__ = list(self.__class__.__fields__)

        # auto instantiates nested classes and attributes
        if debug_auto_instantiation:
            print("AutoInstantiation <%s>: fieldlist = %s" %
                  (self.name, self.__fields__))
        for fname, fobj in self.__fields__:
            if isclass(fobj) and issubclass(fobj, GenericTemplate):
                # found a nested class: the instance gets its own copy,
                # which hides the class of the same name
                if debug_auto_instantiation:
                    print("AutoInstantiation <%s>: field[%s] is a "
                          "Container Subclass" % (self.name, fname))
                setattr(self, fname, fobj())
            elif isinstance(fobj, AbstractAttribute):
                # found an attribute instance; nothing to create, its
                # name was already filled in by makefieldsdict()
                if debug_auto_instantiation:
                    print("AutoInstantiation <%s>: field[%s] is an "
                          "Attribute Instance" % (self.name, fname))
            else:
                if debug_auto_instantiation:
                    print("AutoInstantiation <%s>: field[%s] is "
                          "unknown" % (self.name, fname))

    def iterall(cls, preorder=True, posorder=False, interface=None, _iterlevel=0):
        """
        Generic recursive iterator for nested templates

        This iterator handles all the recursion needed to navigate deep
        nested structures. It 'flattens' the structure, returning a
        simple sequence of attributes that can be processed
        automatically by sequential code.

        This iterator was implemented originally for testing purposes.
        It's a class method, because some information can only be
        accessed by the class (example: alternative attribute names).

        preorder is a flag. If True, nested structures will be returned
        *before* descending into its component attributes.

        posorder is a flag. If True, nested structures will be returned
        *after* descending into its component attributes.

        if both preorder and posorder are true, nested structures will
        be returned both *before* and *after* the component attributes
        are returned.

        interface is a filter. It allows retrieving only the nested
        classes that expose a particular interface (GenericTemplate
        when omitted). It doesn't descend into nested classes that
        don't expose that interface.

        _iterlevel is a simple-minded watchdog. It was included for
        debugging during development, to stop infinite recursion in
        some weird cases, and will probably be removed from released
        code. It is only checked while debug_iterator is set.
        """
        if not interface:
            interface = GenericTemplate
        if debug_iterator:
            if _iterlevel > 5:
                print("[4] Recursion limit exceeded")
                return
            print("[1] entry code: %s" % (cls.name,))
        if preorder:
            yield cls
        for fname, fobj in cls.__fields__:
            # class access: attributes come back as their descriptor
            obj = getattr(cls, fname)
            if debug_iterator:
                print("[2] yield: %s %s %s" % (cls.name, fname, obj.name))
            if isclass(obj) and issubclass(obj, interface):
                if debug_iterator:
                    print("[3] found nested class:  %s %s" %
                          (obj, [x[0] for x in obj.__fields__]))
                for member in obj.iterall(preorder=preorder, posorder=posorder,
                                          interface=interface,
                                          _iterlevel=_iterlevel + 1):
                    yield member
            elif isinstance(obj, GenericAttribute):
                yield obj
        if posorder:
            yield cls
    iterall = classmethod(iterall)

    def iterfields(self):
        """Simple iterator: returns ordered fields"""
        for fname, fobj in self.__fields__:
            yield getattr(self, fname)

    def __repr__(self):
        return "<%s '%s'>" % (self.__class__.__name__, self.name,)

7 Comments:

Post a Comment

<< Home