> class MyClass > > def __str__(self): > return "%s-%s-%s" %(self.field1, self.field2, self.field3) > > def __repr__(self): > return str(self) > > def __hash__(self): > return hash(str(self)) > > > is there anything that can be done to speed up this simple code? right > now it is taking well over 15 minutes to process, on a 3 GHz machine > with lots of RAM (though this is all taking CPU power, not RAM at this > point.)
class MyClass(object):
    """Three-field record that compares and hashes by value.

    Equality, hashing and the string form are all derived from ``field1``,
    ``field2`` and ``field3``.  Instances with equal fields are equal and
    hash alike, so they are interchangeable as dict keys or set members.
    The class defines no ``__init__``; all three fields must be assigned
    before an instance is hashed, compared or printed, and their values
    must themselves be hashable.
    """

    # A new-style class with slots saves some memory.  Every attribute the
    # class uses must be listed here; assigning an unlisted attribute raises
    # AttributeError.
    __slots__ = ("field1", "field2", "field3")

    def __hash__(self):
        """Return the hash of the ``(field1, field2, field3)`` tuple."""
        # In general this is faster than hashing str(self) because it
        # requires fewer function calls.  However, all fields must be
        # hashable.
        return hash((self.field1, self.field2, self.field3))

    def __eq__(self, other):
        """Return True when ``other`` is a MyClass with equal fields.

        Returns ``NotImplemented`` for objects of any other type so Python
        can fall back to the other operand's comparison.
        """
        # You MUST provide a rich compare __eq__ function when you provide
        # your own hash function.  It is called when hash(a) == hash(b).
        if not isinstance(other, MyClass):
            return NotImplemented
        return (self.field1 == other.field1 and
                self.field2 == other.field2 and
                self.field3 == other.field3)

    def __ne__(self, other):
        """Return the negation of ``__eq__``, propagating NotImplemented."""
        # Python 2 does not derive != from ==, so define it explicitly.
        result = self.__eq__(other)
        if result is NotImplemented:
            return result
        return not result

    def __str__(self):
        """Return the fields joined with dashes: ``field1-field2-field3``."""
        return "%s-%s-%s" % (self.field1, self.field2, self.field3)

    # Faster than defining __repr__ as ``return str(self)`` because it
    # doesn't require more function lookups.  You can omit this alias, too.
    # It's not required.
    __repr__ = __str__


# Instead of the try/except clause I recommend collections.defaultdict.
# defaultdict takes one callable as factory function.  Since int() returns 0
# it has the same effect as your code.
from collections import defaultdict

elt = defaultdict(int)

# Christian
# --
# http://mail.python.org/mailman/listinfo/python-list