On Sep 19, 2:01 pm, [EMAIL PROTECTED] wrote:
> Gerard Flanagan:
>
> > data.sort()
> > datadict = \
> > dict((k, len(list(g))) for k,g in groupby(data, lambda s:
> > '.'.join(s.split('.',2)[:2])))
>
> That code may run correctly, but it's quite unreadable, while good
> Python programmers value high readability. So the right thing to do is
> to split that line into parts, giving meaningful names, and maybe even
> add comments.
>
> len(list(g)) looks like a good job for my little leniter() function
> (or better, just an extension to the semantics of len) that some time
> ago some people here judged as useless, while I use it often in both
> Python and D ;-)
>

Extending len() to support iterables sounds like a good idea, except that it could be misleading when:
# len(file(path)) returns the number of lines and /not/ the length in bytes
# as you might first think! :-)
#
# Anyway, here's another possible implementation using bags (multisets):


def major_version(version_string):
    """Convert '1.2.3.2' to '1.2'.

    Only the first two dot-separated components are kept; a string with
    fewer than two components comes back unchanged.
    """
    return '.'.join(version_string.split('.')[:2])


# Here's my implementation of the bag class in Python (sorry about the
# length):

class bag(object):
    """A multiset: a collection that remembers how often each element occurs.

    Elements must be hashable.  They are stored as the keys of the
    private dict ``_counts``, which maps element -> occurrence count.
    """

    def __init__(self, iterable=None):
        """Build a bag from an iterable of elements or a dict of counts.

        iterable -- None for an empty bag, a dict mapping element -> count,
                    or any iterable of hashable elements (every occurrence
                    counts once).

        Raises TypeError if a dict count is not an int and ValueError if
        a dict count is negative.
        """
        self._counts = {}
        if isinstance(iterable, dict):
            for x, n in iterable.items():
                if not isinstance(n, int):
                    raise TypeError("bag counts must be ints, got %r" % (n,))
                if n < 0:
                    raise ValueError("bag counts must be >= 0, got %r" % (n,))
                # A zero count means "not present"; storing it would make
                # `x in bag` true for an element that occurs zero times.
                if n:
                    self._counts[x] = n
        elif iterable is not None:
            # Compare against None rather than testing truthiness, so that
            # iterables without a usable truth value are still accepted.
            for x in iterable:
                self.add(x)

    @staticmethod
    def _from_counts(counts):
        """Wrap an already-built element -> count dict in a new bag."""
        result = bag()
        result._counts = counts
        return result

    def __and__(self, other):
        """Return the intersection: the minimum count of each shared element."""
        mine = self._counts
        common = {}
        for x, n in other._counts.items():
            if x in mine:
                common[x] = min(mine[x], n)
        return bag._from_counts(common)

    def __iand__(self, other):
        """In-place intersection (``a &= b``).

        Returns self; an in-place operator that returns None would rebind
        the left-hand name to None.
        """
        self._counts = self.__and__(other)._counts
        return self

    def __or__(self, other):
        """Return the union: the maximum count of each element."""
        merged = self._counts.copy()
        for x, n in other._counts.items():
            merged[x] = max(merged.get(x, n), n)
        return bag._from_counts(merged)

    def __ior__(self, other):
        """In-place union (``a |= b``); returns self."""
        counts = self._counts
        for x, n in other._counts.items():
            counts[x] = max(counts.get(x, n), n)
        return self

    def __len__(self):
        """Return the total number of elements, counting repeats."""
        return sum(self._counts.values())

    def __list__(self):
        """Return the elements as a list, repeats included.

        Note: ``__list__`` is not a hook Python calls; ``list(b)`` goes
        through ``__iter__``.  Kept for callers that invoke it by name.
        """
        return list(self)

    def __repr__(self):
        """Return e.g. ``bag(['a', 'a', 'b'])``."""
        return "bag([%s])" % ", ".join(repr(x) for x in self)

    def __iter__(self):
        """Yield every element once per occurrence."""
        for x, n in self._counts.items():
            for _ in range(n):
                yield x

    def keys(self):
        """Return the distinct elements."""
        return self._counts.keys()

    def values(self):
        """Return the occurrence counts."""
        return self._counts.values()

    def items(self):
        """Return (element, count) pairs."""
        return self._counts.items()

    def __contains__(self, x):
        """Return True if x is stored in the bag."""
        return x in self._counts

    def add(self, x):
        """Add one occurrence of x."""
        self._counts[x] = self._counts.get(x, 0) + 1

    def __add__(self, other):
        """Return a new bag with the counts of self and other summed.

        other -- anything with an ``items()`` method yielding
                 (element, count) pairs, e.g. another bag or a dict.
        """
        total = self._counts.copy()
        for x, n in other.items():
            total[x] = total.get(x, 0) + n
        return bag._from_counts(total)

    def __sub__(self, other):
        """Return a new bag with other's counts subtracted from self's.

        Elements whose count would fall below one are dropped; elements
        that only occur in other are ignored.
        """
        left = self._counts.copy()
        for x, n in other.items():
            if x in left:
                remaining = left[x] - n
                if remaining < 1:
                    del left[x]
                else:
                    left[x] = remaining
        return bag._from_counts(left)

    def __iadd__(self, other):
        """In-place sum (``a += b``); returns self."""
        counts = self._counts
        for x, n in other.items():
            counts[x] = counts.get(x, 0) + n
        return self

    def __isub__(self, other):
        """In-place difference (``a -= b``); returns self."""
        counts = self._counts
        # Snapshot the pairs first: with `a -= a` the loop would otherwise
        # delete keys from the very dict it is iterating over.
        for x, n in list(other.items()):
            if x in counts:
                remaining = counts[x] - n
                if remaining < 1:
                    del counts[x]
                else:
                    counts[x] = remaining
        return self

    def clear(self):
        """Remove every element."""
        self._counts = {}

    def count(self, x):
        """Return how many times x occurs (0 if it is absent)."""
        return self._counts.get(x, 0)


# Example usage.  It comes after the class so that `bag` already exists
# when these statements run.
versions = ["1.1.1.1", "1.2.2.2", "1.2.2.3", "1.3.1.2", "1.3.4.5"]
bag_of_versions = bag(major_version(x) for x in versions)
dict_of_counts = dict(bag_of_versions.items())

# --
# http://mail.python.org/mailman/listinfo/python-list