I have written a script to find the modules which export the largest number of names. The gc.get_referrers(*objs) function also gives an idea of the dependencies between the modules.
The code (statsmod.py) : #!/usr/bin/env python # -*- coding: latin-1 -*- """ statsmod.py module rudimentaire de statistiques des noms exportés par les modules de la bibliothèque standard """ import sys import gc from glob import glob import os, os.path from os.path import basename def browse_stdlib(): """browse the standard library returns list of names of modules """ pyver = 'python%s' % (sys.version[:3],) pyglob = os.path.join(sys.prefix, 'lib', pyver, '*.py') # lpys = glob(pyglob) if os.path.exists(os.path.join(sys.prefix, 'Lib', 'os.pyc')): pyglob = os.path.join(sys.prefix, 'Lib', '*.py') lpys = map(basename, glob(pyglob)) names = [ name[:-3] for name in lpys ] # remove some obsolete modules ('this' + DeprecationWarning) for dontparse in ("this", "tzparse", 'FCNTL', 'posixfile', 'pre', 'regsub', 'statcache', 'TERMIOS', 'xmllib'): try: names.remove(dontparse) except ValueError: continue return names def exports(names, with_modules=False): """imports all the modules in names returns a 2-tuple : - list of tuples : NumberOfExternalNames len(dir(module)) nodname - list of modules (if with_modules is true) """ res = [] add = res.append _all = [] modules = [] # this simple minded method (__import__) doesn't include sys ? 
for name in names: print name, " ", try: module = __import__(name, globals(), locals(), _all) ldir = len(dir(module)) if hasattr(module, '__all__'): nexports = len(module.__all__) else: nexports = ldir add((nexports, ldir, name)) if with_modules: modules.append(module) # del sys.modules[name] except ImportError, msg: print "cannot import module", name, msg return res, modules def pm_histo(values, nbins=20): """a poor man histogram Return a list of nbins tuples (left, right) such that the union of the consecutive ranges(left, right) is range(len(values)+1) values[k] """ vlo, vhi = values[0], values[-1]+1 nbins = min(nbins, vhi-vlo) deltax = int((vhi - vlo)/nbins) assert deltax > 0 ranges = [] add = ranges.append left = 0 # left index first bin val = vlo + deltax while val < vhi: for right in range(left, len(values)): if values[right] > val: break add((left, right)) left = right val = val + deltax return ranges def basic_stat(seq): """basic statistics on the values in seq Returns NumberOfItems, MeanValue, StandardDeviation, variance """ s0, s1, s2 = 0, 0, 0 for indx, item in enumerate(seq): s0 = s0 + 1 # seq may be an iterable without len Xi = float(item) if not indx: Xmin = Xi s1 = s1 + Xi s2 = s2 + Xi*Xi # s0 = len(seq) # sum of 0 order Xm = s1/s0 # mean value Xmax = Xi median = (Xmin + Xmax)*0.5 variance = (s2 - s0*Xm*Xm)/s0 # ecart-type ** 2 import math stddev = math.sqrt(variance) # ecart-type return s0, Xmin, Xmax, median, Xm, stddev # , variance if __name__ == '__main__': names = ['cStringIO', 'sys', 'gc' ] names.extend(browse_stdlib()) freqs, modules = exports(names, True) print # exports() prints without new line print "%d imported modules and %d in sys.modules" % ( len(freqs), len(sys.modules)) print "number of unreachable objects", gc.collect() simples = [] while modules: module = modules.pop() # print module.__name__, sys.getrefcount(module) items = gc.get_referrers(module) litems = len(items) if litems <= 2: simples.append(module.__name__) del 
sys.modules[module.__name__], module, items else: print "referrers of %s" % (module.__name__,) for item in items[2:]: name = item.get('__file__', 'unknown') if name.endswith('__init__.pyc'): pslash = name.rfind(os.sep) pslash = name[:pslash].rfind(os.sep) name = name[pslash+1:][:-4] # strip .pyc elif name.endswith('__init__.py'): pslash = name.rfind(os.sep) pslash = name[:pslash].rfind(os.sep) name = name[pslash+1:][:-3] # strip .py elif name.endswith('.pyc'): pslash = name.rfind(os.sep) name = name[pslash+1:][:-4] # strip .pyc elif name.endswith('.py'): pslash = name.rfind(os.sep) name = name[pslash+1:][:-3] # strip .py print name, del module, items print print "number of unreachable objects", gc.collect() print "new length of sys.modules %d" % (len(sys.modules),) print "%d simple modules" % (len(simples),) freqs.sort() values = [item[0] for item in freqs ] # print freqs[-2:] # supprimés # del values[-2:] ranges = pm_histo(values) ranges2 = [ item for item in ranges if item[1] > item[0]] limite = ranges[0][1] + 1 # first bin rangesbas = pm_histo(values[:95], 6) print rangesbas lbin = 11 start = 0 print "St Nb. min max median average stddev" fmt = "%3d%3d%6.1f%6.1f%8.3f%8.3f%8.3f" while start < len(values): res = (start,) + basic_stat(values[start:start+lbin]) print fmt % res start = start + lbin print "modules with a lot of external names :" for item in freqs[140:]: print item Parts of output of python -i statsmod.py (python2.4 windows) repr rexec rfc822 rlcompleter cannot import module rlcompleter No module named readline robotparser sched ... etc ... whrandom C:\Python24\lib\whrandom.py:38: DeprecationWarning: the whrandom module is deprecated; please use the random module DeprecationWarning) xdrlib xmlrpclib zipfile ... etc ... _threading_local __future__ __phello__.foo Hello world... 
cannot import module __phello__.foo No module named foo cannot import module __phello__.foo No module named foo number of unreachable objects 0 referrers of __future__ referrers of __future__ ... etc ... referrers of socket asynchat asyncore BaseHTTPServer SocketServer urllib httplib ftplib imaplib nntplib poplib smtpd smtplib Utils ... etc ... referrers of cStringIO logging\__init__ xmlrpclib number of unreachable objects 564 new length of sys.modules 154 121 simple modules [(0, 39), (39, 58), (58, 74), (74, 79), (79, 91), (91, 94)] St Nb. min max median average stddev 0 11 1.0 1.0 1.000 1.000 0.000 11 11 1.0 2.0 1.500 1.545 0.498 22 11 2.0 2.0 2.000 2.000 0.000 33 11 2.0 3.0 2.500 2.455 0.498 44 11 3.0 3.0 3.000 3.000 0.000 55 11 3.0 4.0 3.500 3.727 0.445 66 11 4.0 5.0 4.500 4.273 0.445 77 11 5.0 6.0 5.500 5.818 0.386 88 11 6.0 8.0 7.000 6.909 0.668 99 11 9.0 10.0 9.500 9.545 0.498 110 11 10.0 12.0 11.000 11.182 0.716 121 11 12.0 16.0 14.000 13.818 1.113 132 11 16.0 21.0 18.500 17.636 1.367 143 11 21.0 29.0 25.000 24.818 2.367 154 11 31.0 51.0 41.000 38.364 7.413 165 11 55.0 92.0 73.500 70.636 10.764 176 5 97.0 136.0 116.500 111.400 13.865 modules with a lot of external names : (18, 40, 'cgi') (19, 19, 'cgitb') ... etc ... (72, 72, 'pydoc') (74, 74, 'cookielib') (78, 78, 'urllib2') (86, 86, 'symbol') (92, 92, 'sre_constants') (97, 97, 'xmlrpclib') (101, 118, 'os') (107, 107, 'sre_compile') (116, 116, 'sre_parse') (136, 151, 'socket') Output with python 2.3.3 Linux gives a greater number for socket as the OpenSSL library is wrapped. gc.collect() at the interactive prompt gives 0. (good) Conclusion : sre_compile and sre_parse should be coded with a __all__ attribute The standard library contains a module 'tzparse' which cannot be imported ! Most library modules do not begin with #!/usr/bin/env python and a coding cookie. Regards -- http://mail.python.org/mailman/listinfo/python-list