Kay Schluehr: >[Yes, I have too much time...]
Thank you for your code. If you allow me to, I may put some code derived from yours back into the rosettacode.org site. >Here is an evil imperative, non-recursive generator: I like imperative, non-recursive code :-) If you count the number of items of the results, you find the sequence A002662: www.research.att.com/~njas/sequences/A002662 The series grows as: lambda n: sum(C(n, k) for k in xrange(3, n+1)) I have modified your code a little, here and there, so I can show it again. Here are some timings. Note: n = 19 => result = 261_972 subs n = 21 => result = 1_048_365 subs Timings, seconds, best of 3: N=19 N=21 v1: 2.51 17.88 v2: 0.63 3.58 v3: 2.47 10.65 v4: 0.61 3.55 v5: 0.84 5.45 v6: 0.64 2.67 v7: 0.58 3.07 v8: 0.44 1.83 v9: 0.07 0.21 v10: 2.22 9.58 v9 computes the 67_108_512 subs of n=27 in 14.54 s. The versions: v1) original eager Python+Psyco version v2) eager Python+Psyco version v3) lazy Python version v4) eager Python+Psyco version plus optimization v5) eager D version with my libs v6) lazy D version with my libs v7) eager D version without my libs plus optimization v8) lazy D version without my libs v9) lazy D version without my libs, no allocations 10) lazy Python version, no allocations Used DMD compiler V.1.033 with: -O -release -inline Python 2.5.2, Psyco 1.5.2. Some comments: - The current Python GC manages memory more efficienty than the current default D GC. This is a known issue, D is a much younger language, and there are various different situations (not just regarding its GC) where it's slower (for example regarding I/O, associative arrays, etc). - Being compiled, and having a very light iteration protocol, D is much faster in the lazy version. Avoiding to allocate new memory reduces total time a lot. - In D when n is small the eager version is faster, but things change with n is large, because this time the allocation time and the cache misses start to dominate. - The Python version doesn't improve much when you know much long the result is, while the D version improves significantly. This because the list append in Python is much more efficient than in D. Python is a much more refined language, and despite D supposed to be a fast compiled "to the metal" system language, it's often not faster. For example Python dicts are often faster than D AAs. D is currently in quick development so its data structures and many details aren't optimized. Python is much older and its data structures are well optized, lot of smart people has made them fast. - If you avoid memory allocation in D you can generally go quite fast. - Comparing different languages like this is useful for D, because it's young, and doing such comparisons I have found many performance problems in it, sometimes they have even being discussed, understood, addressed. - In D another possible output is an uint (integer not signed always 32 bits), where the bits = 1 are the items in the current subset. This may be fast enough. Bye, bearophile ------------------------- THE CODE: # 1) original eager Python+Psyco version from sys import argv def ncsub(seq, s=0): if seq: x = seq[:1] xs = seq[1:] p2 = s % 2 p1 = not p2 return [x + ys for ys in ncsub(xs, s + p1)] + ncsub(xs, s + p2) else: return [[]] if s >= 3 else [] import psyco; psyco.bind(ncsub) n = 10 if len(argv) < 2 else int(argv[1]) print len( ncsub(range(1, n)) ) ------------------------- # 2) eager Python+Psyco version from sys import argv def ncsub(seq): n = len(seq) res = [] for i in xrange(1, 2 ** n): S = [] nc = False for j in xrange(n + 1): k = i >> j if k == 0: if nc: res.append(S) break elif k % 2: S.append(seq[j]) elif S: nc = True return res import psyco; psyco.bind(ncsub) n = 10 if len(argv) < 2 else int(argv[1]) print len( ncsub(range(1, n)) ) ------------------------- # 3) lazy Python version # Psyco not used because makes it slower from sys import argv def leniter(iterator): nelements = 0 for _ in iterator: nelements += 1 return nelements def ncsub(seq): n = len(seq) for i in xrange(1, 2 ** n): S = [] nc = False for j in xrange(n + 1): k = i >> j if k == 0: if nc: yield S break elif k % 2: S.append(seq[j]) elif S: nc = True n = 10 if len(argv) < 2 else int(argv[1]) print leniter( ncsub(range(1, n)) ) ------------------------- # 4) eager Python+Psyco version plus optimization def C(n, k): result = 1 for d in xrange(1, k+1): result *= n n -= 1 result /= d return result # www.research.att.com/~njas/sequences/A002662 nsubs = lambda n: sum(C(n, k) for k in xrange(3, n+1)) def ncsub(seq): n = len(seq) result = [None] * nsubs(n) pos = 0 for i in xrange(1, 2 ** n): S = [] nc = False for j in xrange(n + 1): k = i >> j if k == 0: if nc: result[pos] = S pos += 1 break elif k % 2: S.append(seq[j]) elif S: nc = True return result from sys import argv import psyco; psyco.full() n = 10 if len(argv) < 2 else int(argv[1]) print len( ncsub(range(1, n)) ) ------------------------- // 5) eager D version with my libs // all the D code is generic (templated) import d.all, std.conv; T[][] ncsub(T)(T[] seq) { int n = len(seq); T[][] result; auto R = xrange(n + 1); foreach (i; xrange(1, 1 << n)) { T[] S; bool nc = false; foreach (j; R) { int k = i >> j; if (k == 0) { if (nc) result ~= S; break; } else if (k % 2) S ~= seq[j]; else if (S.length) nc = true; } } return result; } void main(string[] args) { int n = args.length == 2 ? toInt(args[1]) : 10; putr( len( ncsub(range(1, n)) ) ); } ------------------------- // 6) lazy D version with my libs import d.all, std.conv; struct Ncsub(T) { T[] seq; void generator() { int n = len(seq); foreach (i; xrange(1, 1 << n)) { T[] S; bool nc = false; foreach (j; xrange(n + 1)) { int k = i >> j; if (k == 0) { if (nc) yield(S); break; } else if (k % 2) S ~= seq[j]; else if (S.length) nc = true; } } } mixin Generator!(T[]); } void main(string[] args) { int n = args.length == 2 ? toInt(args[1]) : 10; putr( len( Ncsub!(int)(range(1, n)) ) ); } ------------------------- // 7) eager D version without my libs plus optimization import std.stdio: putr = writefln; import std.conv: toInt; long C(long n, long k) { long result = 1; for (long d = 1; d < k+1; d++) { result *= n; n--; result /= d; } return result; } long nsubs(long n) { // www.research.att.com/~njas/sequences/A002662 long tot = 0; for (int k = 3; k <= n; k++) tot += C(n, k); return tot; } T[][] ncsub(T)(T[] seq) { auto result = new T[][nsubs(seq.length)]; int pos; for (int i = 1; i < (1 << seq.length); i++) { T[] S; bool nc = false; for (int j; j < seq.length + 1; j++) { int k = i >> j; if (k == 0) { if (nc) result[pos++] = S; break; } else if (k % 2) S ~= seq[j]; else if (S.length) nc = true; } } return result; } void main(string[] args) { int n = args.length == 2 ? toInt(args[1]) : 10; auto range = new int[n - 1]; foreach (i, ref el; range) el = i + 1; putr( ncsub(range).length ); } ------------------------- // 8) lazy D version without my libs import std.conv: toInt; import std.stdio: putr = writefln; struct Ncsub(T) { T[] seq; int opApply(int delegate(ref int[]) dg) { int result, n = seq.length; OUTER: for (int i = 1; i < (1 << seq.length); i++) { T[] S; bool nc = false; for (int j; j < seq.length + 1; j++) { int k = i >> j; if (k == 0) { if (nc) { result = dg(S); if (result) break OUTER; } break; } else if (k % 2) S ~= seq[j]; else if (S.length) nc = true; } } return result; } } void main(string[] args) { int n = args.length == 2 ? toInt(args[1]) : 10; auto range = new int[n - 1]; foreach (i, ref el; range) el = i + 1; int count; foreach (sub; Ncsub!(int)(range)) count++; putr(count); } ------------------------- // 9) lazy D version without my libs, no allocations // the slicing S[0..len_S] doesn't copy memory import std.conv: toInt; import std.stdio: putr = writefln; struct Ncsub(T) { T[] seq; int opApply(int delegate(ref int[]) dg) { int result, n = seq.length; auto S = new int[n]; OUTER: for (int i = 1; i < (1 << seq.length); i++) { int len_S; bool nc = false; for (int j; j < seq.length + 1; j++) { int k = i >> j; if (k == 0) { if (nc) { T[] auxS = S[0 .. len_S]; result = dg(auxS); if (result) break OUTER; } break; } else if (k % 2) S[len_S++] = seq[j]; else if (len_S) nc = true; } } return result; } } void main(string[] args) { int n = args.length == 2 ? toInt(args[1]) : 10; auto range = new int[n - 1]; foreach (i, ref el; range) el = i + 1; int count; foreach (sub; Ncsub!(int)(range)) count++; putr(count); } ------------------------- # 10) lazy Python version, no allocations from sys import argv def leniter(iterator): nelements = 0 for _ in iterator: nelements += 1 return nelements def ncsub(seq, S): n = len(seq) for i in xrange(1, 2 ** n): lenS = 0 nc = False for j in xrange(n + 1): k = i >> j if k == 0: if nc: yield lenS break elif k % 2: S[lenS] = seq[j] lenS += 1 elif lenS: nc = True n = 10 if len(argv) < 2 else int(argv[1]) s = [None] * (n-1) print leniter( ncsub(range(1, n), s) ) ------------------------- -- http://mail.python.org/mailman/listinfo/python-list