""" calcrmsd - an rmsd calculator

Copyright 2011 Noel M. O'Boyle

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License version 2 as
published by the Free Software Foundation.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>.
"""

import os
import pybel
import time

from optparse import OptionParser

def main(input_ext, reference, output_ext, testfile, rmsd_cutoff):
    
    ref_iter = pybel.readfile(input_ext, reference)
    test_iter = pybel.readfile(output_ext, testfile)

    cutoff_passed = 0

    # For every test_conf there is a reference, but not v.v.

    align = pybel.ob.OBAlign()
    N = 0
    finish = False
    conf = test_iter.next()
    while not finish:
        N += 1
        title = conf.title.split("_")[0]
        
        # Find the reference
        ref = ref_iter.next()
        while ref.title != title:
            print "**Molecule %d\n..title = %s\n..number of confs = 0" % (N, ref.title)
            N += 1
            ref = ref_iter.next()
        print "**Looking at molecule %d\n..title = %s" % (N, ref.title)

        align.SetRefMol(ref.OBMol)
        rmsd = []
        while conf.title.split("_")[0] == title:
            align.SetTargetMol(conf.OBMol)
            align.Align()
            rmsd.append(align.GetRMSD())
            try:
                conf = test_iter.next()
            except StopIteration:
                finish = True
                break
        print "..number of confs = %d" % len(rmsd)
        print "..minimum rmsd = %.2f" % min(rmsd)
        rmsd.sort()

        binvals = [0.2, 0.5, 1.0, 1.5, 2.0, 3.0, 4.0, 100.0]
        bin_idx = 0
        bins = [0]*len(binvals)
        for r in rmsd:
            while r > binvals[bin_idx]:
                bin_idx += 1
                
            bins[bin_idx] += 1

        cumbins = [0] * len(bins)
        cumbins[0] = bins[0]
        for i in range(1, len(bins)):
            cumbins[i] = bins[i] + cumbins[i - 1]
        print "..# confs less than cutoffs: %s" % ", ".join(["%.1f" % x for x in binvals])
        print "..%s" % str(cumbins)
        print "..cutoff (%.2f) passed =" % rmsd_cutoff, 
        if min(rmsd) < rmsd_cutoff:
            print "Yes"
            cutoff_passed += 1
        else:
            print "No"
                 
        print

    print "**Summary"
    print "..number of molecules = %d" % N
    print "..less than cutoff(%.2f) = %d" % (rmsd_cutoff, cutoff_passed)
    print
    

if __name__ == "__main__":
    # Set up the options parser
    usage = "usage: %prog [options] reference test_file"
    parser = OptionParser(usage)
    parser.add_option("-i", dest="inputformat",
                      help="input file format", metavar="FILE")
    parser.add_option("-o", dest="outputformat",
                      help="output file format", metavar="FILE")
    parser.add_option("-r", "--rmsd", dest="rmsd_cutoff", default=0.5,
                      type="float",
                      help="RMSD cutoff (default 0.5)", metavar="RMSD")

    (options, args) = parser.parse_args()

    # Error handling
    if len(args) != 2:
        parser.error("You need to specify two arguments, the reference file and the test file")
    reference, testfile = args[0], args[1]
    if not os.path.isfile(reference):
        parser.error("Cannot find the reference file, '%s'" % reference)
    if not os.path.isfile(testfile):
        parser.error("Cannot find the test file, '%s'" % testfile)

    input_ext = options.inputformat
    if not input_ext:
        input_ext = reference.split(".")[-1]

    if input_ext not in pybel.informats.keys():
        parser.error("The reference file format is unknown.\n\n  Please specify a valid format with '-i'.")

    output_ext = options.outputformat
    if not output_ext:
        output_ext = testfile.split(".")[-1]
    if output_ext not in pybel.outformats.keys():
        parser.error("The test file format is unknown.\n\n  Please specify a valid format with '-o'.")

    main(input_ext, reference, output_ext, testfile, options.rmsd_cutoff)

    print "**Finished\n"
    
