import re
from glob import glob
from math import log

from tools.Stats.histogram import Multihist, Histogram

# For every results log: parse the result records, print summary fractions
# comparing the "good" scores against the "bad" (control) scores, and plot
# both score distributions.  The script targets Python 2 (raw_input).

# Only lines starting with "[((" followed by a digit are parsed as records.
# Compiled once here rather than on every loop iteration.
result_line = re.compile(r'\[\(\(\d')

# Stays None when no file produced a plot, so the final hardcopy is skipped
# instead of failing on an undefined name.
graph_obj = None

# NOTE(review): the [2:] slice drops the first two paths returned by glob(),
# whose ordering is not guaranteed -- confirm which files are meant to be
# skipped.
for results_file in glob('/scratch2/data2/Rfam/*.log')[2:]:
    print(results_file)
    with open(results_file) as handle:
        lines = handle.readlines()
    if not lines:
        continue

    # SECURITY: eval() executes whatever the matching log lines contain.
    # Only run this on log files from a trusted source.
    results = [eval(line) for line in lines if result_line.match(line)]
    if not results:
        # The file has content but no result records; there is nothing to
        # score, and indexing the empty score list below would fail.
        continue

    # Each record supplies two scores: [0][1] goes to "good", [1][1] to "bad".
    good = [log(record[0][1]) for record in results]
    bad = sorted(log(record[1][1]) for record in results)

    # Print out sensitivity at specificities 97.5% and 100%
    cutoff = bad[int(0.025 * len(bad))]
    total = float(len(good))
    print(len([score for score in good if score < cutoff]) / total)
    print(len([score for score in good if score < bad[0]]) / total)
    print(len(good))

    # Disabled step, kept for reference:
    # Get rid of all but 60 of the long string of ones in the control
    # results
    # bad = bad[:-(bad.count(0) - 60)]

    graph_obj = Multihist(2)
    graph_obj.xlabel('Log probability w.r.t null-hypothesis')
    graph_obj.ylabel('count')
    graph_obj.plot(good)
    graph_obj.plot(bad)

    # Pause so the plot can be inspected before moving to the next file.
    raw_input('press key')

# Only the most recently created graph is written out.
if graph_obj is not None:
    graph_obj.hardcopy('/var/tmp/graph.ps')

