#!/usr/bin/python
"""Report a difficulty and discrimination estimate for each question column.

The input is a comma-separated file: column 1 holds an ability estimate
(course grade) per student, every other column holds that student's score
on one question.  See usage() for the command-line description.
"""
import sys
import os


def main(argv):
    """Read the CSV named by argv[1] and print one report line per question.

    argv is a command-line style list (program name first).  If the file
    argument is missing or not readable, usage() terminates the program.
    """
    if len(argv) < 2 or not os.access(argv[1], os.R_OK):
        usage()

    # Read in the data.  Line endings are stripped first so that a text
    # header in the last column does not carry a trailing newline into the
    # printed report.  The file named by the argv parameter is used (not
    # the global sys.argv) and it is closed as soon as it has been read.
    with open(argv[1]) as handle:
        data = [[conv(cell) for cell in line.rstrip("\r\n").split(",")]
                for line in handle]

    # Get the grade column
    grades = [row[0] for row in data]

    # An empty file has no rows and therefore no columns to report on.
    width = max(len(row) for row in data) if data else 0

    # Now, for each column (other than the first),
    for i in range(1, width):
        # Short rows are padded with None so col lines up with grades.
        col = [row[i] if len(row) > i else None for row in data]

        # Process this column against the overall grades
        diff, disc = evalScores(grades, col)

        # A text cell in the first row is treated as the column's header;
        # otherwise the column is labelled by its position.
        if isinstance(col[0], str):
            label = col[0]
        else:
            label = "Q%03d" % (i,)

        # A single pre-formatted string keeps the output identical under
        # both the Python 2 print statement and the Python 3 function.
        print("%s Difficulty = %.2f\tDiscrimination = %.2f"
              % (label, diff, disc))


def usage():
    """Exit the program via sys.exit() with the usage message."""
    msg = """Usage: %s FILE
FILE is CSV formatted, column 1 is ability estimate (course grade),
other columns are grades in range [0 .. 1] for each student (row),
on each question (column)
Row #1 may be headers to aid in identifying results.""" % sys.argv[0]
    sys.exit(msg)


def conv(x):
    """Convert one CSV cell to a float, a string, or None.

    If x can be made into a float, the float is returned.  If not, and x
    is not blank (empty or whitespace only), x itself is returned
    unchanged.  A blank cell yields None.
    """
    try:
        return float(x)
    except (TypeError, ValueError):
        # Only conversion failures are handled here; anything else
        # (e.g. KeyboardInterrupt) propagates.
        if x.strip():
            return x
        return None


def evalDisc(split, data):
    """Score how well the threshold `split` separates the (grade, score) pairs.

    For every pair with grade <= split, the error is the score's difference
    from 0.  For every other pair, the error is its difference from 1.
    Returns 1 minus the mean error.

    Raises ZeroDivisionError if data is empty.
    """
    error = 0.0
    for grade, score in data:
        if grade <= split:
            error += score
        else:
            error += (1 - score)
    return 1 - error / len(data)


def evalScores(grades, col):
    """Find the grade threshold that gives the best discrimination for col.

    grades and col are parallel sequences; only positions where both
    entries are floats are used (headers, text and missing cells are
    skipped).  Every usable grade is tried as a split point with
    evalDisc() and the best one is kept.

    Returns (bestDiff, bestDisc): the chosen grade and its discrimination.
    Returns (0, 0) when no split scores above 0, including when there is
    no usable data.
    """
    # First, build the actual data we will use here.  zip stops at the
    # shorter sequence, so a short column cannot cause an IndexError.
    data = sorted(
        (grade, score)
        for grade, score in zip(grades, col)
        if isinstance(grade, float) and isinstance(score, float)
    )

    # Now loop through, in ascending grade order, to find the best split.
    # The strict comparison keeps the lowest grade among equal bests.
    bestDisc = 0
    bestDiff = 0
    for grade, _score in data:
        tempDisc = evalDisc(grade, data)
        if tempDisc > bestDisc:
            bestDisc = tempDisc
            bestDiff = grade

    # Return the best one we found
    return bestDiff, bestDisc


if __name__ == "__main__":
    main(sys.argv)