This week, I was able to rewrite my code so that it builds the matrix in the format that we need.

"""Count the lines of a ``.probs`` file whose second probability wins clearly.

Each non-empty line is expected to contain two ``:<number>`` fields. A line
is kept when the second number exceeds the first by more than a threshold.
The number of kept lines is printed; it is the value that goes into the
matrix.
"""
import re

# Captures the first two ':<number>' fields on a line. Compiled once so the
# per-line loop does not repeat the pattern lookup.
_PROB_PATTERN = re.compile(r'.*\:([\d\.]+).*\:([\d\.]+).*')


def collect_entailing_pairs(lines, threshold=0.5):
    """Return the probability pairs that clear the entailment threshold.

    Args:
        lines: Iterable of text lines (an open file works). Blank lines and
            lines that do not match the expected two-field layout are
            skipped.
        threshold: Minimum margin by which the second probability must
            exceed the first for the line to be kept. Defaults to 0.5.

    Returns:
        A list of ``(first, second, "right")`` tuples, where ``first`` and
        ``second`` are the matched number strings exactly as they appear in
        the input line.

    Raises:
        ValueError: If a captured field is not a valid float (for example
            ``"1.2.3"``), since the pattern accepts any run of digits and
            dots.
    """
    data = []
    for line in lines:
        m = _PROB_PATTERN.match(line.strip())
        if m is None:
            # Blank lines and lines without two ':<number>' fields.
            continue

        first = float(m.group(1))
        second = float(m.group(2))
        # Per the original notes, the second field is the probability that
        # the sentences entail each other and the first that they do not.
        # Keep the pair only when entailment wins by more than `threshold`.
        if second > first and second - first > threshold:
            data.append((m.group(1), m.group(2), "right"))
    return data


def main(path=r"test.probs"):
    """Read ``path``, print the number of entailing pairs and return it."""
    # The context manager closes the file even if parsing raises.
    with open(path, "r") as f:
        data = collect_entailing_pairs(f)

    # The length of data is the number of sentences that entail each other
    # in the article, which is also the value to put into the matrix.
    count = len(data)
    print(count)
    return count


if __name__ == "__main__":
    main()