Week 24: 2/12 – 2/16

This week, I rewrote my code so that it builds the matrix in the format that we need.

import re

# Count the lines of "test.probs" whose second probability exceeds the first
# by more than THRESHOLD, and print that count.
#
# Each matching line is expected to contain two colon-prefixed numbers. Per
# the original notes, the first is the probability that the sentences do NOT
# entail each other and the second is the probability that they do
# (NOTE(review): confirm this ordering against the .probs file format).

# Minimum margin by which the second probability must exceed the first for a
# line to be counted.
THRESHOLD = 0.5

# Captures two colon-prefixed runs of digits/dots from a line. Compiled once
# so the pattern is not re-specified on every iteration.
prob_pattern = re.compile(r'.*\:([\d\.]+).*\:([\d\.]+).*')

data = []

# The context manager guarantees the file is closed, even if a line fails to
# parse as a float.
with open(r"test.probs", "r") as f:
    for line in f:
        m = prob_pattern.match(line.strip())
        if m is None:
            # Blank lines and lines without two probabilities are skipped.
            continue

        # Convert each captured value once.
        first = float(m.group(1))
        second = float(m.group(2))

        # A difference above the (positive) threshold already implies that
        # the second probability is the larger one, so no separate
        # "second > first" check is needed.
        difference = second - first
        if difference > THRESHOLD:
            # Keep the captured values as the original strings.
            data.append((m.group(1), m.group(2), "right"))

# The length of data is the number of sentences that entail each other in the
# article, and this is also the value that goes into the matrix.
count = len(data)
print(count)