Running Model on Data without highlights
Can this setup be run for a data-set which has no highlights available?
I didn't have any highlights in my data, so I converted my data into that format with code something like this:
# Write every (text, summary) pair of the `circular` frame to its own numbered
# file 'file<N>.story': the text, a blank line, an '@highlight' marker line,
# then the summary.
st = 1
# Iterate the two columns positionally. Looking rows up with
# circular['Text'][i] is a label lookup and breaks when the frame's index is
# not exactly 0..n-1 (e.g. after filtering rows).
for text, summary in zip(circular['Text'], circular['Summary']):
    # Plain write mode is enough; the file is never read back here.
    with open('file' + str(st) + '.story', 'w') as writer:
        writer.write(text + '\n\n')
        writer.write('@highlight' + '\n')
        writer.write(summary)
    st += 1
For example, I had a simple CSV file with text and summary columns; you read it in with pandas and then run the code above. Also, for custom data that is not CNN or DailyMail, you can use this to make your own bin files.
Please try using my code below, which reads a CSV file with text and summary. Use it in place of the Batcher.
def example_generator(data_path, single_pass):
    """Yield (content, title) string pairs read from CSV data files.

    Args:
        data_path: Glob pattern handed to tf.gfile.Glob to locate the CSV
            files. Every file is read for its "content" and "title" columns.
        single_pass: If true, go through the files once in sorted order and
            then stop. Otherwise keep looping over the files, reshuffling the
            file order at the start of every pass.

    Yields:
        Tuples (content, title), one per CSV row, both cast to str.

    Raises:
        AssertionError: If no file matches data_path.
    """
    while True:
        filelist = tf.gfile.Glob(data_path)
        if not filelist:
            # Raised explicitly instead of via `assert` so the check is not
            # stripped under `python -O`; without it an empty match would
            # make this loop spin forever when single_pass is off.
            raise AssertionError('Error: Empty filelist at %s' % data_path)
        if single_pass:
            filelist = sorted(filelist)
        else:
            random.shuffle(filelist)
        for f in filelist:
            dataframe = pd.read_csv(f, encoding="utf-8")
            contents = dataframe["content"].astype(str)
            titles = dataframe["title"].astype(str)
            # Walk the two columns in lockstep rather than using iterrows(),
            # which builds a Series object for every row.
            for content, title in zip(contents, titles):
                yield content, title
        if single_pass:
            print("example_generator completed reading all datafiles. No more data.")
            break
class MyBatcher(Batcher):
    """Batcher whose example queue is filled from example_generator output."""

    def fill_example_queue(self):
        """Build an Example from every generated pair and enqueue it.

        Pulls (article, abstract) pairs from example_generator, wraps each
        abstract in a single-element list, and puts the resulting Example on
        self._example_queue. When the generator runs dry in single_pass mode,
        self._finished_reading is set and the method returns.

        Raises:
            Exception: If the generator is exhausted while single_pass mode
                is off.
        """
        pairs = example_generator(self._data_path, self._single_pass)
        for article, abstract in pairs:
            example = Example(article, [abstract], self._vocab, self._hps)
            self._example_queue.put(example)

        # Reaching this point means the generator has no more data.
        tf.logging.info("The example generator for this example queue filling thread has exhausted data.")
        if not self._single_pass:
            raise Exception("single_pass mode is off but the example generator is out of data; error.")
        tf.logging.info("single_pass mode is on, so we've finished reading dataset. This thread is stopping.")
        self._finished_reading = True