[bert] exact_match and f1 results are abnormal
In the bert Offline accuracy test, I submit queries in batches. When I complete each response with `lg.QuerySamplesComplete` one at a time, I get the right result:
```python
import array

import mlperf_loadgen as lg
import numpy as np
import torch


def issue_queries(self, query_samples):
    batch_size = self.batch_size
    if len(query_samples) < batch_size:
        batch_size = len(query_samples)
    batch_input_1 = np.empty([batch_size, self.max_seq_length], dtype=np.int32)
    batch_input_2 = np.empty([batch_size, self.max_seq_length], dtype=np.int32)
    batch_input_3 = np.empty([batch_size, self.max_seq_length], dtype=np.int32)
    query_id = np.empty([batch_size], dtype=int)
    batch_counter = 0
    with torch.no_grad():
        for i in range(len(query_samples)):
            eval_features = self.qsl.get_features(query_samples[i].index)
            batch_input_1[batch_counter % batch_size] = torch.IntTensor(eval_features.input_ids).numpy()
            batch_input_2[batch_counter % batch_size] = torch.IntTensor(eval_features.input_mask).numpy()
            batch_input_3[batch_counter % batch_size] = torch.IntTensor(eval_features.segment_ids).numpy()
            query_id[i % batch_size] = query_samples[i].id
            batch_counter += 1
            # run the model once per full batch (or on the final partial batch)
            if batch_counter % batch_size == 0 or i == (len(query_samples) - 1):
                model_output = self.model([batch_input_1, batch_input_2, batch_input_3])
                for idx_output in range(batch_counter):
                    start_scores = torch.from_numpy(model_output[0][idx_output])
                    end_scores = torch.from_numpy(model_output[1][idx_output])
                    output = torch.stack([start_scores, end_scores], axis=-1).squeeze(0).cpu().numpy()
                    response_array = array.array("B", output.tobytes())
                    bi = response_array.buffer_info()
                    response = lg.QuerySampleResponse(query_id[idx_output], bi[0], bi[1])
                    # complete each sample immediately, one response at a time
                    lg.QuerySamplesComplete([response])
                batch_counter = 0
```
But when I call `lg.QuerySamplesComplete` once with the whole list of responses, exact_match and f1 come out low:
```python
def issue_queries(self, query_samples):
    batch_size = self.batch_size
    if len(query_samples) < batch_size:
        batch_size = len(query_samples)
    batch_input_1 = np.empty([batch_size, self.max_seq_length], dtype=np.int32)
    batch_input_2 = np.empty([batch_size, self.max_seq_length], dtype=np.int32)
    batch_input_3 = np.empty([batch_size, self.max_seq_length], dtype=np.int32)
    query_id = np.empty([batch_size], dtype=int)
    batch_counter = 0
    with torch.no_grad():
        for i in range(len(query_samples)):
            eval_features = self.qsl.get_features(query_samples[i].index)
            batch_input_1[batch_counter % batch_size] = torch.IntTensor(eval_features.input_ids).numpy()
            batch_input_2[batch_counter % batch_size] = torch.IntTensor(eval_features.input_mask).numpy()
            batch_input_3[batch_counter % batch_size] = torch.IntTensor(eval_features.segment_ids).numpy()
            query_id[i % batch_size] = query_samples[i].id
            batch_counter += 1
            if batch_counter % batch_size == 0 or i == (len(query_samples) - 1):
                model_output = self.model([batch_input_1, batch_input_2, batch_input_3])
                response = []
                for idx_output in range(batch_counter):
                    start_scores = torch.from_numpy(model_output[0][idx_output])
                    end_scores = torch.from_numpy(model_output[1][idx_output])
                    output = torch.stack([start_scores, end_scores], axis=-1).squeeze(0).cpu().numpy()
                    response_array = array.array("B", output.tobytes())
                    bi = response_array.buffer_info()
                    response.append(lg.QuerySampleResponse(query_id[idx_output], bi[0], bi[1]))
                # collect the whole batch, then complete all responses at once
                lg.QuerySamplesComplete(response)
                batch_counter = 0
```
@arjunsuresh Please help, thanks!
This is the reference implementation for the bert backend, and it does support batching. If you are working on a fork of the inference repository, can you please share the fork link so that I can test the run?
@arjunsuresh I don't think the batching itself is the problem; the only difference between the two versions is how the responses are completed. With the first one I get the right accuracy, while with the second one the accuracy is low. Maybe it is related to loadgen. I'm confused, thanks for helping me!
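One possible explanation (an assumption, not confirmed in this thread): `lg.QuerySampleResponse` stores only the raw pointer from `buffer_info()`, not a Python reference. In the second version, `response_array` is rebound on every loop iteration, so CPython can free each previous `array.array` before `lg.QuerySamplesComplete(response)` is finally called, and loadgen then reads freed memory. The first version avoids this because each `lg.QuerySamplesComplete([response])` runs while the current `response_array` is still alive. A minimal sketch of a batched completion that keeps every buffer referenced until the call returns; `complete_batch`, `query_ids`, `start_scores`, and `end_scores` are hypothetical names, not from the thread:

```python
import array

import mlperf_loadgen as lg  # same loadgen bindings as above
import numpy as np


def complete_batch(query_ids, start_scores, end_scores):
    # Hypothetical inputs: one entry per sample, scores as
    # numpy arrays of shape [seq_len].
    responses = []
    buffers = []  # hold a reference to each array so it is not freed early
    for qid, start, end in zip(query_ids, start_scores, end_scores):
        output = np.stack([start, end], axis=-1).astype(np.float32)
        response_array = array.array("B", output.tobytes())
        # Without this append, rebinding response_array on the next
        # iteration would free the buffer that loadgen still points at.
        buffers.append(response_array)
        bi = response_array.buffer_info()
        responses.append(lg.QuerySampleResponse(qid, bi[0], bi[1]))
    # Every buffer is still referenced here, so loadgen reads valid memory.
    lg.QuerySamplesComplete(responses)
```

If this is the cause, keeping the `response_array` objects in a list that outlives the `lg.QuerySamplesComplete(response)` call in your second version should restore the accuracy.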