Gradient checkpoint with type "auto" raises a TypeError
After I added the code below (enabling EPL's automatic gradient checkpointing, ZeRO, and AMP) to my working model function, I got a TypeError:
```python
epl_config = epl.Config({
    "gradient_checkpoint.type": "auto",
    "zero.level": "v1",
    "amp.level": "O1",
    "amp.loss_scale": 128,
})
epl.init(epl_config)
epl.set_default_strategy(epl.replicate(1))
```
Error info:

```text
  File "/venv/lib/python2.7/site-packages/tensorflow/contrib/graph_editor/util.py", line 214, in get_unique_graph
    t) for t in check_types]), type(op)))
TypeError: Expected a type in (<class 'tensorflow.python.framework.ops.Tensor'>), got: <class 'tensorflow.python.ops.resource_variable_ops.Resource
```
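
Digging into the traceback: `get_unique_graph` in `tf.contrib.graph_editor` accepts only plain `tf.Tensor`s (per the `check_types` in the message), but it is handed a resource variable. Below is a minimal sketch that reproduces the same TypeError; the direct call to `get_unique_graph` with `check_types=(tf.Tensor,)` is my own illustration of the failing check, not EPL's actual code path:

```python
# Illustration only (my assumption about the failure mode, not EPL's code path):
# graph_editor's type check rejects anything that is not a tf.Tensor, and a
# ResourceVariable handle is not a tf.Tensor.
import tensorflow as tf
from tensorflow.contrib.graph_editor import util as ge_util

with tf.Graph().as_default():
    t = tf.constant([1.0, 2.0])
    v = tf.get_variable("v", shape=[2], use_resource=True)  # a ResourceVariable

    # Raises the same error as above:
    # TypeError: Expected a type in (<class '...ops.Tensor'>), got:
    # <class '...resource_variable_ops.ResourceVariable'>
    ge_util.get_unique_graph([t, v], check_types=(tf.Tensor,))
```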
My model function (the `modeling` module ships with the RoBERTa checkpoint):
```python
import epl
import tensorflow as tf
from tensorflow.contrib import layers, metrics

import modeling  # modeling.py shipped with the BERT/RoBERTa checkpoint code

epl_config = epl.Config({
    "gradient_checkpoint.type": "auto",
    "zero.level": "v1",
    "amp.level": "O1",
    "amp.loss_scale": 128,
})
epl.init(epl_config)
epl.set_default_strategy(epl.replicate(1))

bert_path = 'robert_checkpoint_path'


def model_fn(features, labels, mode, params):
    is_train_bool = mode == tf.estimator.ModeKeys.TRAIN

    # Feature keys assumed from the standard BERT input pipeline; the original
    # snippet referenced these names without defining them.
    input_ids = features["input_ids"]
    input_mask = features["input_mask"]
    segment_ids = features["segment_ids"]

    # Build the BERT model.
    bert_config = modeling.BertConfig.from_dict({
        "attention_probs_dropout_prob": 0.1,
        "directionality": "bidi",
        "hidden_act": "gelu",
        "hidden_dropout_prob": 0.1,
        "hidden_size": 768,
        "initializer_range": 0.02,
        "intermediate_size": 3072,
        "max_position_embeddings": 512,
        "num_attention_heads": 12,
        "num_hidden_layers": 6,
        "pooler_fc_size": 768,
        "pooler_num_attention_heads": 12,
        "pooler_num_fc_layers": 3,
        "pooler_size_per_head": 128,
        "pooler_type": "first_token_transform",
        "type_vocab_size": 2,
        "vocab_size": 21128,
    })
    bert = modeling.BertModel(
        config=bert_config,
        is_training=is_train_bool,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=False,
        scope='bert')

    # Get BERT's sequence output.
    bert_output = bert.get_sequence_output()

    # Load the pre-trained BERT weights.
    if is_train_bool:
        tvars = tf.trainable_variables()
        (assignment_map,
         initialized_names) = modeling.get_assignment_map_from_checkpoint(
             tvars, bert_path)
        tf.train.init_from_checkpoint(bert_path, assignment_map)
        tf.logging.info("**** Trainable Variables ****")
        for var in tvars:
            tf.logging.info("  name = {}, shape = {}{}".format(
                var.name, var.shape,
                ", *INIT_FROM_CKPT*" if var.name in initialized_names else ''))

    with tf.variable_scope("network"):
        # MLP head on top of BERT.
        first_hidden_layer = tf.layers.dense(
            tf.concat(bert_output, axis=1), 128, activation=tf.nn.relu)
        second_hidden_layer = tf.layers.dense(
            first_hidden_layer, 128, activation=tf.nn.relu)
        logits = tf.layers.dense(second_hidden_layer, 1)
        predictions = tf.sigmoid(logits)
        predictions = tf.identity(predictions, name="predict")

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode,
            predictions={
                "predict": predictions,
                'label': features['label'],
            })

    labels = tf.reshape(labels, [-1, 1])
    loss = tf.losses.sigmoid_cross_entropy(labels, logits)
    epl.add_to_collection(loss, epl.GraphKeys.GLOBAL_MEAN_OBJECTS)
    optimizer = tf.train.AdamOptimizer()
    train_op = optimizer.minimize(
        loss=loss, global_step=tf.train.get_global_step())

    predictions = tf.reshape(predictions, [-1, 1])
    eval_metric_ops = {
        "auc": tf.metrics.auc(labels, predictions),
        "f1": metrics.f1_score(labels, predictions),
        "precision": tf.metrics.precision_at_thresholds(
            labels, predictions, [0.5]),
        "recall": tf.metrics.recall_at_thresholds(
            labels, predictions, [0.5]),
    }
    return tf.estimator.EstimatorSpec(
        mode=mode,
        loss=loss,
        predictions={"predict": predictions},
        train_op=train_op,
        eval_metric_ops=eval_metric_ops)
```
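
A workaround I plan to try (my own guess, not from the EPL docs): force legacy ref variables so the graph-editor pass behind auto gradient checkpointing only ever sees `tf.Tensor` endpoints. Sketch:

```python
# Guessed workaround, not confirmed by the EPL docs: avoid resource variables
# entirely so tf.contrib.graph_editor never sees a ResourceVariable handle.
import tensorflow as tf

# Globally, before any variable is created (exposed under tf.compat.v1 in
# TF >= 1.14):
tf.compat.v1.disable_resource_variables()

# Or per scope, when building the model:
with tf.variable_scope("network", use_resource=False):
    w = tf.get_variable("w", shape=[128])
```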