EasyParallelLibrary icon indicating copy to clipboard operation
EasyParallelLibrary copied to clipboard

Gradient checkpointing with the "auto" type raises a TypeError

Open RussellZZ opened this issue 3 years ago • 0 comments

After I added the code below to my previously working functions, I got a TypeError:

# EPL setup: gradient checkpoint type "auto", zero level "v1",
# AMP level "O1" with a loss scale of 128.
epl_config = epl.Config(
    {
        "gradient_checkpoint.type": "auto",
        "zero.level": "v1",
        "amp.level": "O1",
        "amp.loss_scale": 128,
    }
)
epl.init(epl_config)
# Use a replicate(1) strategy as the default strategy.
epl.set_default_strategy(epl.replicate(1))

error info:

"/venv/lib/python2.7/site-packages/tensorflow/contrib/graph_editor/util.py", line 214, in get_unique_graph
    t) for t in check_types]), type(op)))
TypeError: Expected a type in (<class 'tensorflow.python.framework.ops.Tensor'>), got: <class 'tensorflow.python.ops.resource_variable_ops.ResourceVariable'>

My working code (the `modeling` module comes from robert):

import epl
import tensorflow as tf
from tensorflow.contrib import layers, metrics

# EPL setup: gradient checkpoint type "auto", zero level "v1",
# AMP level "O1" with a loss scale of 128.
epl_config = epl.Config(
    {
        "gradient_checkpoint.type": "auto",
        "zero.level": "v1",
        "amp.level": "O1",
        "amp.loss_scale": 128,
    }
)
epl.init(epl_config)
# Use a replicate(1) strategy as the default strategy.
epl.set_default_strategy(epl.replicate(1))

# Checkpoint location handed to the BERT checkpoint-loading helpers in model_fn.
bert_path = 'robert_checkpoint_path'


def model_fn(features, labels, mode, params):
    """Build the Estimator graph: a BERT encoder followed by a dense head.

    The BERT sequence output is fed through two 128-unit ReLU dense layers
    and a final 1-unit dense layer; the sigmoid of that logit is exposed as
    the "predict" output.

    Args:
        features: Feature mapping. Only ``features['label']`` is read
            directly in this function (in PREDICT mode).
        labels: Label tensor; reshaped to ``[-1, 1]`` before the loss.
        mode: A ``tf.estimator.ModeKeys`` value.
        params: Not used by this function.

    Returns:
        A ``tf.estimator.EstimatorSpec``. In PREDICT mode it carries only
        predictions; otherwise it carries the loss, train op, predictions
        and evaluation metrics.

    NOTE(review): ``input_ids``, ``input_mask``, ``segment_ids`` and the
    ``modeling`` module are not defined in the visible snippet; presumably
    they come from ``features`` / an import elsewhere -- confirm.
    """
    training = mode == tf.estimator.ModeKeys.TRAIN

    # Hyper-parameters of the BERT encoder.
    encoder_hparams = {
        "attention_probs_dropout_prob": 0.1,
        "directionality": "bidi",
        "hidden_act": "gelu",
        "hidden_dropout_prob": 0.1,
        "hidden_size": 768,
        "initializer_range": 0.02,
        "intermediate_size": 3072,
        "max_position_embeddings": 512,
        "num_attention_heads": 12,
        "num_hidden_layers": 6,
        "pooler_fc_size": 768,
        "pooler_num_attention_heads": 12,
        "pooler_num_fc_layers": 3,
        "pooler_size_per_head": 128,
        "pooler_type": "first_token_transform",
        "type_vocab_size": 2,
        "vocab_size": 21128
    }
    bert_config = modeling.BertConfig.from_dict(encoder_hparams)
    encoder = modeling.BertModel(
        config=bert_config,
        is_training=training,
        input_ids=input_ids,
        input_mask=input_mask,
        token_type_ids=segment_ids,
        use_one_hot_embeddings=False,
        scope='bert'
    )
    bert_output = encoder.get_sequence_output()

    # When training, initialise variables from the checkpoint at `bert_path`
    # and log every trainable variable, marking the ones that were restored.
    if training:
        trainable = tf.trainable_variables()
        ckpt_info = modeling.get_assignment_map_from_checkpoint(
            trainable, bert_path)
        assignment_map, initialized_names = ckpt_info
        tf.train.init_from_checkpoint(bert_path, assignment_map)
        tf.logging.info("**** Trainable Variables ****")
        for variable in trainable:
            if variable.name in initialized_names:
                marker = ", *INIT_FROM_CKPT*"
            else:
                marker = ''
            tf.logging.info("  name = {}, shape = {}{}".format(
                variable.name, variable.shape, marker))

    # Dense head on top of the encoder output.
    with tf.variable_scope("network"):
        head_input = tf.concat(bert_output, axis=1)
        hidden_1 = tf.layers.dense(head_input, 128, activation=tf.nn.relu)
        hidden_2 = tf.layers.dense(hidden_1, 128, activation=tf.nn.relu)
        logits = tf.layers.dense(hidden_2, 1)
        predictions = tf.sigmoid(logits)

    # Give the output op the fixed name "predict".
    predictions = tf.identity(predictions, name="predict")

    # Prediction mode needs neither loss nor optimizer: return early.
    if mode == tf.estimator.ModeKeys.PREDICT:
        predict_outputs = {
            "predict": predictions,
            'label': features['label'],
        }
        return tf.estimator.EstimatorSpec(
            mode=mode, predictions=predict_outputs)

    labels = tf.reshape(labels, [-1, 1])
    loss = tf.losses.sigmoid_cross_entropy(labels, logits)
    # Register the loss in EPL's GLOBAL_MEAN_OBJECTS collection.
    epl.add_to_collection(loss, epl.GraphKeys.GLOBAL_MEAN_OBJECTS)

    optimizer = tf.train.AdamOptimizer()
    global_step = tf.train.get_global_step()
    train_op = optimizer.minimize(loss=loss, global_step=global_step)

    # Metrics are created in the same order as before: auc, f1, precision,
    # recall.
    predictions = tf.reshape(predictions, [-1, 1])
    auc_metric = tf.metrics.auc(labels, predictions)
    f1_metric = metrics.f1_score(labels, predictions)
    precision_metric = tf.metrics.precision_at_thresholds(
        labels, predictions, [0.5])
    recall_metric = tf.metrics.recall_at_thresholds(
        labels, predictions, [0.5])
    eval_metric_ops = {
        "auc": auc_metric,
        "f1": f1_metric,
        "precision": precision_metric,
        "recall": recall_metric,
    }

    return tf.estimator.EstimatorSpec(
        mode=mode,
        loss=loss,
        predictions={"predict": predictions},
        train_op=train_op,
        eval_metric_ops=eval_metric_ops)

RussellZZ avatar Jul 24 '22 08:07 RussellZZ