lgstd

Results: 1 comment by lgstd

import torch
from DeBERTa.deberta.disentangled_attention import DisentangledSelfAttention
from DeBERTa.deberta.config import ModelConfig

config = ModelConfig()
config.hidden_size = 128
config.num_attention_heads = 4
config.share_att_key = False
config.pos_att_type = 'c2p|p2c|p2p'
config.relative_attention = True
config.position_buckets =...