操作步骤
步骤一:安装AI Gallery SDK
通过pip在本地或云上开发环境安装AI Gallery SDK(galleryformers)。
pip install galleryformers
建议在虚拟环境(Python 3.8+)中安装AI Gallery SDK,以便管理不同的项目,避免依赖项之间产生兼容性问题。
步骤二:构建自定义模型
本文使用NewBert模型介绍构建自定义模型的流程。
- 编写自定义配置类。
模型的configuration包含了构建模型所需的所有信息的对象,需要尽可能完整。
from galleryformers import PretrainedConfig
from typing import List


class NewBertConfig(PretrainedConfig):
    """Configuration object for the NewBert model.

    Stores every hyperparameter required to construct a NewBert model.
    Any extra keyword arguments are forwarded to ``PretrainedConfig``.
    """

    model_type = "bert"

    def __init__(
        self,
        vocab_size=30522,
        hidden_size=768,
        num_hidden_layers=12,
        num_attention_heads=12,
        intermediate_size=3072,
        hidden_act="gelu",
        hidden_dropout_prob=0.1,
        attention_probs_dropout_prob=0.1,
        max_position_embeddings=512,
        type_vocab_size=2,
        initializer_range=0.02,
        layer_norm_eps=1e-12,
        pad_token_id=0,
        position_embedding_type="absolute",
        use_cache=True,
        classifier_dropout=None,
        **kwargs,
    ):
        # The base class consumes pad_token_id plus any unrecognized kwargs.
        super().__init__(pad_token_id=pad_token_id, **kwargs)

        # Model architecture dimensions.
        self.vocab_size = vocab_size
        self.hidden_size = hidden_size
        self.num_hidden_layers = num_hidden_layers
        self.num_attention_heads = num_attention_heads
        self.intermediate_size = intermediate_size
        self.hidden_act = hidden_act

        # Regularization / dropout settings.
        self.hidden_dropout_prob = hidden_dropout_prob
        self.attention_probs_dropout_prob = attention_probs_dropout_prob
        self.classifier_dropout = classifier_dropout

        # Embedding, initialization, and runtime behavior settings.
        self.max_position_embeddings = max_position_embeddings
        self.type_vocab_size = type_vocab_size
        self.initializer_range = initializer_range
        self.layer_norm_eps = layer_norm_eps
        self.position_embedding_type = position_embedding_type
        self.use_cache = use_cache
- 自定义配置类必须继承自“PretrainedConfig”。
- 自定义配置类的“__init__”必须接受任意“kwargs”,并将这些“kwargs”传递给父类的“__init__”。
- 完成自定义配置类的编写后,可以使用该类创建配置实例。
# Build a configuration instance with a few defaults overridden, then
# serialize it: save_pretrained writes a config.json file into ./mynewbert.
newbert1_config = NewBertConfig(num_hidden_layers=6, num_attention_heads=10, use_cache=False)
newbert1_config.save_pretrained("mynewbert")
这一步会在本地名为mynewbert的文件夹中保存一个名为config.json的文件。
保存后的配置同样可以通过调用from_pretrained方法重新加载,该方法会返回一个新的配置实例。
# Reload the configuration saved by save_pretrained. from_pretrained is a
# classmethod that RETURNS a new config instance, so the result must be
# bound; calling it on an existing instance does not update that instance
# in place (the original snippet discarded the return value).
newbert1_config = NewBertConfig.from_pretrained("mynewbert")
- 编写完配置部分,开始编写自定义模型。
下面展示了3种模型基类的代码示例,为了确保示例不过于复杂,本文对部分代码片段进行了省略展示。
- 预训练模型基类NewBertPreTrainedModel
from galleryformers import PreTrainedModel
from .configuration_newbert import NewBertConfig


class NewBertPreTrainedModel(PreTrainedModel):
    # Ties the model to its configuration class so the framework can
    # rebuild the config automatically when loading checkpoints.
    config_class = NewBertConfig
    # NOTE(review): load_tf_weights_in_bert and nn are not defined in this
    # abridged snippet -- presumably imported/defined in the omitted parts
    # of the modeling file (e.g. torch.nn); confirm in the full source.
    load_tf_weights = load_tf_weights_in_bert
    # Prefix used for the base model's weight names inside checkpoints.
    base_model_prefix = "bert"
    supports_gradient_checkpointing = True

    def _init_weights(self, module):
        """Initialize the weights"""
        if isinstance(module, nn.Linear):
            # Normal init for dense layers, scaled by initializer_range.
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.bias is not None:
                module.bias.data.zero_()
        elif isinstance(module, nn.Embedding):
            module.weight.data.normal_(mean=0.0, std=self.config.initializer_range)
            if module.padding_idx is not None:
                # Keep the padding token's embedding at zero.
                module.weight.data[module.padding_idx].zero_()
        elif isinstance(module, nn.LayerNorm):
            # LayerNorm starts as the identity transform (bias 0, scale 1).
            module.bias.data.zero_()
            module.weight.data.fill_(1.0)
- 基础模型类NewBertModel:该类继承自NewBertPreTrainedModel。
class NewBertModel(NewBertPreTrainedModel):
    """Bare NewBert encoder (embeddings + encoder + optional pooler).

    NOTE(review): this snippet is deliberately abridged by the article --
    the forward body is omitted and its signature ends with an ellipsis.
    """

    def __init__(self, config, add_pooling_layer=True):
        super().__init__(config)
        self.config = config
        self.embeddings = BertEmbeddings(config)
        self.encoder = BertEncoder(config)
        # The pooler is optional so heads that do not need it save parameters.
        self.pooler = BertPooler(config) if add_pooling_layer else None
        # Initialize weights and apply final processing
        self.post_init()

    def get_input_embeddings(self):
        # Token embedding table that maps input_ids to vectors.
        return self.embeddings.word_embeddings

    def set_input_embeddings(self, value):
        self.embeddings.word_embeddings = value

    def _prune_heads(self, heads_to_prune):
        # heads_to_prune: {layer_index: iterable of head indices to remove}.
        for layer, heads in heads_to_prune.items():
            self.encoder.layer[layer].attention.prune_heads(heads)

    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        encoder_hidden_states: Optional[torch.Tensor] = None,
        encoder_attention_mask: Optional[torch.Tensor] = None,
        past_key_values: Optional[List[torch.FloatTensor]] = None,
        use_cache: Optional[bool] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
        # NOTE(review): remainder of the signature and the body are omitted
        # in the original article ("...").
        ...)
所有的模型都需要通过“forward”方法来实现自己的推理逻辑,这个方法会在执行“model(input_ids)”时被调用。
- 模型基类NewBertForXXX:该类继承自NewBertPreTrainedModel。
该类可用于执行AI Gallery工具链服务,此处以文本问答(Question Answering)的任务类型为例:
class NewBertForQuestionAnswering(NewBertPreTrainedModel):
    """NewBert with a span-classification head for extractive QA.

    Produces start/end logits over the input sequence. When both
    ``start_positions`` and ``end_positions`` are supplied, the averaged
    cross-entropy loss over the two boundaries is also returned.
    """

    def __init__(self, config):
        super().__init__(config)
        self.num_labels = config.num_labels
        # No pooling layer: QA only needs per-token hidden states.
        self.bert = BertModel(config, add_pooling_layer=False)
        self.qa_outputs = nn.Linear(config.hidden_size, config.num_labels)
        # Initialize weights and apply final processing
        self.post_init()

    def forward(
        self,
        input_ids: Optional[torch.Tensor] = None,
        attention_mask: Optional[torch.Tensor] = None,
        token_type_ids: Optional[torch.Tensor] = None,
        position_ids: Optional[torch.Tensor] = None,
        head_mask: Optional[torch.Tensor] = None,
        inputs_embeds: Optional[torch.Tensor] = None,
        start_positions: Optional[torch.Tensor] = None,
        end_positions: Optional[torch.Tensor] = None,
        output_attentions: Optional[bool] = None,
        output_hidden_states: Optional[bool] = None,
        return_dict: Optional[bool] = None,
    ):  # BUGFIX: the original snippet was missing the ":" after the signature.
        return_dict = return_dict if return_dict is not None else self.config.use_return_dict

        outputs = self.bert(
            input_ids,
            attention_mask=attention_mask,
            token_type_ids=token_type_ids,
            position_ids=position_ids,
            head_mask=head_mask,
            inputs_embeds=inputs_embeds,
            output_attentions=output_attentions,
            output_hidden_states=output_hidden_states,
            return_dict=return_dict,
        )

        sequence_output = outputs[0]
        # Project each token to two scores and split into start/end logits.
        logits = self.qa_outputs(sequence_output)
        start_logits, end_logits = logits.split(1, dim=-1)
        start_logits = start_logits.squeeze(-1).contiguous()
        end_logits = end_logits.squeeze(-1).contiguous()

        total_loss = None
        if start_positions is not None and end_positions is not None:
            # If we are on multi-GPU, split add a dimension
            if len(start_positions.size()) > 1:
                start_positions = start_positions.squeeze(-1)
            if len(end_positions.size()) > 1:
                end_positions = end_positions.squeeze(-1)
            # sometimes the start/end positions are outside our model inputs, we ignore these terms
            ignored_index = start_logits.size(1)
            start_positions = start_positions.clamp(0, ignored_index)
            end_positions = end_positions.clamp(0, ignored_index)

            loss_fct = CrossEntropyLoss(ignore_index=ignored_index)
            start_loss = loss_fct(start_logits, start_positions)
            end_loss = loss_fct(end_logits, end_positions)
            total_loss = (start_loss + end_loss) / 2

        if not return_dict:
            # Tuple output: (loss?, start_logits, end_logits, *extras).
            output = (start_logits, end_logits) + outputs[2:]
            return ((total_loss,) + output) if total_loss is not None else output

        return QuestionAnsweringModelOutput(
            loss=total_loss,
            start_logits=start_logits,
            end_logits=end_logits,
            hidden_states=outputs.hidden_states,
            attentions=outputs.attentions,
        )
这个带任务头的模型的“forward”函数会先调用“self.bert.forward()”得到序列的隐藏状态,然后再通过“self.qa_outputs”线性层生成最终的起止位置logits。
- 预训练模型基类NewBertPreTrainedModel
- 完成了自定义模型类的编写后,可以使用该类创建一个模型实例:
# Instantiate the task model from the configuration built earlier. At this
# point the weights are freshly initialized -- no pretrained weights loaded.
newbert = NewBertForQuestionAnswering(newbert1_config)
模型权重可以通过调用“.from_pretrained()”加载,该类方法会返回一个加载了权重的新模型实例:newbert = NewBertForQuestionAnswering.from_pretrained(pretrained_model_name_or_path="./您的权重文件本地存储路径")
后续操作
自定义模型文件构建完成后,可以参考托管模型资产将模型文件托管至AI Gallery。建议托管的模型文件列表参见表1。
文件名称 |
描述 |
---|---|
config.json |
模型配置文件。 |
model.safetensors或pytorch_model.bin |
预训练模型的权重文件。 |
tokenizer.json |
(可选)预处理器的词表文件,用于初始化Tokenizer。 |
tokenizer_config.json |
(可选)预处理器的配置文件。 |
modeling_xxx.py |
(可选)自定义模型的代码文件,继承自PretrainedModel,包含实现自定义推理逻辑的代码。 |
configuration_xxx.py |
(可选)自定义配置的代码文件,继承自PretrainedConfig,包含实现自定义配置的逻辑代码。 |