• Learning the ChatGLM2-6B model by reading its source line by line: the modeling_chatglm.py file in the project


    Model source code address

    """ PyTorch ChatGLM model. 
    ChatGLMModel模型结构 (假设输入X大小为 3x5)  转载自:https://blog.csdn.net/hjyai94/article/details/132504200
    (embedding) Embedding (转置后 5x3x4096)
        word_embeddings: Embedding(65024, 4096)
    (rotary_pos_emb) RotaryEmbedding()
    (encoder) GLMTransformer
        (layers) ModuleList
        0-27: 28 x GLMBlock
            (input_layernorm) RMSNorm() (输入输出大小: 5x3x4096)
            (self_attention) SelfAttention
                (query_key_value) Linear(in_features=4096, out_features=4608, bias=True)
                (core_attention) CoreAttention(attention_dropout) Dropout(p=0.0, inplace=False))
                (dense) Linear(in_features=4096, out_features=4096, bias=False)
            (post_attention_layernorm) RMSNorm()
            (mlp) MLP
                (dense_h_to_4h) Linear(in_features=4096, out_features=27392, bias=False)
                (dense_4h_to_h) Linear(in_features=13696, out_features=4096, bias=False)
        (final_layernorm) RMSNorm()
    (output_layer) Linear(in_features=4096, out_features=65024, bias=False) (输出大小: 3x5x65024)
    
    """
    # Basic Python imports
    import math
    import copy
    import warnings
    import re
    import sys
    # PyTorch-related imports
    import torch
    import torch.utils.checkpoint
    import torch.nn.functional as F
    from torch import nn
    from torch.nn import CrossEntropyLoss, LayerNorm
    from torch.nn.utils import skip_init
    from typing import Optional, Tuple, Union, List, Callable, Dict, Any
    # transformers-related imports
    from transformers.modeling_outputs import (
        BaseModelOutputWithPast,
        CausalLMOutputWithPast,
    )
    from transformers.modeling_utils import PreTrainedModel
    from transformers.utils import logging
    from transformers.generation.logits_process import LogitsProcessor
    from transformers.generation.utils import LogitsProcessorList, StoppingCriteriaList, GenerationConfig, ModelOutput
    # Import the ChatGLMConfig class from configuration_chatglm.py in the same directory; it defines the model's structural hyperparameters, e.g. the number of layers num_layers and the vocabulary size vocab_size
    from .configuration_chatglm import ChatGLMConfig
    
    # flags required to enable jit fusion kernels
    # On non-macOS systems, disable the profiling mode and profiling executor, and allow graph fusion on both CPU and GPU.
    if sys.platform != 'darwin':
        torch._C._jit_set_profiling_mode(False)
        torch._C._jit_set_profiling_executor(False)
        torch._C._jit_override_can_fuse_on_cpu(True)
        torch._C._jit_override_can_fuse_on_gpu(True)
    
    # logging comes from the transformers.utils module
    # __name__ is a built-in Python variable holding the current module's name: '__main__' when the module runs as the main program, otherwise its import name.
    # logger is an object for recording (logging) messages. Once configured, it can emit messages at different levels (debug, info, warning, error, critical) from anywhere in the code.
    # e.g. logger.info("This is an info message") or logger.warning("This is a warning message")
    logger = logging.get_logger(__name__)
    
    # Model checkpoint referenced in the documentation
    _CHECKPOINT_FOR_DOC = "THUDM/ChatGLM2-6B"
    
    # Config class referenced in the documentation
    _CONFIG_FOR_DOC = "ChatGLM6BConfig"
    # Pretrained model archive list
    CHATGLM_6B_PRETRAINED_MODEL_ARCHIVE_LIST = [
        "THUDM/chatglm2-6b",
        # See all ChatGLM models at https://huggingface.co/models?filter=chatglm
    ]
    
    # Default initializer: instantiate the class directly
    def default_init(cls, *args, **kwargs):
        return cls(*args, **kwargs)
    
    
    class InvalidScoreLogitsProcessor(LogitsProcessor):
        def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor) -> torch.FloatTensor:
            if torch.isnan(scores).any() or torch.isinf(scores).any():
                scores.zero_()
                scores[..., 5] = 5e4
            return scores
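
    # Illustrative sketch (not part of the original file): if the logits contain NaN or Inf,
    # the processor above zeroes every score and forces a large value onto token id 5, so
    # generation degrades gracefully instead of sampling from garbage.
    def _demo_invalid_score_processor():
        scores = torch.tensor([[float("nan"), 0.0, 1.0, 2.0, 3.0, 4.0]])
        fixed = InvalidScoreLogitsProcessor()(torch.zeros(1, 1, dtype=torch.long), scores)
        assert fixed[0, 5] == 5e4 and fixed[0, 0] == 0.0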
    
    # Prefix-encoder layer used for fine-tuning: it adds a prefix-encoding layer to the original ChatGLM2 model's input; during fine-tuning only these parameters receive gradient updates
    class PrefixEncoder(torch.nn.Module):
        """
        The torch.nn model to encode the prefix
        Input shape: (batch-size, prefix-length)
        Output shape: (batch-size, prefix-length, 2*layers*hidden)
        """
    
        def __init__(self, config: ChatGLMConfig):
            super().__init__()
            # prefix_projection defaults to False; it controls whether a custom projection is applied to the prefix encoding. If self.prefix_projection is True we take the if branch
            self.prefix_projection = config.prefix_projection
            if self.prefix_projection:
                # Use a two-layer MLP to encode the prefix
                # Defaults from ChatGLMConfig in configuration_chatglm.py: num_layers=28, kv_channels=128, multi_query_group_num=2 (for the released ChatGLM2-6B config)
                kv_size = config.num_layers * config.kv_channels * config.multi_query_group_num * 2
                # pre_seq_len defaults to None; it is the predefined length of the prefix sequence and sets the Embedding's input size, while kv_size sets the Embedding's output size
                self.embedding = torch.nn.Embedding(config.pre_seq_len, kv_size)
                # Define a trans stack; the data flow is embedding layer --> trans layer --> decoding layer, giving the features an extra transformation before they enter the decoder
                self.trans = torch.nn.Sequential(
                    torch.nn.Linear(kv_size, config.hidden_size),
                    torch.nn.Tanh(),
                    torch.nn.Linear(config.hidden_size, kv_size)
                )
            else:
                # Otherwise define the embedding directly
                self.embedding = torch.nn.Embedding(config.pre_seq_len,
                                                    config.num_layers * config.kv_channels * config.multi_query_group_num * 2)
    
        def forward(self, prefix: torch.Tensor):
            # With the projection, the data flow is prompt --> Embedding layer (defined later) --> embedding layer (self.embedding) --> trans layer --> decoding layer
            if self.prefix_projection:
                prefix_tokens = self.embedding(prefix)
                past_key_values = self.trans(prefix_tokens)
            else:
                # Without the projection, the data flow is prompt --> Embedding layer (defined later) --> embedding layer (self.embedding) --> decoding layer
                past_key_values = self.embedding(prefix)
            return past_key_values
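
    # Illustrative shape check (not part of the original file) for PrefixEncoder; the tiny
    # SimpleNamespace config below is a stand-in assumption, not the released ChatGLM2-6B config.
    def _demo_prefix_encoder():
        from types import SimpleNamespace
        cfg = SimpleNamespace(prefix_projection=False, pre_seq_len=4,
                              num_layers=28, kv_channels=128, multi_query_group_num=2)
        enc = PrefixEncoder(cfg)
        prefix = torch.arange(4).unsqueeze(0)  # (batch=1, prefix_len=4)
        out = enc(prefix)
        # output width = num_layers * kv_channels * multi_query_group_num * 2
        assert out.shape == (1, 4, 28 * 128 * 2 * 2)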
    
    # Helper that splits a tensor along its last dimension; e.g. splitting a [2,512,8] input into 4 partitions yields four [2,512,2] tensors
    def split_tensor_along_last_dim(
            tensor: torch.Tensor,
            num_partitions: int,
            contiguous_split_chunks: bool = False,
    ) -> List[torch.Tensor]:
        """Split a tensor along its last dimension.
    
        Arguments:
            tensor: input tensor.
            num_partitions: number of partitions to split the tensor
            contiguous_split_chunks: If True, make each chunk contiguous
                                     in memory.
    
        Returns:
            A list of Tensors
        """
        # Get the size and dimension.
        # If tensor is [2,512,8], tensor.dim() returns 3, so last_dim = 2
        last_dim = tensor.dim() - 1
        # tensor.size() returns the tuple (2,512,8), so tensor.size()[last_dim] = 8
        # with num_partitions = 4, last_dim_size is therefore 2 (note // is floor division)
        last_dim_size = tensor.size()[last_dim] // num_partitions
        # Split.
        # split along the last dimension into four [2,512,2] chunks (if the size is not evenly divisible, the final chunk is smaller, e.g. [2,512,1])
        tensor_list = torch.split(tensor, last_dim_size, dim=last_dim)
        # Note: torch.split does not create contiguous tensors by default.
        # if chunks that are contiguous in memory are required
        if contiguous_split_chunks:
            return tuple(chunk.contiguous() for chunk in tensor_list)
    
        return tensor_list
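
    # Illustrative sketch (not part of the original file): the [2,512,8] example from the
    # comment above, split into 4 partitions of shape [2,512,2].
    def _demo_split_tensor():
        t = torch.randn(2, 512, 8)
        chunks = split_tensor_along_last_dim(t, 4)
        assert len(chunks) == 4 and all(c.shape == (2, 512, 2) for c in chunks)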
    
    # Positional-encoding layer implementing RoPE (rotary position embedding), following the PaLM-style implementation
    class RotaryEmbedding(nn.Module):
        def __init__(self, dim, original_impl=False, device=None, dtype=None):
            super().__init__()
            # Precompute the frequencies θ
            inv_freq = 1.0 / (10000 ** (torch.arange(0, dim, 2, device=device).to(dtype=dtype) / dim))
            self.register_buffer("inv_freq", inv_freq)
            self.dim = dim
            self.original_impl = original_impl
    
        def forward_impl(
                self, seq_len: int, n_elem: int, dtype: torch.dtype, device: torch.device, base: int = 10000
        ):
            """Enhanced Transformer with Rotary Position Embedding.
    
            Derived from: https://github.com/labmlai/annotated_deep_learning_paper_implementations/blob/master/labml_nn/
            transformers/rope/__init__.py. MIT License:
            https://github.com/labmlai/annotated_deep_learning_paper_implementations/blob/master/license.
            """
            # $\Theta = {\theta_i = 10000^{-\frac{2(i-1)}{d}}, i \in [1, 2, ..., \frac{d}{2}]}$
            theta = 1.0 / (base ** (torch.arange(0, n_elem, 2, dtype=dtype, device=device) / n_elem))
    
            # Create position indexes `[0, 1, ..., seq_len - 1]`
            seq_idx = torch.arange(seq_len, dtype=dtype, device=device)
    
            # Calculate the product of position index and $\theta_i$
            idx_theta = torch.outer(seq_idx, theta).float()
    
            cache = torch.stack([torch.cos(idx_theta), torch.sin(idx_theta)], dim=-1)
    
            # this is to mimic the behaviour of complex32, else we will get different results
            if dtype in (torch.float16, torch.bfloat16, torch.int8):
                cache = cache.bfloat16() if dtype == torch.bfloat16 else cache.half()
            return cache
    
        def forward(self, max_seq_len, offset=0):
            return self.forward_impl(
                max_seq_len, self.dim, dtype=self.inv_freq.dtype, device=self.inv_freq.device
            )
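
    # Illustrative sketch (not part of the original file): the RoPE cache stores one
    # (cos, sin) pair per position and per frequency, i.e. shape [seq_len, dim/2, 2].
    def _demo_rotary_cache():
        rope = RotaryEmbedding(dim=64, dtype=torch.float32)
        cache = rope(max_seq_len=16)
        assert cache.shape == (16, 32, 2)  # 16 positions, 64/2 frequencies, cos+sin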
    
    # The decorator below compiles the function with TorchScript; jit stands for "just-in-time" compilation. torch.jit is the PyTorch submodule that converts Python code into efficient, optimized machine code.
    @torch.jit.script
    def apply_rotary_pos_emb(x: torch.Tensor, rope_cache: torch.Tensor) -> torch.Tensor:
        # x: [sq, b, np, hn]
        sq, b, np, hn = x.size(0), x.size(1), x.size(2), x.size(3)
        rot_dim = rope_cache.shape[-2] * 2
        x, x_pass = x[..., :rot_dim], x[..., rot_dim:]
        # truncate to support variable sizes
        rope_cache = rope_cache[:sq]
        xshaped = x.reshape(sq, -1, np, rot_dim // 2, 2)
        rope_cache = rope_cache.view(sq, -1, 1, xshaped.size(3), 2)
        x_out2 = torch.stack(
            [
                xshaped[..., 0] * rope_cache[..., 0] - xshaped[..., 1] * rope_cache[..., 1],
                xshaped[..., 1] * rope_cache[..., 0] + xshaped[..., 0] * rope_cache[..., 1],
            ],
            -1,
        )
        x_out2 = x_out2.flatten(3)
        return torch.cat((x_out2, x_pass), dim=-1)
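
    # Illustrative sketch (not part of the original file): applying RoPE rotates each
    # (even, odd) feature pair by a position-dependent angle, so vector norms are preserved.
    def _demo_apply_rope():
        rope = RotaryEmbedding(dim=8, dtype=torch.float32)
        cache = rope(max_seq_len=6)  # [6, 4, 2] = [sq, dim/2, (cos, sin)]
        x = torch.randn(6, 2, 3, 8)  # [sq, b, np, hn]
        y = apply_rotary_pos_emb(x, cache)
        assert y.shape == x.shape
        # each pair is rotated by an angle with cos^2 + sin^2 = 1, so norms are unchanged
        assert torch.allclose(x.norm(dim=-1), y.norm(dim=-1), atol=1e-5)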
    
    # RMSNorm subclasses torch.nn.Module, the base class for PyTorch modules.
    # It defines a custom normalization: the input tensor is normalized by its root mean square and then scaled by a trainable weight.
    class RMSNorm(torch.nn.Module):
        def __init__(self, normalized_shape, eps=1e-5, device=None, dtype=None, **kwargs):
            super().__init__()
            # torch.nn.Parameter creates a trainable weight tensor self.weight with shape normalized_shape, stored as a module attribute.
            self.weight = torch.nn.Parameter(torch.empty(normalized_shape, device=device, dtype=dtype))
            self.eps = eps  # small constant that guards against division by zero
    
        def forward(self, hidden_states: torch.Tensor):
            input_dtype = hidden_states.dtype  # remember the input dtype so the output can be cast back to it at the end
            variance = hidden_states.to(torch.float32).pow(2).mean(-1, keepdim=True)  # cast to torch.float32, square (.pow(2)), then average over the last dimension (.mean(-1, keepdim=True)); variance is the mean square of the input along its last dimension
            hidden_states = hidden_states * torch.rsqrt(variance + self.eps)  # multiply by torch.rsqrt(variance + self.eps), the reciprocal square root, normalizing the input to unit RMS
            # scale hidden_states by the weight tensor self.weight and cast the result back to input_dtype
            return (self.weight * hidden_states).to(input_dtype)
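
    # Illustrative sketch (not part of the original file): RMSNorm is simply
    # x / rms(x) * weight, with no mean subtraction (unlike LayerNorm).
    def _demo_rmsnorm():
        norm = RMSNorm(8)
        torch.nn.init.ones_(norm.weight)  # self.weight starts uninitialized (torch.empty)
        x = torch.randn(2, 8)
        rms = x.pow(2).mean(-1, keepdim=True).sqrt()
        assert torch.allclose(norm(x), x / rms, atol=1e-3)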
    
    # Core of the self-attention layer
    class CoreAttention(torch.nn.Module):
        def __init__(self, config: ChatGLMConfig, layer_number):
            super(CoreAttention, self).__init__()
            # apply_query_key_layer_scaling defaults to True: scale the query-key scores by the layer number (a numerical-stability trick)
            self.apply_query_key_layer_scaling = config.apply_query_key_layer_scaling
            # attention_softmax_in_fp32 defaults to True: compute the softmax in fp32
            self.attention_softmax_in_fp32 = config.attention_softmax_in_fp32
            if self.apply_query_key_layer_scaling:
                self.attention_softmax_in_fp32 = True
            self.layer_number = max(1, layer_number)
            # kv_channels (the per-head K/V width) defaults to 128; num_attention_heads defaults to 32
            projection_size = config.kv_channels * config.num_attention_heads
    
            # Per attention head and per partition values.
            # projection width before the head split
            self.hidden_size_per_partition = projection_size
            # width of each attention head
            self.hidden_size_per_attention_head = projection_size // config.num_attention_heads
            # number of attention heads, num_attention_heads = 32
            self.num_attention_heads_per_partition = config.num_attention_heads
            # Below computes the sqrt(d_k) that Q*K^T is divided by in the attention formula
            coeff = None
            self.norm_factor = math.sqrt(self.hidden_size_per_attention_head)
            if self.apply_query_key_layer_scaling:
                coeff = self.layer_number
                self.norm_factor *= coeff
            self.coeff = coeff
            # attention_dropout defaults to 0
            self.attention_dropout = torch.nn.Dropout(config.attention_dropout)
    
        def forward(self, query_layer, key_layer, value_layer, attention_mask):
            pytorch_major_version = int(torch.__version__.split('.')[0])
            if pytorch_major_version >= 2:
                # Iterate over the three tensors query_layer, key_layer and value_layer.
                # k.permute(1, 2, 0, 3) reorders the tensor's axes: the dimension at position 0 (the sequence axis here) moves to position 2 while batch and heads move forward,
                # changing the layout from [sq, b, np, hn] to [b, np, sq, hn], which is what scaled_dot_product_attention expects.
                query_layer, key_layer, value_layer = [k.permute(1, 2, 0, 3) for k in [query_layer, key_layer, value_layer]]
                if attention_mask is None and query_layer.shape[2] == key_layer.shape[2]:
                    context_layer = torch.nn.functional.scaled_dot_product_attention(query_layer, key_layer, value_layer,
                                                                                     is_causal=True)
                else:
                    if attention_mask is not None:
                        # invert the boolean attention_mask (~ swaps True and False)
                        attention_mask = ~attention_mask
                    # compute softmax(QK^T / sqrt(d_k)) * V
                    context_layer = torch.nn.functional.scaled_dot_product_attention(query_layer, key_layer, value_layer,
                                                                                     attention_mask)
                # permute back to [sq, b, np, hn]
                context_layer = context_layer.permute(2, 0, 1, 3)
    
                new_context_layer_shape = context_layer.size()[:-2] + (self.hidden_size_per_partition,)
                # merge the head dimensions: [sq, b, np, hn] -> [sq, b, hp]
                context_layer = context_layer.reshape(*new_context_layer_shape)
            else:
                # Raw attention scores
    
                # [b, np, sq, sk]
                output_size = (query_layer.size(1), query_layer.size(2), query_layer.size(0), key_layer.size(0))
    
                # [sq, b, np, hn] -> [sq, b * np, hn]
                query_layer = query_layer.view(output_size[2], output_size[0] * output_size[1], -1)
                # [sk, b, np, hn] -> [sk, b * np, hn]
                key_layer = key_layer.view(output_size[3], output_size[0] * output_size[1], -1)
    
                # preallocating input tensor: [b * np, sq, sk]
                matmul_input_buffer = torch.empty(
                    output_size[0] * output_size[1], output_size[2], output_size[3], dtype=query_layer.dtype,
                    device=query_layer.device
                )
    
                # Raw attention scores. [b * np, sq, sk]
                matmul_result = torch.baddbmm(
                    matmul_input_buffer,
                    query_layer.transpose(0, 1),  # [b * np, sq, hn]
                    key_layer.transpose(0, 1).transpose(1, 2),  # [b * np, hn, sk]
                    beta=0.0,
                    alpha=(1.0 / self.norm_factor),
                )
    
                # change view to [b, np, sq, sk]
                attention_scores = matmul_result.view(*output_size)
    
                # ===========================
                # Attention probs and dropout
                # ===========================
    
                # attention scores and attention mask [b, np, sq, sk]
                if self.attention_softmax_in_fp32:
                    attention_scores = attention_scores.float()
                if self.coeff is not None:
                    attention_scores = attention_scores * self.coeff
                if attention_mask is None and attention_scores.shape[2] == attention_scores.shape[3]:
                    attention_mask = torch.ones(output_size[0], 1, output_size[2], output_size[3],
                                                device=attention_scores.device, dtype=torch.bool)
                    attention_mask.tril_()
                    attention_mask = ~attention_mask
                if attention_mask is not None:
                    attention_scores = attention_scores.masked_fill(attention_mask, float("-inf"))
                attention_probs = F.softmax(attention_scores, dim=-1)
                attention_probs = attention_probs.type_as(value_layer)
    
                # This is actually dropping out entire tokens to attend to, which might
                # seem a bit unusual, but is taken from the original Transformer paper.
                attention_probs = self.attention_dropout(attention_probs)
                # =========================
                # Context layer. [sq, b, hp]
                # =========================
    
                # value_layer -> context layer.
                # [sk, b, np, hn] --> [b, np, sq, hn]
    
                # context layer shape: [b, np, sq, hn]
                output_size = (value_layer.size(1), value_layer.size(2), query_layer.size(0), value_layer.size(3))
                # change view [sk, b * np, hn]
                value_layer = value_layer.view(value_layer.size(0), output_size[0] * output_size[1], -1)
                # change view [b * np, sq, sk]
                attention_probs = attention_probs.view(output_size[0] * output_size[1], output_size[2], -1)
                # matmul: [b * np, sq, hn]
                context_layer = torch.bmm(attention_probs, value_layer.transpose(0, 1))
                # change view [b, np, sq, hn]
                context_layer = context_layer.view(*output_size)
                # [b, np, sq, hn] --> [sq, b, np, hn]
                context_layer = context_layer.permute(2, 0, 1, 3).contiguous()
                # [sq, b, np, hn] --> [sq, b, hp]
                new_context_layer_shape = context_layer.size()[:-2] + (self.hidden_size_per_partition,)
                context_layer = context_layer.view(*new_context_layer_shape)
    
            return context_layer
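
    # Illustrative sketch (not part of the original file): CoreAttention consumes
    # [sq, b, np, hn] tensors and returns [sq, b, np*hn]; the SimpleNamespace config is a
    # toy stand-in for ChatGLMConfig, not the released configuration.
    def _demo_core_attention():
        from types import SimpleNamespace
        cfg = SimpleNamespace(apply_query_key_layer_scaling=True, attention_softmax_in_fp32=True,
                              kv_channels=16, num_attention_heads=4, attention_dropout=0.0)
        attn = CoreAttention(cfg, layer_number=1)
        q = k = v = torch.randn(5, 2, 4, 16)  # [sq, b, np, hn]
        ctx = attn(q, k, v, attention_mask=None)
        assert ctx.shape == (5, 2, 64)  # [sq, b, np*hn]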
    
    
    class SelfAttention(torch.nn.Module):
        """Parallel self-attention layer abstract class.
    
        Self-attention layer takes input with size [s, b, h]
        and returns output of the same size.
        """
    
        def __init__(self, config: ChatGLMConfig, layer_number, device=None):
            super(SelfAttention, self).__init__()
            self.layer_number = max(1, layer_number)
            
            self.projection_size = config.kv_channels * config.num_attention_heads
    
            # Per attention head and per partition values.
            self.hidden_size_per_attention_head = self.projection_size // config.num_attention_heads
            self.num_attention_heads_per_partition = config.num_attention_heads
    
            self.multi_query_attention = config.multi_query_attention
            self.qkv_hidden_size = 3 * self.projection_size
            if self.multi_query_attention:
                # multi_query_group_num defaults to 2 for ChatGLM2-6B
                self.num_multi_query_groups_per_partition = config.multi_query_group_num
                self.qkv_hidden_size = (
                        self.projection_size + 2 * self.hidden_size_per_attention_head * config.multi_query_group_num
                )
            self.query_key_value = nn.Linear(config.hidden_size, self.qkv_hidden_size,
                                             bias=config.add_bias_linear or config.add_qkv_bias,
                                             device=device, **_config_to_kwargs(config)
                                             )
    
            self.core_attention = CoreAttention(config, self.layer_number)
    
            # Output.
            self.dense = nn.Linear(self.projection_size, config.hidden_size, bias=config.add_bias_linear,
                                   device=device, **_config_to_kwargs(config)
                                   )
    
        def _allocate_memory(self, inference_max_sequence_len, batch_size, device=None, dtype=None):
            if self.multi_query_attention:
                num_attention_heads = self.num_multi_query_groups_per_partition
            else:
                num_attention_heads = self.num_attention_heads_per_partition
            return torch.empty(
                inference_max_sequence_len,
                batch_size,
                num_attention_heads,
                self.hidden_size_per_attention_head,
                dtype=dtype,
                device=device,
            )
    
        def forward(
                self, hidden_states, attention_mask, rotary_pos_emb, kv_cache=None, use_cache=True
        ):
            # hidden_states: [sq, b, h]
    
            # =================================================
            # Pre-allocate memory for key-values for inference.
            # =================================================
            # =====================
            # Query, Key, and Value
            # =====================
    
            # Attention heads [sq, b, h] --> [sq, b, (np * 3 * hn)]
            mixed_x_layer = self.query_key_value(hidden_states)
    
            if self.multi_query_attention:
                (query_layer, key_layer, value_layer) = mixed_x_layer.split(
                    [
                        self.num_attention_heads_per_partition * self.hidden_size_per_attention_head,
                        self.num_multi_query_groups_per_partition * self.hidden_size_per_attention_head,
                        self.num_multi_query_groups_per_partition * self.hidden_size_per_attention_head,
                    ],
                    dim=-1,
                )
                query_layer = query_layer.view(
                    query_layer.size()[:-1] + (self.num_attention_heads_per_partition, self.hidden_size_per_attention_head)
                )
                key_layer = key_layer.view(
                    key_layer.size()[:-1] + (self.num_multi_query_groups_per_partition, self.hidden_size_per_attention_head)
                )
                value_layer = value_layer.view(
                    value_layer.size()[:-1]
                    + (self.num_multi_query_groups_per_partition, self.hidden_size_per_attention_head)
                )
            else:
                new_tensor_shape = mixed_x_layer.size()[:-1] + \
                                   (self.num_attention_heads_per_partition,
                                    3 * self.hidden_size_per_attention_head)
                mixed_x_layer = mixed_x_layer.view(*new_tensor_shape)
    
                # [sq, b, np, 3 * hn] --> 3 [sq, b, np, hn]
                (query_layer, key_layer, value_layer) = split_tensor_along_last_dim(mixed_x_layer, 3)
    
            # apply relative positional encoding (rotary embedding)
            if rotary_pos_emb is not None:
                query_layer = apply_rotary_pos_emb(query_layer, rotary_pos_emb)
                key_layer = apply_rotary_pos_emb(key_layer, rotary_pos_emb)
    
            # adjust key and value for inference
            if kv_cache is not None:
                cache_k, cache_v = kv_cache
                key_layer = torch.cat((cache_k, key_layer), dim=0)
                value_layer = torch.cat((cache_v, value_layer), dim=0)
            if use_cache:
                kv_cache = (key_layer, value_layer)
            else:
                kv_cache = None
    
            if self.multi_query_attention:
                key_layer = key_layer.unsqueeze(-2)
                key_layer = key_layer.expand(
                    -1, -1, -1, self.num_attention_heads_per_partition // self.num_multi_query_groups_per_partition, -1
                )
                key_layer = key_layer.contiguous().view(
                    key_layer.size()[:2] + (self.num_attention_heads_per_partition, self.hidden_size_per_attention_head)
                )
                value_layer = value_layer.unsqueeze(-2)
                value_layer = value_layer.expand(
                    -1, -1, -1, self.num_attention_heads_per_partition // self.num_multi_query_groups_per_partition, -1
                )
                value_layer = value_layer.contiguous().view(
                    value_layer.size()[:2] + (self.num_attention_heads_per_partition, self.hidden_size_per_attention_head)
                )
    
            # ==================================
            # core attention computation
            # ==================================
    
            context_layer = self.core_attention(query_layer, key_layer, value_layer, attention_mask)
    
            # =================
            # Output. [sq, b, h]
            # =================
    
            output = self.dense(context_layer)
    
            return output, kv_cache
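
    # Illustrative sketch (not part of the original file): the multi-query expansion used
    # above, where a K/V tensor with g groups is broadcast to np heads via unsqueeze/expand/view.
    def _demo_mqa_expand():
        sq, b, g, heads, hn = 5, 2, 2, 8, 16  # 8 query heads share 2 K/V groups
        kv = torch.randn(sq, b, g, hn)
        kv = kv.unsqueeze(-2).expand(-1, -1, -1, heads // g, -1)
        kv = kv.contiguous().view(sq, b, heads, hn)
        assert kv.shape == (5, 2, 8, 16)
        # heads 0..3 now see group 0's K/V and heads 4..7 see group 1's
        assert torch.equal(kv[:, :, 0], kv[:, :, 3])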
    
    
    def _config_to_kwargs(args):
        common_kwargs = {
            "dtype": args.torch_dtype,
        }
        return common_kwargs
    
    # MLP layer that follows attention
    class MLP(torch.nn.Module):
        """MLP.
    
        MLP will take the input with h hidden state, project it to 4*h
        hidden dimension, perform nonlinear transformation, and project the
        state back into h hidden dimension.
        """
    
        def __init__(self, config: ChatGLMConfig, device=None):
            super(MLP, self).__init__()
    
            self.add_bias = config.add_bias_linear
    
            # Project to 4h. If using swiglu double the output width, see https://arxiv.org/pdf/2002.05202.pdf
            self.dense_h_to_4h = nn.Linear(
                config.hidden_size,
                config.ffn_hidden_size * 2,
                bias=self.add_bias,
                device=device,
                **_config_to_kwargs(config)
            )
    
            def swiglu(x):
                x = torch.chunk(x, 2, dim=-1)
                return F.silu(x[0]) * x[1]
    
            self.activation_func = swiglu
    
            # Project back to h.
            self.dense_4h_to_h = nn.Linear(
                config.ffn_hidden_size,
                config.hidden_size,
                bias=self.add_bias,
                device=device,
                **_config_to_kwargs(config)
            )
    
        def forward(self, hidden_states):
            # [s, b, 4hp]
            intermediate_parallel = self.dense_h_to_4h(hidden_states)
            intermediate_parallel = self.activation_func(intermediate_parallel)
            # [s, b, h]
            output = self.dense_4h_to_h(intermediate_parallel)
            return output
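
    # Illustrative sketch (not part of the original file): SwiGLU splits the up-projection
    # output into two halves (one gated through SiLU, one linear) and multiplies them.
    def _demo_swiglu():
        x = torch.randn(3, 8)  # stand-in for the dense_h_to_4h output (2 * ffn width)
        a, b = torch.chunk(x, 2, dim=-1)
        out = F.silu(a) * b  # same computation as MLP.activation_func
        assert out.shape == (3, 4)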
    
    # The 28 layers are 28 GLMBlocks, each containing (RMSNorm, SelfAttention(Linear, CoreAttention, Linear), RMSNorm, MLP); one extra RMSNorm follows the final layer
    class GLMBlock(torch.nn.Module):
        """A single transformer layer.
    
        Transformer layer takes input with size [s, b, h] and returns an
        output of the same size.
        """
    
        def __init__(self, config: ChatGLMConfig, layer_number, device=None):
            super(GLMBlock, self).__init__()
            self.layer_number = layer_number
    
            self.apply_residual_connection_post_layernorm = config.apply_residual_connection_post_layernorm
    
            self.fp32_residual_connection = config.fp32_residual_connection
    
            LayerNormFunc = RMSNorm if config.rmsnorm else LayerNorm
            # Layernorm on the input data.
            self.input_layernorm = LayerNormFunc(config.hidden_size, eps=config.layernorm_epsilon, device=device,
                                                 dtype=config.torch_dtype)
    
            # Self attention.
            self.self_attention = SelfAttention(config, layer_number, device=device)
            self.hidden_dropout = config.hidden_dropout
    
            # Layernorm on the attention output
            self.post_attention_layernorm = LayerNormFunc(config.hidden_size, eps=config.layernorm_epsilon, device=device,
                                                          dtype=config.torch_dtype)
    
            # MLP
            self.mlp = MLP(config, device=device)
    
        def forward(
                self, hidden_states, attention_mask, rotary_pos_emb, kv_cache=None, use_cache=True,
        ):
            # hidden_states: [s, b, h]
    
            # Layer norm at the beginning of the transformer layer.
            layernorm_output = self.input_layernorm(hidden_states)
            # Self attention.
            attention_output, kv_cache = self.self_attention(
                layernorm_output,
                attention_mask,
                rotary_pos_emb,
                kv_cache=kv_cache,
                use_cache=use_cache
            )
    
            # Residual connection.
            if self.apply_residual_connection_post_layernorm:
                residual = layernorm_output
            else:
                residual = hidden_states
    
            layernorm_input = torch.nn.functional.dropout(attention_output, p=self.hidden_dropout, training=self.training)
            layernorm_input = residual + layernorm_input
    
            # Layer norm post the self attention.
            layernorm_output = self.post_attention_layernorm(layernorm_input)
    
            # MLP.
            mlp_output = self.mlp(layernorm_output)
    
            # Second residual connection.
            if self.apply_residual_connection_post_layernorm:
                residual = layernorm_output
            else:
                residual = layernorm_input
    
            output = torch.nn.functional.dropout(mlp_output, p=self.hidden_dropout, training=self.training)
            output = residual + output
    
            return output, kv_cache
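
    # Illustrative recap (not part of the original file): with the default
    # apply_residual_connection_post_layernorm=False, each GLMBlock follows the standard
    # pre-LN residual pattern:
    #   x = x + dropout(SelfAttention(RMSNorm(x)))
    #   x = x + dropout(MLP(RMSNorm(x)))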
    
    # Transformer network composed of 28 GLMBlock blocks
    class GLMTransformer(torch.nn.Module):
        """Transformer class."""
    
        def __init__(self, config: ChatGLMConfig, device=None):
            super(GLMTransformer, self).__init__()
            # defaults to False
            self.fp32_residual_connection = config.fp32_residual_connection
            # defaults to True
            self.post_layer_norm = config.post_layer_norm
    
            # Number of layers (default 28).
            self.num_layers = config.num_layers
    
            # Transformer layers.
            def build_layer(layer_number):
                return GLMBlock(config, layer_number, device=device)
    
            self.layers = torch.nn.ModuleList([build_layer(i + 1) for i in range(self.num_layers)])
    
            if self.post_layer_norm:
                # rmsnorm defaults to True
                LayerNormFunc = RMSNorm if config.rmsnorm else LayerNorm
                # Final layer norm before output.
                self.final_layernorm = LayerNormFunc(config.hidden_size, eps=config.layernorm_epsilon, device=device,
                                                     dtype=config.torch_dtype)
            # gradient checkpointing reduces GPU memory consumption
            self.gradient_checkpointing = False
    
        def _get_layer(self, layer_number):
            return self.layers[layer_number]
    
        # Optional is a type hint from Python's typing module: the parameter may be the given type or None.
        # For example, Optional[int] means the value can be an int or None
        def forward(
                self, hidden_states, attention_mask, rotary_pos_emb, kv_caches=None,
                use_cache: Optional[bool] = True,
                output_hidden_states: Optional[bool] = False,
        ):
            if not kv_caches:
                kv_caches = [None for _ in range(self.num_layers)]
            presents = () if use_cache else None
            if self.gradient_checkpointing and self.training:
                if use_cache:
                    logger.warning_once(
                        "`use_cache=True` is incompatible with gradient checkpointing. Setting `use_cache=False`..."
                    )
                    use_cache = False
    
            all_self_attentions = None
            all_hidden_states = () if output_hidden_states else None
            for index in range(self.num_layers):
                # defaults to False
                if output_hidden_states:
                    all_hidden_states = all_hidden_states + (hidden_states,)
    
                layer = self._get_layer(index)
                if self.gradient_checkpointing and self.training:
                    layer_ret = torch.utils.checkpoint.checkpoint(
                        layer,
                        hidden_states,
                        attention_mask,
                        rotary_pos_emb,
                        kv_caches[index],
                        use_cache
                    )
                else:
                    layer_ret = layer(
                        hidden_states,
                        attention_mask,
                        rotary_pos_emb,
                        kv_cache=kv_caches[index],
                        use_cache=use_cache
                    )
                hidden_states, kv_cache = layer_ret
                if use_cache:
                    presents = presents + (kv_cache,)
    
            if output_hidden_states:
                all_hidden_states = all_hidden_states + (hidden_states,)
    
            # Final layer norm (post_layer_norm defaults to True)
            if self.post_layer_norm:
                hidden_states = self.final_layernorm(hidden_states)
    
            return hidden_states, presents, all_hidden_states, all_self_attentions
    
    # Base class for the pretrained model
    class ChatGLMPreTrainedModel(PreTrainedModel):
        """
        An abstract class to handle weights initialization and
        a simple interface for downloading and loading pretrained models.
        """
    
        is_parallelizable = False
        supports_gradient_checkpointing = True
        config_class = ChatGLMConfig
        base_model_prefix = "transformer"
        _no_split_modules = ["GLMBlock"]
    
        def _init_weights(self, module: nn.Module):
            """Initialize the weights."""
            return
    
        def get_masks(self, input_ids, past_key_values, padding_mask=None):
            # batch size and sequence length are read from the input shape
            batch_size, seq_length = input_ids.shape
            full_attention_mask = torch.ones(batch_size, seq_length, seq_length, device=input_ids.device)
            full_attention_mask.tril_()
            past_length = 0
            if past_key_values:
                past_length = past_key_values[0][0].shape[0]
            if past_length:
                full_attention_mask = torch.cat((torch.ones(batch_size, seq_length, past_length,
                                                            device=input_ids.device), full_attention_mask), dim=-1)
            if padding_mask is not None:
                full_attention_mask = full_attention_mask * padding_mask.unsqueeze(1)
            if not past_length and padding_mask is not None:
                full_attention_mask -= padding_mask.unsqueeze(-1) - 1
            full_attention_mask = (full_attention_mask < 0.5).bool()
            full_attention_mask.unsqueeze_(1)
            return full_attention_mask
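
        # Worked example (illustrative, not from the original file): with seq_length=3 and
        # past_length=2, the mask before the final inversion is
        #     [[1 1 | 1 0 0],
        #      [1 1 | 1 1 0],
        #      [1 1 | 1 1 1]]
        # i.e. every query position may attend to the whole cache plus the causal lower
        # triangle; (full_attention_mask < 0.5) then flips it so True marks positions that
        # are masked out.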
    
        def get_position_ids(self, input_ids, device):
            batch_size, seq_length = input_ids.shape
            position_ids = torch.arange(seq_length, dtype=torch.long, device=device).unsqueeze(0).repeat(batch_size, 1)
            return position_ids
    
        def _set_gradient_checkpointing(self, module, value=False):
            if isinstance(module, GLMTransformer):
                module.gradient_checkpointing = value
    
    # Embedding layer
    class Embedding(torch.nn.Module):
        """Language model embeddings."""
    
        def __init__(self, config: ChatGLMConfig, device=None):
            super(Embedding, self).__init__()
    
            self.hidden_size = config.hidden_size
            # Word embeddings (parallel).
            self.word_embeddings = nn.Embedding(
                #padded_vocab_size默认65024
                config.padded_vocab_size,
                self.hidden_size,
                dtype=config.torch_dtype,
                device=device
            )
            self.fp32_residual_connection = config.fp32_residual_connection
    
        def forward(self, input_ids):
            # Embeddings.
            words_embeddings = self.word_embeddings(input_ids)
            embeddings = words_embeddings
            # Data format change to avoid explicit transposes : [b s h] --> [s b h].
            embeddings = embeddings.transpose(0, 1).contiguous()
            # If the input flag for fp32 residual connection is set, convert for float.
            if self.fp32_residual_connection:
                embeddings = embeddings.float()
            return embeddings
    
    # Inherits from the pretrained-model base class
    class ChatGLMModel(ChatGLMPreTrainedModel):
        def __init__(self, config: ChatGLMConfig, device=None, empty_init=True):
            super().__init__(config)
            # empty_init: whether to skip parameter initialization for now
            if empty_init:
                # skip_init lives in torch.nn.utils (available since PyTorch 1.10) and instantiates a module without initializing its parameters/buffers
                init_method = skip_init
            else:
                init_method = default_init
            init_kwargs = {}
            if device is not None:
                init_kwargs["device"] = device
            # Define the input embedding layer
            self.embedding = init_method(Embedding, config, **init_kwargs)
            self.num_layers = config.num_layers
            self.multi_query_group_num = config.multi_query_group_num
            self.kv_channels = config.kv_channels
    
            # Define the rotary positional-encoding (RoPE) embedding layer for the input
            # Rotary positional embeddings
            self.seq_length = config.seq_length
            rotary_dim = (
                config.hidden_size // config.num_attention_heads if config.kv_channels is None else config.kv_channels
            )
    
            self.rotary_pos_emb = RotaryEmbedding(rotary_dim // 2, original_impl=config.original_rope, device=device,
                                                  dtype=config.torch_dtype)
            # Define the main GLMTransformer stack
            self.encoder = init_method(GLMTransformer, config, **init_kwargs)
            # Define the output layer output_layer
            self.output_layer = init_method(nn.Linear, config.hidden_size, config.padded_vocab_size, bias=False,
                                            dtype=config.torch_dtype, **init_kwargs)
            self.pre_seq_len = config.pre_seq_len
            # defaults to False
            self.prefix_projection = config.prefix_projection
            # If the prefix length is not None, the input passes through a prefix_encoder layer, so define it here
            if self.pre_seq_len is not None:
                # prefix tuning: freeze the main network's parameters; only the prefix encoder is updated
                for param in self.parameters():
                    param.requires_grad = False
                # torch.arange generates an integer sequence from 0 to self.pre_seq_len - 1
                # .long() casts the resulting tensor to int64
                # i.e. a long tensor holding the sequence 0, 1, 2, ..., self.pre_seq_len - 1
                self.prefix_tokens = torch.arange(self.pre_seq_len).long()
                self.prefix_encoder = PrefixEncoder(config)
                self.dropout = torch.nn.Dropout(0.1)
    
        def get_input_embeddings(self):
            # self.embedding.word_embeddings is the nn.Embedding() layer defined above
            return self.embedding.word_embeddings
    
        def get_prompt(self, batch_size, device, dtype=torch.half):
    
            prefix_tokens = self.prefix_tokens.unsqueeze(0).expand(batch_size, -1).to(device)
    
            past_key_values = self.prefix_encoder(prefix_tokens).type(dtype)
    
            past_key_values = past_key_values.view(
                batch_size,
                self.pre_seq_len,
                self.num_layers * 2,
                self.multi_query_group_num,
                self.kv_channels
            )
            # seq_len, b, nh, hidden_size
            past_key_values = self.dropout(past_key_values)
            past_key_values = past_key_values.permute([2, 1, 0, 3, 4]).split(2)
            return past_key_values
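
        # Shape walk-through (illustrative, not from the original file): after .view(...),
        # past_key_values is [b, pre_seq_len, num_layers * 2, multi_query_group_num, kv_channels];
        # permute([2, 1, 0, 3, 4]) moves the layer axis to the front, and .split(2) then yields
        # one (key, value) pair of shape [2, pre_seq_len, b, groups, kv_channels] per layer.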
    
        def forward(
                self,
                input_ids,
                position_ids: Optional[torch.Tensor] = None,
                attention_mask: Optional[torch.BoolTensor] = None,
                full_attention_mask: Optional[torch.BoolTensor] = None,
                past_key_values: Optional[Tuple[Tuple[torch.Tensor, torch.Tensor], ...]] = None,
                inputs_embeds: Optional[torch.Tensor] = None,
                use_cache: Optional[bool] = None,
                output_hidden_states: Optional[bool] = None,
                return_dict: Optional[bool] = None,
        ):
            output_hidden_states = (
                output_hidden_states if output_hidden_states is not None else self.config.output_hidden_states
            )
            use_cache = use_cache if use_cache is not None else self.config.use_cache
            return_dict = return_dict if return_dict is not None else self.config.use_return_dict
            # read the batch size batch_size and the input length seq_length
            batch_size, seq_length = input_ids.shape
    
            # inputs_embeds comes from the self.embedding layer
            if inputs_embeds is None:
                inputs_embeds = self.embedding(input_ids)
            # with a prefix length set, use the processed past_key_values
            if self.pre_seq_len is not None:
                if past_key_values is None:
                    # obtain past_key_values
                    past_key_values = self.get_prompt(batch_size=batch_size, device=input_ids.device,
                                                      dtype=inputs_embeds.dtype)
                if attention_mask is not None:
                    attention_mask = torch.cat([attention_mask.new_ones((batch_size, self.pre_seq_len)),
                                                attention_mask], dim=-1)
    
            if full_attention_mask is None:
                if (attention_mask is not None and not attention_mask.all()) or (past_key_values and seq_length != 1):
                    full_attention_mask = self.get_masks(input_ids, past_key_values, padding_mask=attention_mask)
    
            # Rotary positional embeddings
            rotary_pos_emb = self.rotary_pos_emb(self.seq_length)
            if position_ids is not None:
                rotary_pos_emb = rotary_pos_emb[position_ids]
            else:
                rotary_pos_emb = rotary_pos_emb[None, :seq_length]
            rotary_pos_emb = rotary_pos_emb.transpose(0, 1).contiguous()
    
            # Run encoder.
            # the main GLMTransformer stack
            hidden_states, presents, all_hidden_states, all_self_attentions = self.encoder(
                inputs_embeds, full_attention_mask, rotary_pos_emb=rotary_pos_emb,
                kv_caches=past_key_values, use_cache=use_cache, output_hidden_states=output_hidden_states
            )
            
            if not return_dict:
                return tuple(v for v in [hidden_states, presents, all_hidden_states, all_self_attentions] if v is not None)
    
            return BaseModelOutputWithPast(
                last_hidden_state=hidden_states,
                past_key_values=presents,
                hidden_states=all_hidden_states,
                attentions=all_self_attentions,
            )
        # Quantization helper
        def quantize(self, weight_bit_width: int):
            from .quantization import quantize
            quantize(self.encoder, weight_bit_width)
            return self
    
    # Class for conditional generation with ChatGLM2
    class ChatGLMForConditionalGeneration(ChatGLMPreTrainedModel):
        def __init__(self, config: ChatGLMConfig, empty_init=True, device=None):
            super().__init__(config)
    
            self.max_sequence_length = config.max_length
            self.transformer = ChatGLMModel(config, empty_init=empty_init, device=device)
            self.config = config
            self.quantized = False
    
            if self.config.quantization_bit:
                self.quantize(self.config.quantization_bit, empty_init=True)
    
        def _update_model_kwargs_for_generation(
                self,
                outputs: ModelOutput,
                model_kwargs: Dict[str, Any],
                is_encoder_decoder: bool = False,
                standardize_cache_format: bool = False,
        ) -> Dict[str, Any]:
            # update past_key_values
            model_kwargs["past_key_values"] = self._extract_past_from_model_output(
                outputs, standardize_cache_format=standardize_cache_format
            )
    
            # update attention mask
            if "attention_mask" in model_kwargs:
                attention_mask = model_kwargs["attention_mask"]
                model_kwargs["attention_mask"] = torch.cat(
                    [attention_mask, attention_mask.new_ones((attention_mask.shape[0], 1))], dim=-1
                )
    
            # update position ids
            if "position_ids" in model_kwargs:
                position_ids = model_kwargs["position_ids"]
                new_position_id = position_ids[..., -1:].clone()
                new_position_id += 1
                model_kwargs["position_ids"] = torch.cat(
                    [position_ids, new_position_id], dim=-1
                )
    
            model_kwargs["is_first_forward"] = False
            return model_kwargs
    
        def prepare_inputs_for_generation(
                self,
                input_ids: torch.LongTensor,
                past_key_values: Optional[torch.Tensor] = None,
                attention_mask: Optional[torch.Tensor] = None,
                position_ids: Optional[torch.Tensor] = None,
                is_first_forward: bool = True,
                **kwargs
        ) -> dict:
            # only last token for input_ids if past is not None
            if position_ids is None:
                position_ids = self.get_position_ids(input_ids, device=input_ids.device)
            if not is_first_forward:
                position_ids = position_ids[..., -1:]
                input_ids = input_ids[:, -1:]
            return {
                "input_ids": input_ids,
                "past_key_values": past_key_values,
                "position_ids": position_ids,
                "attention_mask": attention_mask,
                "return_last_logit": True
            }
    
        def forward(
                self,
                input_ids: Optional[torch.Tensor] = None,
                position_ids: Optional[torch.Tensor] = None,
                attention_mask: Optional[torch.Tensor] = None,
                past_key_values: Optional[Tuple[torch.FloatTensor]] = None,
                inputs_embeds: Optional[torch.Tensor] = None,
                labels: Optional[torch.Tensor] = None,
                use_cache: Optional[bool] = None,
                output_attentions: Optional[bool] = None,
                output_hidden_states: Optional[bool] = None,
                return_dict: Optional[bool] = None,
                return_last_logit: Optional[bool] = False,
        ):
            use_cache = use_cache if use_cache is not None else self.config.use_cache
            return_dict = return_dict if return_dict is not None else self.config.use_return_dict
            
            '''
                self.transformer(...) runs ChatGLMModel.forward and returns a BaseModelOutputWithPast, which typically contains:
                last_hidden_state: the hidden states of the final layer.
                past_key_values (also called "past"): the key/value pairs used by the attention mechanism.
                "Past" here refers to the attention keys and values inside the Transformer.
                hidden_states: a list of the hidden states from every layer.
                attentions: a list of the attention weights from every layer, showing each token's attention distribution over the other tokens.
                In applications such as text generation, caching these "past" values is useful because it avoids recomputing the whole input sequence, making token-by-token decoding far more efficient.
                BaseModelOutputWithPast(
                    last_hidden_state=hidden_states,
                    past_key_values=presents,
                    hidden_states=all_hidden_states,
                    attentions=all_self_attentions,
                )
            '''
            transformer_outputs = self.transformer(
                input_ids=input_ids,
                position_ids=position_ids,
                attention_mask=attention_mask,
                past_key_values=past_key_values,
                inputs_embeds=inputs_embeds,
                use_cache=use_cache,
                output_hidden_states=output_hidden_states,
                return_dict=return_dict,
            )
            # take the hidden states from the transformer output
            hidden_states = transformer_outputs[0]
            if return_last_logit:
                hidden_states = hidden_states[-1:]
            # compute the logits (pre-softmax scores) for every vocabulary token
            lm_logits = self.transformer.output_layer(hidden_states)
            # transpose(0, 1) swaps dimension 0 and dimension 1
            lm_logits = lm_logits.transpose(0, 1).contiguous()
    
    
            loss = None
            # labels are present during training; the loss is computed below
            if labels is not None:
                lm_logits = lm_logits.to(torch.float32)
    
                # Shift so that tokens < n predict n
                shift_logits = lm_logits[..., :-1, :].contiguous()
                shift_labels = labels[..., 1:].contiguous()
                # Flatten the tokens
                loss_fct = CrossEntropyLoss(ignore_index=-100)
                # view(-1, shift_logits.size(-1)) infers the leading dimension automatically: if shift_logits is (10, 20, 50) it holds 10 * 20 * 50 = 10000 elements,
                # so shift_logits.view(-1, shift_logits.size(-1)) yields a (200, 50) tensor
                loss = loss_fct(shift_logits.view(-1, shift_logits.size(-1)), shift_labels.view(-1))
    
                lm_logits = lm_logits.to(hidden_states.dtype)
                loss = loss.to(hidden_states.dtype)
    
            if not return_dict:
                output = (lm_logits,) + transformer_outputs[1:]
                return ((loss,) + output) if loss is not None else output
            '''
                A CausalLMOutputWithPast typically contains:
                loss: the loss value, computed and returned when labels are provided.
                logits: the prediction scores for every token.
                past_key_values: as described above, the key/value pairs used by the attention mechanism.
                hidden_states: optional, the outputs of all hidden layers.
                attentions: optional, the attention weights.
            '''
            return CausalLMOutputWithPast(
                loss=loss,
                logits=lm_logits,
                past_key_values=transformer_outputs.past_key_values,
                hidden_states=transformer_outputs.hidden_states,
                attentions=transformer_outputs.attentions,
            )
        # staticmethod marks a method that can be called without creating an instance of the class
        @staticmethod
        def _reorder_cache(
                past: Tuple[Tuple[torch.Tensor, torch.Tensor], ...], beam_idx: torch.LongTensor
        ) -> Tuple[Tuple[torch.Tensor, torch.Tensor], ...]:
            """
            This function is used to re-order the `past_key_values` cache if [`~PreTrainedModel.beam_search`] or
            [`~PreTrainedModel.beam_sample`] is called. This is required to match `past_key_values` with the correct
            beam_idx at every generation step.
    
            Output shares the same memory storage as `past`.
            """
            return tuple(
                (
                    layer_past[0].index_select(1, beam_idx.to(layer_past[0].device)),
                    layer_past[1].index_select(1, beam_idx.to(layer_past[1].device)),
                )
                for layer_past in past
            )
    
        # Post-process the response string
        def process_response(self, response):
            response = response.strip()
            response = response.replace("[[训练时间]]", "2023年")
            return response
    
    
        def build_inputs(self, tokenizer, query: str, history: List[Tuple[str, str]] = None):
            # turn the history plus the current query into a prompt
            prompt = tokenizer.build_prompt(query, history=history)
            # return PyTorch tensors
            inputs = tokenizer([prompt], return_tensors="pt")
            inputs = inputs.to(self.device)
            return inputs
    
        def build_stream_inputs(self, tokenizer, query: str, history: List[Tuple[str, str]] = None):
            if history:
                prompt = "\n\n[Round {}]\n\n问:{}\n\n答:".format(len(history) + 1, query)
                input_ids = tokenizer.encode(prompt, add_special_tokens=False)
                input_ids = input_ids[1:]
                inputs = tokenizer.batch_encode_plus([(input_ids, None)], return_tensors="pt", add_special_tokens=False)
            else:
                prompt = "[Round {}]\n\n问:{}\n\n答:".format(len(history) + 1, query)
                inputs = tokenizer([prompt], return_tensors="pt")
            inputs = inputs.to(self.device)
            return inputs
    
        # @torch.inference_mode() is a PyTorch context manager; used as a decorator it ensures all code inside the wrapped function runs in inference mode
        '''
            Example usage (suited to an API):
            response, history = model.chat(tokenizer,
                                    prompt,
                                    history=history,
                                    max_length=max_length if max_length else 2048,
                                    top_p=top_p if top_p else 0.7,
                                    temperature=temperature if temperature else 0.95)
        '''
        @torch.inference_mode()
        def chat(self, tokenizer, query: str, history: List[Tuple[str, str]] = None, max_length: int = 8192, num_beams=1,
                 do_sample=True, top_p=0.8, temperature=0.8, logits_processor=None, **kwargs):
            if history is None:
                history = []
            if logits_processor is None:
                '''
                When generating text, the model outputs a logit (a raw, unnormalized score) for every candidate token.
                LogitsProcessorList is a utility list that holds a series of processors which can modify these logits,
                and by modifying the logits they can influence the model's output.
                For example, a LogitsProcessor can implement temperature adjustment, minimum/maximum length constraints, or penalties/rewards for specific tokens.
                A common use is to penalize the probabilities behind repetitive "parroting" so the model does not endlessly repeat itself.
                '''
                logits_processor = LogitsProcessorList()
            # add a processor that handles invalid score outputs: if the predicted logits are NaN/Inf they are zeroed and a safe token is forced
            logits_processor.append(InvalidScoreLogitsProcessor())
            # num_beams is the beam-search width (default 1 here); top_p is the nucleus-sampling threshold: sampling is restricted to the smallest set of tokens whose cumulative probability exceeds top_p
            gen_kwargs = {"max_length": max_length, "num_beams": num_beams, "do_sample": do_sample, "top_p": top_p,
                          "temperature": temperature, "logits_processor": logits_processor, **kwargs}
            inputs = self.build_inputs(tokenizer, query, history=history)
    
            # generate output token ids; self.generate is a PreTrainedModel method that honors max_length, temperature, top_p, etc.
            # for how temperature, top_k and top_p interact with beam search, see https://blog.csdn.net/weixin_44826203/article/details/130708623
            # scores are divided by temperature to control the sharpness of the distribution, and only the top_k tokens keep their scores while the rest are set to -inf
            outputs = self.generate(**inputs, **gen_kwargs)
            outputs = outputs.tolist()[0][len(inputs["input_ids"][0]):]
            # decode the ids into text
            response = tokenizer.decode(outputs)
            response = self.process_response(response)
            history = history + [(query, response)]
            # return this turn's response plus the updated history
            return response, history
    
        # Streaming, turn-by-turn chat; caching past_key_values makes repeated question answering efficient
        '''
            Example usage (suited to a web UI):
            for response, history, past_key_values in model.stream_chat(tokenizer, input, history, past_key_values=past_key_values,
                                                            return_past_key_values=True,
                                                            max_length=max_length, top_p=top_p,
                                                            temperature=temperature):
            chatbot[-1] = (parse_text(input), parse_text(response))

            yield chatbot, history, past_key_values
        '''
        @torch.inference_mode()
        def stream_chat(self, tokenizer, query: str, history: List[Tuple[str, str]] = None, past_key_values=None,
                        max_length: int = 8192, do_sample=True, top_p=0.8, temperature=0.8, logits_processor=None,
                        return_past_key_values=False, **kwargs):
            if history is None:
                history = []
            if logits_processor is None:
                logits_processor = LogitsProcessorList()
            logits_processor.append(InvalidScoreLogitsProcessor())
            gen_kwargs = {"max_length": max_length, "do_sample": do_sample, "top_p": top_p,
                          "temperature": temperature, "logits_processor": logits_processor, **kwargs}
            if past_key_values is None and not return_past_key_values:
                inputs = self.build_inputs(tokenizer, query, history=history)
            else:
                #build_stream_inputs (defined above) encodes only the current round of the dialogue, since earlier rounds already live in the KV cache
                inputs = self.build_stream_inputs(tokenizer, query, history=history)
            
            if past_key_values is not None:
                #The KV cache is laid out as [seq_len, batch, num_heads, head_dim], so dim 0 is the cached sequence length
                past_length = past_key_values[0][0].shape[0]
                if self.transformer.pre_seq_len is not None:
                    past_length -= self.transformer.pre_seq_len
                #Shift the new tokens' position ids past the cached prefix
                inputs.position_ids += past_length
                #Prepend ones to the attention mask so the cached positions remain visible (a standalone sketch of this bookkeeping follows the method)
                attention_mask = inputs.attention_mask
                attention_mask = torch.cat((attention_mask.new_ones(1, past_length), attention_mask), dim=1)
                inputs['attention_mask'] = attention_mask
    
            for outputs in self.stream_generate(**inputs, past_key_values=past_key_values,
                                                return_past_key_values=return_past_key_values, **gen_kwargs):
                if return_past_key_values:
                    outputs, past_key_values = outputs
                outputs = outputs.tolist()[0][len(inputs["input_ids"][0]):]
                response = tokenizer.decode(outputs)
                if response and response[-1] != "�":
                    response = self.process_response(response)
                    new_history = history + [(query, response)]
                    if return_past_key_values:
                        #yield: 当一个函数包含yield关键字,它将不再是一个常规函数,而是一个生成器函数。这种函数在调用时不会执行,而是返回一个生成器对象。
                        yield response, new_history, past_key_values
                    else:
                        yield response, new_history
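
        #A standalone sketch (not in the original file) of the cache bookkeeping performed above: with past_length
        #tokens already in the KV cache, the new round's position ids are shifted and its attention mask is
        #prepended with ones. The sizes here are purely illustrative.
        '''
        past_length = 10                                      # tokens already held in the KV cache
        attention_mask = torch.ones(1, 3, dtype=torch.long)   # mask covering the 3 newly encoded tokens
        position_ids = torch.arange(3).unsqueeze(0)           # tensor([[0, 1, 2]]) before shifting

        position_ids = position_ids + past_length             # tensor([[10, 11, 12]])
        attention_mask = torch.cat((attention_mask.new_ones(1, past_length), attention_mask), dim=1)
        assert attention_mask.shape == (1, 13)                # cached positions plus new positions
        '''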
    
        @torch.inference_mode()
        def stream_generate(
                self,
                input_ids,
                generation_config: Optional[GenerationConfig] = None,
                logits_processor: Optional[LogitsProcessorList] = None,
                stopping_criteria: Optional[StoppingCriteriaList] = None,
                prefix_allowed_tokens_fn: Optional[Callable[[int, torch.Tensor], List[int]]] = None,
                return_past_key_values=False,
                **kwargs,
        ):
            batch_size, input_ids_seq_length = input_ids.shape[0], input_ids.shape[-1]
    
            if generation_config is None:
                generation_config = self.generation_config
            generation_config = copy.deepcopy(generation_config)
            model_kwargs = generation_config.update(**kwargs)
            bos_token_id, eos_token_id = generation_config.bos_token_id, generation_config.eos_token_id
    
            if isinstance(eos_token_id, int):
                eos_token_id = [eos_token_id]
    
            has_default_max_length = kwargs.get("max_length") is None and generation_config.max_length is not None
            if has_default_max_length and generation_config.max_new_tokens is None:
                warnings.warn(
                    f"Using `max_length`'s default ({generation_config.max_length}) to control the generation length. "
                    "This behaviour is deprecated and will be removed from the config in v5 of Transformers -- we"
                    " recommend using `max_new_tokens` to control the maximum length of the generation.",
                    UserWarning,
                )
            elif generation_config.max_new_tokens is not None:
                generation_config.max_length = generation_config.max_new_tokens + input_ids_seq_length
                if not has_default_max_length:
                    logger.warning(
                        f"Both `max_new_tokens` (={generation_config.max_new_tokens}) and `max_length`(="
                        f"{generation_config.max_length}) seem to have been set. `max_new_tokens` will take precedence. "
                        "Please refer to the documentation for more information. "
                        "(https://huggingface.co/docs/transformers/main/en/main_classes/text_generation)"
                    )
    
            if input_ids_seq_length >= generation_config.max_length:
                input_ids_string = "decoder_input_ids" if self.config.is_encoder_decoder else "input_ids"
                logger.warning(
                    f"Input length of {input_ids_string} is {input_ids_seq_length}, but `max_length` is set to"
                    f" {generation_config.max_length}. This can lead to unexpected behavior. You should consider"
                    " increasing `max_new_tokens`."
                )
    
            # 2. Set generation parameters if not already defined
            logits_processor = logits_processor if logits_processor is not None else LogitsProcessorList()
            #Stopping criteria halt generation, e.g. when a maximum length is reached or a time budget is exhausted
            stopping_criteria = stopping_criteria if stopping_criteria is not None else StoppingCriteriaList()
    
            logits_processor = self._get_logits_processor(
                generation_config=generation_config,
                input_ids_seq_length=input_ids_seq_length,
                encoder_input_ids=input_ids,
                prefix_allowed_tokens_fn=prefix_allowed_tokens_fn,
                logits_processor=logits_processor,
            )
    
            stopping_criteria = self._get_stopping_criteria(
                generation_config=generation_config, stopping_criteria=stopping_criteria
            )
            logits_warper = self._get_logits_warper(generation_config)
    
            unfinished_sequences = input_ids.new(input_ids.shape[0]).fill_(1)
            scores = None
            while True:
                model_inputs = self.prepare_inputs_for_generation(input_ids, **model_kwargs)
                # forward pass to get next token
                outputs = self(
                    **model_inputs,
                    return_dict=True,
                    output_attentions=False,
                    output_hidden_states=False,
                )
    
                next_token_logits = outputs.logits[:, -1, :]
    
                # pre-process distribution
                next_token_scores = logits_processor(input_ids, next_token_logits)
                next_token_scores = logits_warper(input_ids, next_token_scores)
    
                # sample
                probs = nn.functional.softmax(next_token_scores, dim=-1)
                if generation_config.do_sample:
                    next_tokens = torch.multinomial(probs, num_samples=1).squeeze(1)
                else:
                    next_tokens = torch.argmax(probs, dim=-1)
    
                # update generated ids, model inputs, and length for next step
                input_ids = torch.cat([input_ids, next_tokens[:, None]], dim=-1)
                model_kwargs = self._update_model_kwargs_for_generation(
                    outputs, model_kwargs, is_encoder_decoder=self.config.is_encoder_decoder
                )
                unfinished_sequences = unfinished_sequences.mul((sum(next_tokens != i for i in eos_token_id)).long())
                if return_past_key_values:
                    yield input_ids, outputs.past_key_values
                else:
                    yield input_ids
                # stop when each sentence is finished, or if we exceed the maximum length
                if unfinished_sequences.max() == 0 or stopping_criteria(input_ids, scores):
                    break
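
        #A worked sketch (not in the original file) of the temperature and top-p warping that
        #self._get_logits_warper configures, followed by the same multinomial sample step used above.
        #The logits values and the settings temperature=0.8, top_p=0.8 are arbitrary.
        '''
        logits = torch.tensor([[2.0, 1.0, 0.5, -1.0]])
        logits = logits / 0.8                                  # temperature < 1 sharpens the distribution

        sorted_logits, sorted_idx = torch.sort(logits, descending=True, dim=-1)
        sorted_probs = torch.softmax(sorted_logits, dim=-1)
        #a token is dropped when the cumulative probability of the tokens ranked before it already reaches top_p
        outside_nucleus = sorted_probs.cumsum(dim=-1) - sorted_probs > 0.8
        logits[0, sorted_idx[0][outside_nucleus[0]]] = float("-inf")

        probs = torch.softmax(logits, dim=-1)                  # masked tokens get probability 0
        next_token = torch.multinomial(probs, num_samples=1)   # same call as in stream_generate
        '''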
        #Weight quantization entry point; a usage sketch follows the method
        def quantize(self, bits: int, empty_init=False, device=None, **kwargs):
            if bits == 0:
                return
    
            from .quantization import quantize
    
            if self.quantized:
                logger.info("Already quantized.")
                return self
    
            self.quantized = True
    
            self.config.quantization_bit = bits
    
            self.transformer.encoder = quantize(self.transformer.encoder, bits, empty_init=empty_init, device=device,
                                                **kwargs)
            return self
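
        #A hedged usage sketch (not in the original file): loading the model and quantizing its weights to INT4.
        #It assumes the quantization kernels shipped with the THUDM/chatglm2-6b repository are available.
        '''
        from transformers import AutoTokenizer, AutoModel
        tokenizer = AutoTokenizer.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
        model = AutoModel.from_pretrained("THUDM/chatglm2-6b", trust_remote_code=True)
        model = model.quantize(4).cuda().eval()   # INT4 weights substantially reduce GPU memory use
        response, history = model.chat(tokenizer, "你好", history=[])
        '''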
    
  • Original article: https://blog.csdn.net/a1920993165/article/details/133518294