Skip to content

NPU model format description

  • As described in the NPU Compiler Usage section, the model file that runs on the GX8002 is generated by running gxnpuc config.yaml.
  • This article briefly explains the structure of that model file
    model.h
     1
     2
     3
     4
     5
     6
     7
     8
     9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    73
    74
    75
    76
    77
    78
    // This file is automatically generated by NPU compiler.
    // Total memory footprint of the model: the input/output buffers plus
    // npu_size (the space occupied by the model itself, see below).
    // NOTE(review): the generator's description gives the I/O term as
    // sizeof(in_out), but with 2-byte npu_data_t and 4-byte float the value
    // below equals sizeof(struct input) + sizeof(struct output) + npu_size
    // = 2736 + 1796 + 147018 = 151550 -- confirm which is intended.
    const unsigned int total_size = 151550;
    
    // Space occupied by the model itself:
    // sizeof(cmd_content) + sizeof(weight_content) + sizeof(data_content) + size of tmp_content.
    // In this example: 5172 + 128166 + 13680 + 0 = 147018 (tmp_content is a null pointer here).
    const unsigned int npu_size = 147018;
    
    // Version of the gxnpuc compiler that generated this file
    const char *version = "1.5.3rc7";
    
    // MD5 digest of the .pb file the model was compiled from
    const char *pb_md5 = "bfe0140daa3d0440c768eda50ed40265";
    
    // NPU unit string emitted by the compiler.
    // NOTE(review): presumably identifies the target NPU type -- confirm against the gxnpuc documentation.
    const char *npu_unit = "NPU32";
    
    // Model compilation time; the digits look like a YYYYMMDDhhmmss timestamp (TODO confirm)
    const char *model_info = "(20220212154001)";
    
    // Element type of the npu_data_t arrays in the input/output structures below
    typedef unsigned short npu_data_t;
    
    // Input structure of the model: the Feats array followed by the three
    // State_c* arrays, all of element type npu_data_t.
    struct input {
        npu_data_t Feats[1][15][40];    // 1 x 15 x 40 = 600 elements
        npu_data_t State_c0[1][3][64];  // 1 x 3 x 64 = 192 elements
        npu_data_t State_c1[1][4][64];  // 1 x 4 x 64 = 256 elements
        npu_data_t State_c2[1][5][64];  // 1 x 5 x 64 = 320 elements
    } __attribute__ ((packed));         // packed: no padding between members
    
    // Output structure of the model: the three State_c*_out arrays followed
    // by phone_prob. Each State_c*_out member has the same element type and
    // dimensions as the corresponding State_c* member of struct input.
    struct output {
        npu_data_t State_c0_out[1][3][64];  // 192 elements, same shape as input State_c0
        npu_data_t State_c1_out[1][4][64];  // 256 elements, same shape as input State_c1
        npu_data_t State_c2_out[1][5][64];  // 320 elements, same shape as input State_c2
        float phone_prob[1][1][65];         // 65 values stored as float, not npu_data_t
    } __attribute__ ((packed));             // packed: no padding between members
    
    // Combined input + output layout of the model. The first four members
    // match struct input, and the members from State_c0 onward have the same
    // types and dimensions as the members of struct output.
    // For recurrent ("circular") networks, more than two buffers are laid out
    // according to this structure: one buffer is used as the model's input
    // address, and the State_c0 member of the next buffer is used as the
    // model's output address. The output state then already sits where the
    // next run reads its input, which saves copying the model output back to
    // the input each time.
    struct in_out {
        npu_data_t Feats[1][15][40];
        npu_data_t State_c0[1][3][64];
        npu_data_t State_c1[1][4][64];
        npu_data_t State_c2[1][5][64];
        float phone_prob[1][1][65];
    } __attribute__ ((packed));   // packed: no padding between members
    
    // The model's instruction array: 5172 bytes, const (read only) and
    // 4-byte aligned. The "......" line marks bytes omitted from this listing.
    const unsigned char cmd_content[5172] __attribute__ ((aligned(4))) = {
        0x01, 0x03, 0x02, 0x00, 0x00, 0x00, 0x00, 0x30, 0x00, 0x00,
        0x00, 0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x10,
        0x04, 0x02, 0x00, 0x00, 0x28, 0x0f, 0x10, 0x00, 0x28, 0x00,
        ......
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x80, 0x03, 0x00, 0x40,
        0x02, 0x00, 0x00, 0x00, 0x40, 0x05, 0x10, 0x00, 0x40, 0x00,
        0x40, 0x00, 0x40, 0x00, 0x40, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0x00, 0x00, 0x00,
    };
    
    // The model's weight array: 128166 bytes, const (read only) and
    // 4-byte aligned. The "......" line marks bytes omitted from this listing.
    const unsigned char weight_content[128166] __attribute__ ((aligned(4))) = {
        0x66, 0x3a, 0x6b, 0x40, 0x00, 0x44, 0x7f, 0x45, 0x41, 0x46,
        0x49, 0x46, 0x6b, 0x46, 0xc9, 0x46, 0x3d, 0x47, 0xfc, 0x46,
        0xa6, 0x46, 0xc1, 0x46, 0x9d, 0x46, 0x85, 0x46, 0xbe, 0x46,
        ......
        0x10, 0x54, 0x00, 0x3c, 0x00, 0x3c, 0x10, 0x54, 0x00, 0x3c,
        0x00, 0x40, 0x00, 0x00, 0x00, 0x3c, 0x00, 0x45, 0x80, 0x4a,
        0x00, 0xbc, 0x00, 0x45, 0x00, 0x3c, 0x00, 0x00, 0x00, 0x40,
        0x00, 0x3c,
    };
    
    // Usually empty: in this example it is a null pointer, so it contributes
    // nothing to npu_size.
    // NOTE(review): the generator's comment calls this "read only", but the
    // declaration is not const-qualified -- confirm the intended meaning.
    unsigned char *tmp_content = (void*)0;
    
    // Usually empty: declared here as a zero-length, 4-byte-aligned const
    // array with an empty initializer (zero-length arrays are a GNU C extension).
    const unsigned char ops_content[0] __attribute__ ((aligned(4))) = {
    };
    
    // Temporary working space required by the model at run time: 13680 bytes,
    // 4-byte aligned. Unlike cmd_content and weight_content it is not const
    // and has no initializer.
    unsigned char data_content[13680] __attribute__ ((aligned(4)));