I2S output 6-channel 16k audio Solution

1. Introduction

This article describes a scheme for outputting 6 channels of 16k audio over I2S on the 8008/8008c. Other layouts, such as 4 channels of 8k or 12 channels of 8k, work in a similar way and can follow this implementation. The maximum I2S output sampling rate of the 8008/8008c is 48k, so the maximum output bandwidth is 2 channels of 48k.
The standard I2S (Inter-IC Sound) protocol is a serial protocol used for digital audio transmission, usually used to connect audio codecs and digital signal processors. It only supports two-channel audio transmission, left and right channels. The extended protocol TDM (Time-Division Multiplexing) implements multi-channel audio transmission. Because i2s output of 8008/8008c does not support the TDM protocol, 6 channels of 16k audio can only be transmitted through 2 channels of 48k by increasing the i2s clock sampling rate.

2. Details

As shown in the figure, the software interleaves the data of 3 channels of 16k audio into one channel of 48k audio, and then calls the I2S output interface as usual to output the data.
As shown in the figure, because the transmission is not synchronized, the receiving end may receive the data in the order L1 L2 L0 or L2 L0 L1 rather than L0 L1 L2. To solve this problem, we add a marker to the data of each channel: the last (least significant) bit of L0 and R0 is set to 1, and the last bit of the other channels (L1, L2, R1, R2) is set to 0. Losing the least significant bit of precision has virtually no impact on audio quality.
After receiving the data, the receiving end checks the last bit of each sample: a sample whose last bit is 1 belongs to L0 (or R0). Once L0 and R0 have been located, the receiver can determine the order of the other channels from their position relative to L0 and R0. This way, the receiver can correctly parse the data of the different channels.

3. VSP Usage Example

vsp_sdk provides a reference example that outputs 4 mic data and 2 ref data via i2s.

cp configs/example_lib/8008c_wukong_v1.4_example_lib_i2sout_6ch_16kdata_4mic+2ref.config .config
make clean; make

/*
 * Collects the current mic and ref frames from the VSP context and hands them
 * to VspCopy6Channel16kDataTo2Channel48kSpk() for output.
 *
 * Only the 4-mic + 2-ref configuration is accepted.
 *
 * context: VSP processing context; its ctx_header supplies mic_num and ref_num.
 *
 * Returns 0 on success, -1 when the context is not configured for exactly
 * 4 mic channels and 2 ref channels.
 */
IRAM0_TEXT_ATTR int VspDoI2sOutput6Channel16kData(VSP_CONTEXT *context)
{
    VSP_CONTEXT_HEADER *header = context->ctx_header;

    const int ref_count = header->ref_num;
    const int mic_count = header->mic_num;

    /* The packing routine below takes exactly four mic and two ref buffers. */
    if (!(mic_count == 4 && ref_count == 2)) {
        return -1;
    }

    short *mic_frames[4];
    short *ref_frames[2];
    int ch;

    /* Fetch one frame pointer per mic channel.
     * NOTE(review): the meaning of the trailing 0 argument is not visible here - confirm. */
    ch = 0;
    while (ch < mic_count) {
        mic_frames[ch] = VspProcessGetMicFrame(context, ch, 0);
        ch++;
    }

    /* Fetch one frame pointer per ref channel. */
    ch = 0;
    while (ch < ref_count) {
        ref_frames[ch] = VspProcessGetRefFrame(context, ch, 0);
        ch++;
    }

    VspCopy6Channel16kDataTo2Channel48kSpk(context,
                                           mic_frames[0], mic_frames[1],
                                           mic_frames[2], mic_frames[3],
                                           ref_frames[0], ref_frames[1]);

    return 0;
}

4. Decode Example

The following code decodes the recorded 3-channel 16k data in record.pcm back into the original transmission order by checking the least significant bit of each sample, and writes the result to output.pcm. The receiving end can refer to the highlighted part.

swapChannel.c
#include <stdio.h>
#include <stdint.h>
#include <stdlib.h>
// Number of interleaved channels in each frame
#define NUM_CHANNELS        (3)
// Number of samples to read per chunk (a whole number of frames)
#define SAMPLES_PER_READ    (100 * NUM_CHANNELS)
/**
 * Restores the channel order of interleaved audio frames in place.
 *
 * The buffer is treated as consecutive frames of NUM_CHANNELS samples. In
 * each frame the first sample whose least significant bit is 1 is taken as
 * the channel-0 marker, and the frame is rotated left so that this sample
 * ends up in slot 0. Frames without a marker, or with the marker already in
 * slot 0, are left unchanged.
 *
 * Only complete frames are processed: trailing samples that do not fill a
 * whole frame are neither read nor modified. A NULL buffer or a count smaller
 * than one frame is a no-op.
 *
 * @param data - Pointer to the audio data array
 * @param numSamples - Number of samples in the audio data array
 */
void swapChannels(uint16_t* data, int numSamples)
{
    if (!data || numSamples < NUM_CHANNELS) {
        return;
    }

    // Never touch a trailing partial frame: it lies (partly) outside the
    // caller's valid range.
    const int usableSamples = numSamples - (numSamples % NUM_CHANNELS);

    for (int i = 0; i < usableSamples; i += NUM_CHANNELS) {
        uint16_t *frame = data + i;

        // Locate the first marked sample. Stopping at the first hit keeps the
        // result well defined even if a corrupted frame has several LSBs set.
        int marker = -1;
        for (int c = 0; c < NUM_CHANNELS; c++) {
            if ((frame[c] & 0x0001u) != 0) {
                marker = c;
                break;
            }
        }

        // No marker found, or the frame is already in order.
        if (marker <= 0) {
            continue;
        }

        // Rotate left by `marker` positions via a scratch copy.
        uint16_t rotated[NUM_CHANNELS];
        for (int j = 0; j < NUM_CHANNELS; j++) {
            rotated[j] = frame[(j + marker) % NUM_CHANNELS];
        }
        for (int k = 0; k < NUM_CHANNELS; k++) {
            frame[k] = rotated[k];
        }
    }
}
/**
 * Reads interleaved 16-bit samples from "record.pcm", restores the channel
 * order of every complete frame with swapChannels(), and writes the result
 * to "output.pcm".
 *
 * The input is processed in fixed chunks of SAMPLES_PER_READ samples, so
 * memory use does not depend on the file size and an empty input is valid.
 *
 * @return 0 on success, 1 if a file cannot be opened, sized, read, written
 *         or closed.
 */
int main(void) {
    const char* inputFileName = "record.pcm";
    const char* outputFileName = "output.pcm";
    uint16_t pcmData[SAMPLES_PER_READ] = {0};
    long fileSize = -1;
    size_t samplesRead = 0;
    int status = 1;

    FILE* inputFile = fopen(inputFileName, "rb");
    if (!inputFile) {
        perror("Error opening input file");
        return 1;
    }
    FILE* outputFile = fopen(outputFileName, "wb");
    if (!outputFile) {
        perror("Error opening output file");
        fclose(inputFile);
        return 1;
    }

    // Report the total sample count; every seek/tell step can fail.
    if (fseek(inputFile, 0, SEEK_END) == 0) {
        fileSize = ftell(inputFile);
    }
    if (fileSize < 0 || fseek(inputFile, 0, SEEK_SET) != 0) {
        perror("Error determining input file size");
        goto cleanup;
    }
    printf("## numSample %ld\n", fileSize / (long)sizeof(uint16_t));

    while ((samplesRead = fread(pcmData, sizeof pcmData[0], SAMPLES_PER_READ, inputFile)) > 0) {
        // Only whole frames are reordered; a trailing partial frame at the
        // end of the file is written through unchanged.
        size_t wholeFrameSamples = samplesRead - (samplesRead % NUM_CHANNELS);
        swapChannels(pcmData, (int)wholeFrameSamples);

        size_t samplesWritten = fwrite(pcmData, sizeof pcmData[0], samplesRead, outputFile);
        if (samplesWritten != samplesRead) {
            perror("Error writing output file");
            goto cleanup;
        }
    }
    // fread returns 0 for both EOF and error; tell them apart.
    if (ferror(inputFile)) {
        perror("Error reading input file");
        goto cleanup;
    }
    status = 0;

cleanup:
    fclose(inputFile);
    // Closing flushes buffered output, so a failure here means lost data.
    if (fclose(outputFile) != 0) {
        perror("Error closing output file");
        status = 1;
    }
    if (status == 0) {
        printf("Channels swapped and output to %s\n", outputFileName);
    }
    return status;
}