The audio output is not correct.

Started by zahar,

zahar

Here is my initialization (creating channels, etc.)

bool speaker::Initialize(int rate)
{
    send.store(true);
    sampleRate = rate;
    frameSize = sampleRate / 50;

    codec = new CCodec();

    mixerStream = BASS_Mixer_StreamCreate(sampleRate, 2, BASS_MIXER_NONSTOP);
    if (!mixerStream) {
        printf("Error create mixerStream. Code: %d\n", BASS_ErrorGetCode());
    }

    outputStream = BASS_StreamCreate(sampleRate, 1, BASS_SAMPLE_FLOAT | BASS_STREAM_DECODE, STREAMPROC_PUSH, NULL);
    if (!outputStream) {
        printf("Error create outputStream. Code: %d\n", BASS_ErrorGetCode());
    }

    if (!BASS_ChannelPlay(mixerStream, FALSE)) {
        printf("Error play mixerStream. Code: %d\n", BASS_ErrorGetCode());
    }

    if (!BASS_Mixer_StreamAddChannel(mixerStream, outputStream, BASS_MIXER_CHAN_DOWNMIX)) {
        printf("Error add outStream to mixerStream. Code: %d\n", BASS_ErrorGetCode());
    }

    sideToneStream = BASS_StreamCreate(sampleRate, 1,
        BASS_SAMPLE_FLOAT | BASS_STREAM_DECODE,
        STREAMPROC_PUSH, NULL);
    if (!sideToneStream) {
        printf("Error create sideToneStream. Code: %d\n", BASS_ErrorGetCode());
    }
    else {
        if (!BASS_Mixer_StreamAddChannel(mixerStream, sideToneStream, BASS_MIXER_CHAN_DOWNMIX | BASS_MIXER_BUFFER)) {
            printf("Error add sideToneStream to mixer. Code: %d\n", BASS_ErrorGetCode());
            BASS_StreamFree(sideToneStream);
            sideToneStream = 0;
        }
        else {
            BASS_ChannelSetAttribute(sideToneStream, BASS_ATTRIB_VOL, 0.35f);
            BASS_ChannelSetAttribute(sideToneStream, BASS_ATTRIB_BUFFER, 0.10f);
            printf("[Speaker] Sidetone stream created and added to mixer\n");
        }
    }

    EnableLimiter(true);

    BASS_ChannelSetAttribute(outputStream, BASS_ATTRIB_BUFFER, 0.18f);
    BASS_ChannelSetAttribute(outputStream, BASS_ATTRIB_VOL, 1.0f);

    sendThread = std::thread(&speaker::SendAudioPackets, this);

    return true;
}

BASS Init:
/// Replacement for the hooked BASS_Init call: initializes BASS itself with
/// BASS_DEVICE_MONO | BASS_DEVICE_3D added to the caller's flags, then sets up
/// the speaker at the same frequency.
///
/// @return FALSE when BASS_Init fails (the BASS error code is logged),
///         TRUE otherwise.
BOOL __stdcall Hooks::BassInitializationHook(const int device, const DWORD frequency, const DWORD flags, const HWND window, const GUID* const dsguid) noexcept
{
    if (!BASS_Init(device, frequency, (BASS_DEVICE_MONO | BASS_DEVICE_3D) | flags, window, dsguid)) {
        printf("[Speaker] BASS initialization error: %d\n", BASS_ErrorGetCode());
        return FALSE;
    }

    // BASS itself is up at this point, so a speaker failure is reported but
    // does not turn the hooked call into a failure for the host.
    if (!speak->Initialize(frequency)) {
        printf("[Speaker] Speaker initialization failed\n");
    }

    return TRUE;
}

Push data in channel:
// Pace the sidetone feed: ProcessSideTone() is invoked at most once per
// 20 ms of steady-clock time.
auto now = std::chrono::steady_clock::now();
// Whole milliseconds elapsed since the tick that last triggered a push.
auto elapsed_ms = std::chrono::duration_cast<std::chrono::milliseconds>(now - last_tick).count();

if (elapsed_ms >= 20) {
    speak->ProcessSideTone();
    // The timer restarts from "now", so any overshoot beyond 20 ms is not
    // carried over into the next interval.
    last_tick = now;
}

void speaker::ProcessSideTone()
{
    std::vector<float> toPush;

    {
        std::lock_guard<std::mutex> lock(this->sideToneMutex);
        if (sideToneBuffer.size() < 960) return;

        size_t pushSize = min(sideToneBuffer.size(), size_t(1440));
        toPush.assign(sideToneBuffer.begin(), sideToneBuffer.begin() + pushSize);
        sideToneBuffer.erase(sideToneBuffer.begin(), sideToneBuffer.begin() + pushSize);
    }

    if (!toPush.empty() && sideToneStream) {
        BASS_StreamPutData(sideToneStream, toPush.data(), toPush.size() * sizeof(float));

        DWORD avail = BASS_ChannelGetData(sideToneStream, NULL, BASS_DATA_AVAILABLE);
        float ms_left = avail / (sampleRate * 4.0f / 1000.0f);
        printf("[Sidetone] After push: buffer has %.1f ms left\n", ms_left);
    }
}

BOOL speaker::Record(HRECORD handle, const void* buffer, DWORD length, void* user)
{
    speaker* self = static_cast<speaker*>(user);
    if (!self || !self->codec || !self->codec->encoder || !AVSSync::microphone) {
        return TRUE;
    }

    if (!buffer || length == 0) {
        return TRUE;
    }

    const short* pcmBuffer = static_cast<const short*>(buffer);
    int sampleCount = length / sizeof(short);

    sampleCount = min(sampleCount, 960);
    std::vector<opus_int16> pcmData(sampleCount);
    std::vector<float> sidetoneTemp;
    sidetoneTemp.reserve(sampleCount);

    for (int i = 0; i < sampleCount; i++) {
        opus_int16 sample = static_cast<opus_int16>(pcmBuffer[i]);
        pcmData[i] = sample;

        if (self->enableSideTone) {
            float fsample = sample / 32768.0f;
            self->sideToneBuffer.push_back(fsample);
        }
    }

    static unsigned char encoded[4096];
    int encoded_len = opus_encode(self->codec->encoder,
        pcmData.data(),
        sampleCount,
        encoded,
        sizeof(encoded));
    if (encoded_len == OPUS_BAD_ARG) {
        printf("[Speaker] Opus: bad argument\n");
        return TRUE;
    }
    else if (encoded_len == OPUS_BUFFER_TOO_SMALL) {
        printf("[Speaker] Opus: buffer too small\n");
        return TRUE;
    }
    else if (encoded_len <= 0) {
        printf("[Speaker] Opus encode error: %d\n", encoded_len);
        return TRUE;
    }
    VoicePacketDataHeader packet;
    packet.type = VOICE_PACKET_VOICE_DATA;
    packet.playerId = samp->GetLocalPlayerID();
    packet.timestamp = static_cast<uint32_t>(
        std::chrono::duration_cast<std::chrono::milliseconds>(
            std::chrono::steady_clock::now().time_since_epoch()
        ).count()
        );
    static uint16_t localSequence = 0;
    packet.sequence = localSequence++;
    if (localSequence >= 65535) {
        localSequence = 0;
    }
    if (encoded_len > sizeof(packet.voiceData)) {
        printf("[Speaker] Encoded data too large: %d\n", encoded_len);
        return TRUE;
    }
    packet.dataSize = static_cast<uint16_t>(encoded_len);
    memcpy(packet.voiceData, encoded, encoded_len);
    {
        std::lock_guard<std::mutex> lock(self->queueMutex);
        if (self->packetsQueue.size() >= 100) {
            self->packetsQueue.pop();
        }
        self->packetsQueue.push(packet);
    }
    self->cv.notify_one();
    printf("[Speaker] Encoded: %d samples -> %d bytes\n", sampleCount, encoded_len);

    return TRUE;
}

For some reason, the sound I get at the output is cropped and, in a word, generally low-quality.
What am I doing wrong?

Ian @ un4seen

Are you hearing lots of little cracks/stutters in the output? If so, it may be that you aren't feeding data to your "push" streams via BASS_StreamPutData quickly enough. Perhaps those streams could use a STREAMPROC callback function instead? BASS would then request data from the streams when it's needed.

Also note that these calls will be failing because they are on decoding channels (BASS_STREAM_DECODE), which don't have a playback buffer:

BASS_ChannelSetAttribute(sideToneStream, BASS_ATTRIB_BUFFER, 0.10f);
BASS_ChannelSetAttribute(outputStream, BASS_ATTRIB_BUFFER, 0.18f);

The BASS_ATTRIB_BUFFER value should instead be set on "mixerStream". I would also suggest changing the value to 0 (to disable playback buffering) if you want minimal latency. Please see the BASS_ATTRIB_BUFFER documentation for more info.

zahar

outputStream = BASS_StreamCreate(sampleRate, 1, BASS_SAMPLE_FLOAT | BASS_STREAM_DECODE, STREAMPROC_PUSH, NULL);

Oh, are you suggesting using your callbacks instead of STREAMPROC_PUSH?

zahar

Oh, you mentioned that data isn't being fed to the streams quickly enough; I have an explanation for that. There is a Record function, and for some reason it is called only once per second, although it should definitely be called more often than that. How can that be?

zahar

Also, if there is already a similar topic about voice playback with a solution to a problem like mine, please just point me to it rather than spending a long time on this.


zahar

And another question: there is a BASS_ChannelGetData function. In theory, can I use it to get the optimal amount of data so that the sound plays back without gaps? Or won't that work because the Record callback is only called once per second?

zahar

UPDATE: I have reduced the recording delay to 50 ms. The question now is: what is the right way to push data into the output stream?

Ian @ un4seen

What parameters are you using in your BASS_RecordStart call? The default RECORDPROC callback period is 100ms but that can be changed in the HIWORD of the "flags" parameter.

If you're outputting the recording in realtime, and you want to minimize latency in that, then I would suggest removing the RECORDPROC and "push" stream, and instead use a STREAMPROC that pulls data from the recording when needed. Something like this:

// Pull-model monitoring setup: record without a RECORDPROC and let the output
// stream's StreamProc fetch the recorded data when it needs it.
input = BASS_RecordStart(freq, chans, 0, RECORDPROC_NONE, 0); // start recording without a callback
output = BASS_StreamCreate(freq, chans, 0, StreamProc, 0); // create output stream
BASS_ChannelSetAttribute(output, BASS_ATTRIB_BUFFER, 0); // disable playback buffering on it
BASS_ChannelStart(output); // start it

...

// STREAMPROC for the output stream: fills each request straight from the
// recording channel `input` and returns whatever BASS_ChannelGetData returns.
DWORD CALLBACK StreamProc(HSTREAM handle, void *buffer, DWORD length, void *user)
{
    // get data from the recording
    const DWORD fetched = BASS_ChannelGetData(input, buffer, length);
    return fetched;
}

Please see the documentation for details on the mentioned functions.

zahar

Quote from: Ian @ un4seen
What parameters are you using in your BASS_RecordStart call? The default RECORDPROC callback period is 100ms but that can be changed in the HIWORD of the "flags" parameter.

If you're outputting the recording in realtime, and you want to minimize latency in that, then I would suggest removing the RECORDPROC and "push" stream, and instead use a STREAMPROC that pulls data from the recording when needed. Something like this:

// Pull-model monitoring setup: record without a RECORDPROC and let the output
// stream's StreamProc fetch the recorded data when it needs it.
input = BASS_RecordStart(freq, chans, 0, RECORDPROC_NONE, 0); // start recording without a callback
output = BASS_StreamCreate(freq, chans, 0, StreamProc, 0); // create output stream
BASS_ChannelSetAttribute(output, BASS_ATTRIB_BUFFER, 0); // disable playback buffering on it
BASS_ChannelStart(output); // start it

...

// STREAMPROC for the output stream: fills each request straight from the
// recording channel `input` and returns whatever BASS_ChannelGetData returns.
DWORD CALLBACK StreamProc(HSTREAM handle, void *buffer, DWORD length, void *user)
{
    // get data from the recording
    const DWORD fetched = BASS_ChannelGetData(input, buffer, length);
    return fetched;
}

Please see the documentation for details on the mentioned functions.

Hi Ian, will this output sound?

zahar

outputStream = BASS_StreamCreate(
    sampleRate,
    1,
    BASS_SAMPLE_FLOAT,
    STREAMPROC_PUSH,
    this
);

if (outputStream) {
    BASS_ChannelSetAttribute(outputStream, BASS_ATTRIB_BUFFER, 0);
    BASS_ChannelStart(outputStream);

    BASS_ChannelPlay(outputStream, 0);

    BASS_ChannelSetAttribute(outputStream, BASS_ATTRIB_VOL, 1.0f);

    printf("Output stream successfully created!\n");
}

/// RECORDPROC: forwards every captured buffer unchanged to the push stream
/// `outputStream` and logs the callback interval and the push result.
///
/// @param handle Recording channel (unused).
/// @param buffer Captured data; it is pushed as-is, so its format has to match
///               the format `outputStream` was created with.
/// @param length Size of `buffer` in bytes.
/// @param user   The `speaker` instance passed to BASS_RecordStart.
/// @return Always TRUE so that recording continues.
BOOL speaker::Record(HRECORD handle, const void* buffer, DWORD length, void* user)
{
    speaker* spk = static_cast<speaker*>(user);
    if (!spk || !buffer || length == 0) {
        return TRUE;
    }

    const int sampleCount = static_cast<int>(length / sizeof(float));

    // Time since the previous callback; shows the effective callback period.
    static auto lastCall = std::chrono::steady_clock::now();
    const auto now = std::chrono::steady_clock::now();
    const long long ms = std::chrono::duration_cast<std::chrono::milliseconds>(now - lastCall).count();
    lastCall = now;
    printf("[Record] Callback after %lld ms, total samples: %d\n", ms, sampleCount);

    if (spk->outputStream) {
        const DWORD put = BASS_StreamPutData(spk->outputStream, buffer, length);

        if (put == static_cast<DWORD>(-1)) {
            printf("PutData failed / queue full: %d\n", BASS_ErrorGetCode());
        }
        else {
            printf("put: %lu\n", static_cast<unsigned long>(put));
        }
    }

    // A RECORDPROC must return a value; TRUE keeps the recording going.
    return TRUE;
}

I tried to push data through STREAMPROC_PUSH, the data is successfully streaming, but I can't hear my voice...

zahar

I was finally able to get the voice to play, but I hear it with some artifacts (crackling; I don't really know what to call it).

// Initialize the BASS output device at a fixed 44100 Hz, OR-ing
// BASS_DEVICE_STEREO into the caller-supplied flags.
if (!BASS_Init(device, 44100, (BASS_DEVICE_STEREO) | flags, window, dsguid)) {
    // Log the BASS error code and report the failure to the caller.
    printf("[Speaker] BASS initialization error: %d\n", BASS_ErrorGetCode());
    return false;
}

outputStream = BASS_StreamCreate(
    sampleRate,
    2,
    BASS_SAMPLE_FLOAT,
    STREAMPROC_PUSH,
    this
);

if (outputStream) {
    BASS_ChannelSetAttribute(outputStream, BASS_ATTRIB_BUFFER, 0);
    BASS_ChannelStart(outputStream);
    BASS_ChannelSetAttribute(outputStream, BASS_ATTRIB_VOL, 1.0f);

    BASS_ChannelPlay(outputStream, 0);

    printf("Output stream successfully created!\n");
}

BOOL speaker::Record(HRECORD handle, const void* buffer, DWORD length, void* user)
{
    speaker* spk = static_cast<speaker*>(user);
    //if (!self || !self->codec || !self->codec->encoder || !AVSSync::microphone) {
    //}


    if (!buffer || length == 0) {
        return TRUE;
    }

    const float* floatBuffer = static_cast<const float*>(buffer);
    int sampleCount = length / sizeof(float);

    static auto lastCall = std::chrono::steady_clock::now();
    auto now = std::chrono::steady_clock::now();
    auto ms = std::chrono::duration_cast<std::chrono::milliseconds>(now - lastCall).count();
    lastCall = now;
    printf("[Record] Callback after %lld ms, total samples: %d\n", ms, sampleCount);

    if (spk->outputStream)
    {
        DWORD put = BASS_StreamPutData(spk->outputStream, buffer, length);

        static int counter = 0;
        if (++counter % 50 == 0)
        {
            printf("Put %u bytes → returned %u | error if -1: %d\n",
                length, put, put == -1 ? BASS_ErrorGetCode() : 0);

            //QWORD queued = BASS_ChannelGetLength(spk->outputStream, BASS_POS_BYTE);
            //printf("Buffered bytes: %llu (%.1f ms)\n", queued,
            //    BASS_ChannelBytes2Seconds(spk->outputStream, queued) * 1000);
        }

        if (put == -1)
        {
            printf("PutData error: %d\n", BASS_ErrorGetCode());
        }
    }
}

bool speaker::StartRecording(int device)
{
    printf("[Speaker] Starting recording...\n");

    if (!BASS_RecordInit(-1)) {
        printf("[Speaker] ERROR: RecordInit failed: %d\n", BASS_ErrorGetCode());
        return false;
    }

    BASS_SetConfig(BASS_CONFIG_REC_BUFFER, 100);

    recordStream = BASS_RecordStart(
        44100,
        1,
        BASS_SAMPLE_FLOAT,
        Record,
        this
    );

    if (!recordStream) {
        printf("[Speaker] ERROR: RecordStart failed: %d\n", BASS_ErrorGetCode());
        BASS_RecordFree();
        return false;
    }

    BASS_CHANNELINFO info;
    if (BASS_ChannelGetInfo(recordStream, &info)) {
        printf("[Speaker] Recording started: %d Hz, channels: %d, format: FLOAT\n",
            info.freq, info.chans);
    }

    return true;
}

Ian @ un4seen

Quote from: zahar
Hi Ian, will this output sound?

Yes, it plays the data received from the recording.