Author Topic: read mp3 from byte[] and encode to pcm as byte[] without writing to a file.  (Read 881 times)

serkanp

  • Posts: 135
hi,
i am using c# dotnet core..
i have a server app that will get byte[] mp3 data ,
i want to encode to 16000hz mono pcm without writing to file, i just want to get encoded pcm byte[]

i tried this but not get success.
need help
 
Code: [Select]
     /// <summary>
     /// First attempt at converting MP3 bytes to 16000 Hz mono PCM entirely in memory:
     /// creates a BASS push stream, starts a PCM encoder on it, feeds the input bytes in
     /// and collects whatever BASS_ChannelGetData returns into a MemoryStream.
     /// The poster reports that this version does not work.
     /// </summary>
     /// <param name="bytes">The encoded MP3 data.</param>
     /// <returns>The collected output bytes, or an empty array if any exception is thrown.</returns>
     public async Task<byte[]> GetFromMp3File(byte[] bytes)
     {
         try
         {
             // Initialize BASS
             if (!Bass.BASS_Init(-1, 44100, BASSInit.BASS_DEVICE_DEFAULT, IntPtr.Zero))
                 throw new Exception("BASS initialization failed");
             // NOTE(review): a push stream is created at 16000 Hz / 1 channel and is later fed the
             // still-encoded MP3 bytes; push streams presumably expect raw sample data - confirm
             // against the BASS_StreamCreatePush documentation.
             int stream = Bass.BASS_StreamCreatePush(16000, 1, BASSFlag.BASS_STREAM_DECODE | BASSFlag.BASS_STREAM_AUTOFREE, IntPtr.Zero);
           
             if (stream == 0)
                 throw new Exception("Error creating BASS stream.");

             // Decode the MP3 stream to WAV
             using var inputStream = new MemoryStream(bytes);
             inputStream.Seek(0, SeekOrigin.Begin);
             using var wavStream = new MemoryStream();
             // No file name and no ENCODEPROC are passed to the encoder (both arguments are null).
            int encoder= BassEnc.BASS_Encode_Start(stream, null, BASSEncode.BASS_ENCODE_PCM , null, IntPtr.Zero);
             // The error code is fetched but never inspected.
             var err = Bass.BASS_ErrorGetCode();
             byte[] bufferOut = new byte[65536]; // adjust the size as needed
             byte[] bufferIn = new byte[65536]; // adjust the size as needed
             int readIn = 0;
             int totalReadIn = bytes.Length;
             
             while (true)
             {
                 // NOTE(review): readIn holds the byte count of the most recent read only (it is
                 // overwritten, not accumulated), so this condition does not track total progress.
                 if (readIn < totalReadIn)
                 {
                     // NOTE(review): the second argument of ReadAsync is the offset into bufferIn;
                     // passing readIn together with count = bufferIn.Length exceeds the buffer
                     // as soon as readIn is greater than 0.
                     readIn = await inputStream.ReadAsync(bufferIn, readIn, bufferIn.Length);
                     // The result of BASS_StreamPutData is stored but never checked.
                     var putIn=Bass.BASS_StreamPutData(stream, bufferIn, readIn);
                 }

                 // NOTE(review): the encoder handle, not the stream handle, is passed here - confirm
                 // that BASS_ChannelGetData accepts an encoder handle.
                 int bytesRead = Bass.BASS_ChannelGetData(encoder, bufferOut, bufferOut.Length);
                 if (bytesRead == 0) break;

                 // A negative result is neither written nor used to leave the loop.
               if(bytesRead>0)  await wavStream.WriteAsync(bufferOut, 0, bytesRead);
             }

             // Cleanup runs on the success path only; an exception above skips all of it.
             BassEnc.BASS_Encode_Stop(stream);
             Bass.BASS_StreamFree(stream);
             Bass.BASS_Free();
             wavStream.Seek(0, SeekOrigin.Begin);   
             var b= wavStream.ToArray();
             return b;

         }
         catch (Exception ex)
         {
             // The exception is swallowed (logging is commented out) and an empty array is returned.
             // _logger.LogError(ex, "Error in GetFromMp3");
             return new byte[0];
         }
     }


serkanp

  • Posts: 135
@ian need your assistance now :)
i solved to convert mp3 byte[] to 16000hz mono wav.

Code: [Select]
     /// <summary>
     /// Converts in-memory MP3 bytes to 16000 Hz mono WAV data: the MP3 is opened as a BASS
     /// decoding stream from pinned memory, plugged into a 16000 Hz / 1 channel decoding mixer,
     /// and a PCM encoder on the mixer hands its output to an ENCODEPROC that appends it to a
     /// MemoryStream.
     /// </summary>
     /// <param name="bytes">The encoded MP3 data.</param>
     /// <returns>The bytes collected from the encoder, or an empty array if any exception is thrown.</returns>
     /// <remarks>
     /// A reply in this thread explains that the WAV header produced this way carries an
     /// incorrect length, because BASSenc cannot go back and update it when an ENCODEPROC is used.
     /// </remarks>
     public async Task<byte[]> test(byte[] bytes)
     {
         
         try
         {
             var result = Bass.BASS_Init(-1, 44100, BASSInit.BASS_DEVICE_DEFAULT, IntPtr.Zero);
             if (!result)
                 throw new Exception("BASS initialization failed");
             byte[] buffer;
             long length;
             // Copies the input into a second array of the same size by reading it back
             // through a MemoryStream.
             using (var fs = new MemoryStream(bytes))
             {
                 length = fs.Length;
                 buffer = new byte[length];
                 fs.Read(buffer, 0, (int)length);
             }
             // Pin the copy so its address can be handed to BASS.
             GCHandle _hGCFile = GCHandle.Alloc(buffer, GCHandleType.Pinned);
             // NOTE(review): BASS_MIXER_CHAN_DOWNMIX looks like a per-source flag - confirm it has
             // the intended effect when passed to BASS_Mixer_StreamCreate.
             int mixer=BassMix.BASS_Mixer_StreamCreate(16000, 1, BASSFlag.BASS_MIXER_CHAN_DOWNMIX | BASSFlag.BASS_STREAM_DECODE );
             // BASS_SAMPLE_FLOAT appears twice in the flag expression; the duplicate has no effect.
             int stream = Bass.BASS_StreamCreateFile(_hGCFile.AddrOfPinnedObject(),
                               0L, length, BASSFlag.BASS_SAMPLE_FLOAT | BASSFlag.BASS_STREAM_DECODE | BASSFlag.BASS_SAMPLE_FLOAT );
             // Neither mixer, stream nor mixResult is checked for failure.
             var mixResult = BassMix.BASS_Mixer_StreamAddChannel(mixer, stream, BASSFlag.BASS_DEFAULT );
             using var myWave = new MemoryStream();
             // Callback receiving encoder output: copies the unmanaged block into a managed
             // array and appends it to myWave.
             // NOTE(review): the lambda is async although the delegate is invoked from native
             // code; this relies on the write finishing before the callback returns - confirm.
             ENCODEPROC eNCODEPROC = new ENCODEPROC(async (int handle, int channel, nint buffer, int length, nint user) =>
             {
                 var data = new byte[length];

                 Marshal.Copy(buffer, data, 0, length);
                 await myWave.WriteAsync(data, 0, length);
             });

             var encoder = BassEnc.BASS_Encode_Start(mixer, null, BASSEncode.BASS_ENCODE_PCM , eNCODEPROC, IntPtr.Zero);

             var inBuffer = new byte[32768];
             // NOTE(review): the mixer was created with BASS_STREAM_DECODE, yet it is started
             // with BASS_ChannelPlay - confirm this call is needed for a decoding channel.
             Bass.BASS_ChannelPlay(mixer, false);
             // Pull data through the mixer; the data itself is discarded here because the
             // encoder callback is what collects the output.
             // The loop condition tests the source stream; a reply in this thread recommends
             // testing the mixer handle instead (together with BASS_MIXER_END on the mixer).
             while (stream != 0 &&  Bass.BASS_ChannelIsActive(stream)== BASSActive.BASS_ACTIVE_PLAYING)
             {
                 var data= Bass.BASS_ChannelGetData(mixer, inBuffer, inBuffer.Length);

                 if (data == 0) break;
                 await Task.Delay(10);
             }
             // Only the source stream is freed; the mixer is not freed in this method, and
             // nothing here is released if an exception is thrown earlier.
             Bass.BASS_StreamFree(stream);
             _hGCFile.Free();
             return myWave.ToArray();
             
         }
         catch (Exception ex)
         {
             // The exception is swallowed (logging is commented out) and an empty array is returned.
             // _logger.LogError(ex, "Error in GetFromMp3");
             return new byte[0];
         }
     }
now i have another problem.
i am using whisper ai to speech to text..

whisper did not like the generated file
then i used ffmpeg to convert the file

Code: [Select]
ffmpeg -i test2.wav -vn -ar 16000 -ac 1  test16khz.wav

now whisper says ok to test16khz.wav

then i compared files with ffmpeg:
Code: [Select]
Input #0, wav, from 'test16khz.wav':
  Duration: 00:00:09.48, bitrate: 256 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 16000 Hz, 1 channels, s16, 256 kb/s
 
  Input #0, wav, from 'test2.wav':
  Duration: 00:00:09.48, bitrate: 256 kb/s
  Stream #0:0: Audio: pcm_s16le ([1][0][0][0] / 0x0001), 16000 Hz, 1 channels, s16, 256 kb/s

then i compared files with bass:

Code: [Select]
// Initialise BASS, then open both WAV files as BASS streams and fetch their channel info
// so the two can be compared (e.g. in a debugger).
var result = Bass.BASS_Init(-1, 44100, BASSInit.BASS_DEVICE_DEFAULT, IntPtr.Zero);
 if (!result)
     throw new Exception("BASS initialization failed");
 // f1 is the ffmpeg-converted file, f2 the file produced by the code above.
 var f1= Bass.BASS_StreamCreateFile("test16khz.wav", 0, 0, BASSFlag.BASS_DEFAULT);
 var f2 = Bass.BASS_StreamCreateFile("test2.wav", 0, 0, BASSFlag.BASS_DEFAULT);
 var info=Bass.BASS_ChannelGetInfo(f1);
 var info2 = Bass.BASS_ChannelGetInfo(f2);

seems same..
but whisper says to test2.wav
Code: [Select]
Invalid wave file, the size is too small and couldn't read all the samples.

files are attached

radio42

  • Posts: 4840
It is probable that your raw PCM byte[] doesn't contain correct WAV header information - that's all.
As you are using Bass.Net, you might try the WaveWriter class, which will do the job for you (I assume as ffmpeg).

However, OpenAI's Whisper API also accepts MP3 files, as it has a quite strict size limitation of 25MB. So why do you want to convert it to WAV first? But that is a different question...

But just to understand your use case, why don't you write/encode to a file, as whisper ai needs a file as input anyhow? Wouldn't that make your life easier?
« Last Edit: 2 Oct '23 - 19:01 by radio42 »

Ian @ un4seen

  • Administrator
  • Posts: 26177
but whisper says to test2.wav
Code: [Select]
Invalid wave file, the size is too small and couldn't read all the samples.

files are attached

The problem is that the length in the WAV header is incorrect. When writing to a file, BASSenc will update the WAV header when it's finished, but that isn't possible with an ENCODEPROC. You could try using BASS_Encode_StartLimit (instead of BASS_Encode_Start) and set the "limit" parameter to the correct length, eg. the length of the mixer. You should also set the BASS_MIXER_END flag on the mixer and use the mixer handle in your loop's BASS_ChannelIsActive call.

serkanp

  • Posts: 135
It is probable that your raw PCM byte[] doesn't contain correct WAV header information - that's all.
As you are using Bass.Net, you might try the WaveWriter class, which will do the job for you (I assume as ffmpeg).

However, OpenAI's Whisper API also accepts MP3 files, as it has a quite strict size limitation of 25MB. So why do you want to convert it to WAV first? But that is a different question...

But just to understand your use case, why don't you write/encode to a file, as whisper ai needs a file as input anyhow? Wouldn't that make your life easier?

whisper.net does not accept mp3 :) it says only wav files accepted.. the examples that they provide uses naudio to convert mp3 to wav and it decreases the wav quality alot.. (i am already using it)
so i wanted to use bass for that.. and yes i am using bass.net :)
i'll try the wavewriter..  hope it will work on linux also..

edit:
wavewriter only writes to file.. without writing to file, is it possible to write it to stream and get byte[] result?
when i decompiled wavewriter i saw lots of work on writeWaveHeader functions etc..
and you are using a BinaryWriter to write to file..
so how to do it with a stream?

can you provide an example?
« Last Edit: 3 Oct '23 - 09:07 by serkanp »

serkanp

  • Posts: 135
but whisper says to test2.wav
Code: [Select]
Invalid wave file, the size is too small and couldn't read all the samples.

files are attached

The problem is that the length in the WAV header is incorrect. When writing to a file, BASSenc will update the WAV header when it's finished, but that isn't possible with an ENCODEPROC. You could try using BASS_Encode_StartLimit (instead of BASS_Encode_Start) and set the "limit" parameter to the correct length, eg. the length of the mixer. You should also set the BASS_MIXER_END flag on the mixer and use the mixer handle in your loop's BASS_ChannelIsActive call.

how do i get the correct length?  or the length of the mixer?
@radio42 bass.net does not have BassEnc.BASS_Encode_StartLimit feature by the way.. will you add it?
@ian can i get an example (ex: read byte[] mp3 and convert it to 16000hz , 1 channel pcm)? it can be c++ , i can convert it to c#
« Last Edit: 3 Oct '23 - 09:48 by serkanp »

serkanp

  • Posts: 135
It is probable that your raw PCM byte[] doesn't contain correct WAV header information - that's all.
As you are using Bass.Net, you might try the WaveWriter class, which will do the job for you (I assume as ffmpeg).

However, OpenAI's Whisper API also accepts MP3 files, as it has a quite strict size limitation of 25MB. So why do you want to convert it to WAV first? But that is a different question...

But just to understand your use case, why don't you write/encode to a file, as whisper ai needs a file as input anyhow? Wouldn't that make your life easier?

i got the wavewriter class and modified a bit, changed fileWriter to memorystream, but the byte[] is corrupted..
here is the modified version in attachment

and this is how i used:
Code: [Select]
            /// <summary>
            /// Variant that replaces the encoder with WaveWriterStream (the poster's modified copy
            /// of the Bass.Net WaveWriter, supplied as an attachment and not shown here): the MP3
            /// bytes are pinned, opened as a decoding stream, plugged into a 16000 Hz / 1 channel
            /// decoding mixer, and the mixer output is written to the WaveWriterStream.
            /// The poster reports that the resulting byte[] is corrupted.
            /// </summary>
            /// <param name="bytes">The encoded MP3 data.</param>
            /// <returns>The writer's contents, or an empty array if any exception is thrown.</returns>
            public async Task<byte[]> test2(byte[] bytes)
      {

          try
          {
              var result = Bass.BASS_Init(-1, 44100, BASSInit.BASS_DEVICE_DEFAULT, IntPtr.Zero);
              if (!result)
                  throw new Exception("BASS initialization failed");
              long length=bytes.Length;
              // Pin the caller's array directly so its address can be handed to BASS.
              GCHandle _hGCFile = GCHandle.Alloc(bytes, GCHandleType.Pinned);
              int mixer = BassMix.BASS_Mixer_StreamCreate(16000, 1,   BASSFlag.BASS_STREAM_DECODE  | BASSFlag.BASS_MIXER_END );
              int stream = Bass.BASS_StreamCreateFile(_hGCFile.AddrOfPinnedObject(),
                                0L, length,  BASSFlag.BASS_STREAM_DECODE );
              // NOTE(review): BASS_STREAM_DECODE is passed as the add-channel flag here - confirm
              // that it is meaningful for BASS_Mixer_StreamAddChannel. None of mixer, stream or
              // mixResult is checked for failure.
              var mixResult = BassMix.BASS_Mixer_StreamAddChannel(mixer, stream, BASSFlag.BASS_STREAM_DECODE );
               
              var inBuffer = new byte[32768];
              // NOTE(review): the mixer is a decoding channel, yet it is started with
              // BASS_ChannelPlay - confirm this call is needed.
              Bass.BASS_ChannelPlay(mixer, false);
             
              // The writer is constructed from the mixer handle; presumably it reads the output
              // format from that channel - verify against the WaveWriterStream attachment.
             using WaveWriterStream waveWriter= new WaveWriterStream(mixer,true);
              while (mixer != 0 && Bass.BASS_ChannelIsActive(mixer) == BASSActive.BASS_ACTIVE_PLAYING)
              {
                  var dataRead = Bass.BASS_ChannelGetData(mixer, inBuffer, inBuffer.Length);
                  // Stop when nothing was returned or BASS reported an error (-1).
                  if (dataRead == 0 || dataRead==-1) break;
                  waveWriter.Write(inBuffer,dataRead);   
                 
                  await Task.Delay(10);
              }
              // NOTE(review): the array is taken before the writer is closed/disposed - confirm
              // that WaveWriterStream has a complete header at this point.
              var arr= waveWriter.ToArray();
              // Only the source stream is freed; the mixer is not freed in this method, and
              // nothing here is released if an exception is thrown earlier.
              Bass.BASS_StreamFree(stream);
             
              _hGCFile.Free();
           
              return arr;

          }
          catch (Exception ex)
          {
              // The exception is swallowed (logging is commented out) and an empty array is returned.
              // _logger.LogError(ex, "Error in GetFromMp3");
              return new byte[0];
          }
      }

« Last Edit: 3 Oct '23 - 10:12 by serkanp »

serkanp

  • Posts: 135
But just to understand your use case, why don't you write/encode to a file, as whisper ai needs a file as input anyhow? Wouldn't that make your life easier?


Code: [Select]

// Whisper.net usage as posted: builds a processor and transcribes from a Stream.
// path, language and stream are defined outside this snippet.
using var whisperFactory = WhisperFactory.FromPath(path);
using var processor = whisperFactory.CreateBuilder()
    .WithLanguage(language)
    //.WithThreads(50)
    .WithTemperature(0.5f)
.Build();


// Each result exposes Start, End and Text, which are printed as they arrive.
await foreach (var result in processor.ProcessAsync(stream)) //<--this is stream..
{
    Console.WriteLine($"{result.Start}->{result.End}: {result.Text}");
     

}

i am using a rest service.. mobile app sends mp3 or pcm file to rest service as byte[]
if i do what you say, first i have to write it to a file, then convert it to pcm if mp3,
then write pcm to file again, then send it to whisper.net and it will read file again..
then i will get the result from whisper.net then delete the file..
so there are alot of disk io ..
i have enough ram and cpu and gpu to do it.. why do i use disk io? :)

radio42

  • Posts: 4840
Quote
@radio42 bass.net does not have BassEnc.BASS_Encode_StartLimit feature by the way.. will you add it?
There is no extra BASS_Encode_StartLimit method, as the extra limit param is contained as an overload of the BASS_Encode_Start method - so nothing to add, it is already contained and available.
(in BASS there are no overloads available, hence Ian needs to add new methods when an extra param is added)



Quote
i am using a rest service.. mobile app sends mp3 or pcm file to rest service as byte[]
if i do what you say, first i have to write it to a file, then convert it to pcm if mp3,
then write pcm to file again, then send it to whisper.net and it will read file again..
The Whisper API accepts the WAV and MP3 format - so why do you want/need to convert that as an intermediate step? That was my question...
I.e. when you receive mp3 data from your mobile app, save it as an mp3 file and send it directly to Whisper.
And when you receive pcm data from your mobile app, save it as an wav file and send it directly to Whisper.
This might save IO and CPU...? But there is for sure a good reason you always want to convert all to a WAV file.

But when you want to save the extra step of saving the received data to a file (either wav or mp3), but send the data directly (so that the Whisper API - which I don't know in detail - doesn't have to read that file); then you might also do that without re-encoding the mp3?! I.e. take the received data and use it 1:1 to send it to Whisper?
« Last Edit: 3 Oct '23 - 11:22 by radio42 »

serkanp

  • Posts: 135
"whisper.net" only accepts 16000 hz wav pcm . not the python version.. c# version requires it..
and it only takes stream

radio42

  • Posts: 4840
Quote
"whisper.net" only accepts 16000 hz wav pcm . not the python version.. c# version requires it..
and it only takes stream
I didn't know that, but that is quite a limitation of the whisper.net version - then I do understand your issue,

Ian @ un4seen

  • Administrator
  • Posts: 26177
how do i get the correct length?  or the length of the mixer?

If the BASS_MIXER_END flag is set on the mixer then BASS_ChannelGetLength will give you the mixer's length based on its current source(s).

What is the MP3 data's sample rate? If it's already 16000 Hz then you probably don't need a mixer just to convert it to mono. You can use the BASS_SAMPLE_MONO flag with BASS_StreamCreateFile to decode stereo MP3 files in mono.

serkanp

  • Posts: 135
thank you @ian and @radio42

i solved with an easy method, as @radio42 suggested i used wavewriter that i modified in post #124

Code: [Select]
/// <summary>
/// Decodes an in-memory audio file (e.g. MP3) with BASS and returns the decoded data as
/// WAV bytes produced by WaveWriterStream, without writing anything to disk.
/// </summary>
/// <param name="bytes">The encoded audio file contents.</param>
/// <returns>
/// The bytes returned by WaveWriterStream.ToArray(), or null when the input is null,
/// the BASS stream cannot be created, or any exception is thrown.
/// </returns>
/// <remarks>
/// BASS must already be initialised by the caller; this method does not call BASS_Init.
/// </remarks>
public byte[] ConvertToPCM(byte[] bytes)
{
    if (bytes == null)
        return null;

    try
    {
        GCHandle handle = default;
        int stream = 0;
        WaveWriterStream waveWriter = null;
        try
        {
            // Pin the array so BASS can read the file image straight from managed memory.
            handle = GCHandle.Alloc(bytes, GCHandleType.Pinned);
            stream = Bass.BASS_StreamCreateFile(handle.AddrOfPinnedObject(), 0, bytes.Length, BASSFlag.BASS_STREAM_DECODE);
            if (stream == 0)
                return null; // BASS could not open the data; same failure result as the catch below.

            // NOTE(review): WaveWriterStream is a custom class that is not shown here. A later
            // reply in this thread points out that the Bass.Net WaveWriter overload taking
            // (numChannels, sampleRate, bitsPerSample, rewrite) expects a channel COUNT first,
            // not a handle - confirm which overload this call binds to.
            waveWriter = new WaveWriterStream(stream, 16000, 16, true);

            short[] data = new short[32768];
            // Kept as in the original: the element count is passed as the length argument.
            // NOTE(review): if that argument is a byte count, only half the buffer is used - confirm.
            int datalen = data.Length;
            while (Bass.BASS_ChannelIsActive(stream) == BASSActive.BASS_ACTIVE_PLAYING)
            {
                int count = Bass.BASS_ChannelGetData(stream, data, datalen);
                if (count < 0)
                    break; // error or end of data: retrying would spin forever
                if (count > 0)
                    waveWriter.Write(data, count);
            }

            return waveWriter.ToArray();
        }
        finally
        {
            // Always release the writer, the BASS stream and the pin, including on failure.
            waveWriter?.Close();
            if (stream != 0)
                Bass.BASS_StreamFree(stream);
            if (handle.IsAllocated)
                handle.Free();
        }
    }
    catch (Exception)
    {
        // Best-effort contract kept from the original: any failure is reported as null.
        return null;
    }
}

problem solved..
now i have to figure the same for recording from microphone to mp3 with internal mp3 encoders.. if i remember correct, there were examples to do that..
if i record pcm directly on android, 8-10 seconds of wav becomes ~1mb, if i do it as mp3, it will be ~40kb , so with rest service, it will be better ..

radio42

  • Posts: 4840
As you basically transcribe speech via the whisper api, in my tests even an 8000Hz, 8bit, mono wave file or the same 24 kbps mono MP3 file delivered good results. A higher quality did not get better results.

serkanp

  • Posts: 135
As you basically transcribe speech via the whisper api, in my tests even an 8000Hz, 8bit, mono wave file or the same 24 kbps mono MP3 file delivered good results. A higher quality did not get better results.

did you use
<PackageReference Include="Whisper.net" Version="1.4.7" />
<PackageReference Include="Whisper.net.Runtime" Version="1.4.7" />
and did it take mp3?
on my side, the async version processor.ProcessAsync , it does not accept mp3, only accepts wave pcm stream


radio42

  • Posts: 4840
No, I was using the regular Whisper API. That’s why I mentioned wav as well, as I understood, that the .Net lib only supports waves data…

serkanp

  • Posts: 135
@radio42
there is a very very strange behaviour with your waveWriter :)
if you use wavewriter more than once in the same runtime.. the first one writes correctly but the second one is buggy..
hereis testcode
Code: [Select]
    /// <summary>
    /// Test code posted to reproduce a suspected WaveWriter problem: decodes the given MP3
    /// bytes through a 16000 Hz / 1 channel decoding mixer and writes the result to
    /// c:\data\testwave.wav with the Bass.Net WaveWriter, then dumps the input bytes to
    /// c:\data\test2.mp3.
    /// </summary>
    /// <param name="bytes">The encoded MP3 data.</param>
    /// <returns>Intended to return the converted data; null if any exception is thrown.</returns>
    /// <remarks>
    /// The reply following this post explains the corruption seen on repeated calls: the
    /// WaveWriter constructor used below binds to the overload
    /// (fileName, numChannels, sampleRate, bitsPerSample, rewrite), so the mixer handle is
    /// taken as the number of channels.
    /// </remarks>
    public byte[] ConvertToPCMStream(byte[] bytes)
    {
        try
        {
            // Initialise BASS only if the first listed device is not initialised yet.
            var q = Bass.BASS_GetDeviceInfos();
            if (!q[0].IsInitialized)
            {
                // The result of BASS_Init is stored but never checked.
                var bassStats = Bass.BASS_Init(0, 16000, BASSInit.BASS_DEVICE_DEFAULT, IntPtr.Zero);

            }
            // Pin the caller's array so its address can be handed to BASS.
            GCHandle handle = GCHandle.Alloc(bytes, GCHandleType.Pinned);
            var handlePtr = handle.AddrOfPinnedObject();
            int length = bytes.Length;
            int mixer = BassMix.BASS_Mixer_StreamCreate(16000, 1, BASSFlag.BASS_STREAM_DECODE | BASSFlag.BASS_MIXER_CHAN_DOWNMIX);
            // The error code is fetched but never inspected.
            var error = Bass.BASS_ErrorGetCode();
            int stream = Bass.BASS_StreamCreateFile(handlePtr, 0, length, BASSFlag.BASS_STREAM_DECODE);
            BassMix.BASS_Mixer_StreamAddChannel(mixer, stream, BASSFlag.BASS_STREAM_DECODE);
            // This is the call discussed in the reply: 'mixer' lands in the numChannels
            // parameter, so the header is only right when the handle value happens to match
            // the real channel count.
            WaveWriter waveWriter = new WaveWriter("c:\\data\\testwave.wav", mixer,16000,16, true);
            short[] data = new short[32768];
            var datalen = data.Length;

            // The loop condition tests the source stream while the data is pulled from the mixer.
            while (Bass.BASS_ChannelIsActive(stream) == BASSActive.BASS_ACTIVE_PLAYING)
            {
                int count = Bass.BASS_ChannelGetData(mixer, data, datalen);
                if (count > 0)
                    waveWriter.Write(data, count);
                if (count == 0)
                {
                    break;
                }

            }
            // Cleanup runs on the success path only; an exception above skips all of it.
            waveWriter.Close();
            Bass.BASS_StreamFree(stream);
            Bass.BASS_StreamFree(mixer);
            handle.Free();
            waveWriter.Dispose();
            waveWriter = null;
           
            File.WriteAllBytes("c:\\data\\test2.mp3", bytes);
            // NOTE(review): 'outdata' is not declared anywhere in this method as posted.
            return outdata;
        }
        catch (Exception ex)
        {
            // The exception is swallowed and null is returned.
            return null;
        }
    }

now use the test.mp3 in attachment
(i also put test wave files on first pass and second pass)
if you loop 2-3 times , the first convert is perfect.. wave outputs same sound with mp3
on the second convert, output size is almost same but the output is corrupted..


but the weird part
if you change
Code: [Select]
WaveWriter waveWriter = new WaveWriter("c:\\data\\testwave.wav", mixer,16000,16, true);

to this
Code: [Select]
WaveWriter waveWriter = new WaveWriter("c:\\data\\testwave.wav", mixer, true);

it works every time..

what do you think?

radio42

  • Posts: 4840
You are using the constructor incorrectly... the only difference is, that the 2nd constructor overload calls internally BASS_ChannelGetInfo on the given stream to determine the BitsPerSample, NumChannels and SampleRate! I.e. it uses correct values.

Whereas the 1st overload you are using does NOT even exist in the way you are using it!
I.e. the overload you are using is this one (you are using the mixer handle as the 'numChannels', which is incorrect!)
Code: [Select]
public WaveWriter(string fileName, int numChannels, int sampleRate, int bitsPerSample, bool rewrite)
The first time you use the code, it might by chance use a mixer handle of 1, which happens to match the correct number of channels...

So it is not the WaveWriter class which contains an error ;-)

serkanp

  • Posts: 135
You are using the constructor incorrectly... the only difference is, that the 2nd constructor overload calls internally BASS_ChannelGetInfo on the given stream to determine the BitsPerSample, NumChannels and SampleRate! I.e. it uses correct values.

Whereas the 1st overload you are using does NOT even exist in the way you are using it!
I.e. the overload you are using is this one (you are using the mixer handle as the 'numChannels', which is incorrect!)
Code: [Select]
public WaveWriter(string fileName, int numChannels, int sampleRate, int bitsPerSample, bool rewrite)
The first time you use the code, it might by chance use a mixer handle of 1, which happens to match the correct number of channels...

So it is not the WaveWriter class which contains an error ;-)


ahhh.. got it.. my mistake.. thanks for correction :)))