Author Topic: My BASS project with Google Speech Recognition  (Read 44 times)

Luiz

  • Posts: 11
Hi,
I modified the Record Test Demo to work with Google Cloud Speech Recognition.

My problem is that the app sometimes works but other times it stops with a AV and I have no detailed debug info, only CPU Hexa info.

I have attached  the project.

Please, add Bass dlls and Indy dlls: libeay32 and ssleay32  to project folder. I get rid off these dlls because the forum limits to send attachments.

Some help or advice will be appreciated.

Here is a snippet of the code(Please see full code in attachments):

Code: [Select]
(*
 *  BASS Recording example for Delphi
 *)

function RecordingCallback(Handle: HRECORD; buffer: Pointer; length: DWORD; user: Pointer): boolean; stdcall;
var level:single;
begin
BASS_ChannelGetLevelEx(handle, @level, 0.1, BASS_LEVEL_MONO); // get the current level
if (not recording and (level >= threshold)) then begin // sound started
   recording := TRUE;
   // start recording here
       Form1.Statusbar1.Panels[0].Text:='Recording';
  end;
if (recording) then begin
// store data here
    // Copy new buffer contents to the memory buffer
  Form1.WaveStream.Write(buffer^, length);
  if (level < threshold) then begin // too quiet
if (GetTickCount() - lastsound >= 2000) then begin // too quiet for 2 seconds
    //stop recording here
            recording := FALSE;
            Form1.Statusbar1.Panels[0].Text:='Starting speech goggle recognize';
            Form1.SendAudioFlacToGoogle;
            Form1.Statusbar1.Panels[0].Text:='Sending audio to goggle recognize';
end
end else
lastsound := GetTickCount();
  end;
  // Allow recording to continue
result:= recording; //TRUE;
end;


function FlacCallback(Handle: HENCODE; channel:DWORD;buffer: Pointer; length: DWORD; oofset: QWORD; user: Pointer): boolean; stdcall;
var level:dword;
begin
    // Copy new buffer contents to the memory buffer
Form1.FlacStream.Write(buffer^, length);
    // Allow recording to continue
Result := True;
end;

procedure TForm1.SendAudioFlacToGoogle;
var chanToRecog:HSTREAM;
var
i: integer;
  obj:TStrObj;
  Buffer: array [0..1024] of Byte;
begin
WaveStream.Position := 4;
i := WaveStream.Size - 8;
WaveStream.Write(i, 4);
i := i - $24;
WaveStream.Position := 40;
WaveStream.Write(i, 4);
WaveStream.Position := 0;
  //create stream from recorded data
  Form1.Statusbar1.Panels[0].Text:='Staring create stream from recorded data';
  chanToRecog := BASS_StreamCreateFile(True, WaveStream.Memory, 0, WaveStream.Size, BASS_MUSIC_DECODE or BASS_UNICODE);
  Form1.Statusbar1.Panels[0].Text:='Finish create stream from recorded data ';

  Form1.Statusbar1.Panels[0].Text:='Creating flac';
  FlacStream.Clear;
  BASSEnc_FLAC.BASS_Encode_FLAC_Start(chanToRecog, 0, BASS_ENCODE_FP_AUTO or BASS_UNICODE, @FlacCallBack, nil);
  Form1.Statusbar1.Panels[0].Text:='Starting encoder flac';

  while BASS_ChannelIsActive(chanToRecog) > 0 do
  begin
    BASS_ChannelGetData(chanToRecog, @Buffer, 1024);
    application.ProcessMessages;
  end;
  BASS_Encode_Stop(chanToRecog);
  BASS_StreamFree(chanToRecog);
  Form1.Statusbar1.Panels[0].Text:='Finishing flac creation';

  //flac ok - make base64 flac ands send to thread recognition
  FlacStream.position:=0;
  obj:=TStrObj.Create(TIdEncoderMIME.EncodeStream(FlacStream));
  Form1.Statusbar1.Panels[0].Text:='Adding flac to recognition thread';
  //add flac in base64 to thread recognition queue
  VoiceThread.AdicionarItem(obj);
  Form1.Statusbar1.Panels[0].Text:='Restarting Recording';
  Form1.StartRecording;

end;


(* Initialize BASS, form controls, memory stream *)
procedure TForm1.FormCreate(Sender: TObject);
var
  i: Integer;
  dName: PAnsiChar;
  level: Single;
begin
// check the correct BASS was loaded
  Memo1.Clear;
  Memo2.Clear;
if (HIWORD(BASS_GetVersion) <> BASSVERSION) then
begin
MessageBox(0,'An incorrect version of BASS.DLL was loaded', nil,MB_ICONERROR);
Halt;
end;
if (not BASS_RecordInit(-1)) or (not BASS_Init(-1, 44100, 0, Handle, nil)) then
begin
BASS_RecordFree;
BASS_Free();
MessageDlg('Cannot start default recording device!', mtError, [mbOk], 0);
Halt;
end;
WaveStream := TMemoryStream.Create;
  FlacStream:= TMemoryStream.Create;
i := 0;
dName := BASS_RecordGetInputName(i);
while dName <> nil do
begin
ComboBox1.Items.Add(StrPas(dName));
// is this one currently "on"?
if (BASS_RecordGetInput(i, level) and BASS_INPUT_OFF) = 0 then
        ComboBox1.ItemIndex := i;
Inc(i);
dName := BASS_RecordGetInputName(i);
end;
    ComboBox1Change(Self); // display info

  VoiceThread:=TVoiceThread.Create(TheCallback);
  VoiceThread.Start;
end;


procedure TForm1.StopRecording;
var chanToRecog:HSTREAM;
i: integer;
  he:BassEnc.HENCODE;
  Buffer: array [0..1024] of Byte;
begin
//if BASS_ChannelIsActive(rchan) <> 0 then BASS_ChannelStop(rchan);
bGrava.Caption := 'Record';
// complete the WAV header
WaveStream.Position := 4;
i := WaveStream.Size - 8;
WaveStream.Write(i, 4);
i := i - $24;
WaveStream.Position := 40;
WaveStream.Write(i, 4);
WaveStream.Position := 0;
  //create stream from the recorded data
//chan := BASS_StreamCreateFile(True, WaveStream.Memory, 0, WaveStream.Size, 0);

  //create stream from the recorded data to encode wav to flac
  chanToRecog := BASS_StreamCreateFile(True, WaveStream.Memory, 0, WaveStream.Size, BASS_MUSIC_DECODE or BASS_UNICODE);

  FlacStream.Clear;
  BASSEnc_FLAC.BASS_Encode_FLAC_Start(chanToRecog, 0, BASS_ENCODE_FP_AUTO or BASS_UNICODE, @FlacCallBack, nil);

  while BASS_ChannelIsActive(chanToRecog) > 0 do
  begin
    BASS_ChannelGetData(chanToRecog, @Buffer, 1024);
    application.processmessages;
  end;
  //stop and free channel
  BASS_Encode_Stop(chanToRecog);
  BASS_StreamFree(chanToRecog);
end;

procedure TForm1.StartRecording;
begin
if ComboBox1.ItemIndex < 0 then Exit;
if WaveStream.Size > 0 then begin // free old recording
     //BASS_StreamFree(chan);
WaveStream.Clear;
     FlacStream.Clear;
end;
// generate header for WAV file
with WaveHdr do  begin
riff := 'RIFF';
len := 36;
cWavFmt := 'WAVEfmt ';
dwHdrLen := 16;
wFormat := 1;
wNumChannels := 2;
dwSampleRate := 16000;
wBlockAlign := 4;
dwBytesPerSec := 176400;
wBitsPerSample := 16;
cData := 'data';
dwDataLen := 0;
    end;
WaveStream.Write(WaveHdr, SizeOf(WAVHDR));
// start recording @ 44100hz 16-bit stereo
rchan := BASS_RecordStart(16000, 2, 0, @RecordingCallback, nil);
if rchan = 0 then begin
MessageDlg('Couldn''t start recording!', mtError, [mbOk], 0);
WaveStream.Clear;
    FlacStream.Clear;
end else bGrava.Caption := 'Stop';
end;

procedure TForm1.bGravaClick(Sender: TObject);
begin
if BASS_ChannelIsActive(rchan) <> 0 then
    StopRecording
  else StartRecording;
end;



{ TVoiceThread }

procedure TVoiceThread.AdicionarItem(poItem: TObject);
begin
  //Enter Critical section
  Self.FCritical.Enter;
  try
    //Put item in Queue
    Self.FQueue.Push(poItem);
    //Trigger TEvent
    Self.FEvent.SetEvent;
  finally
   //Exit Critical section
    Self.FCritical.Release;
  end;
end;

procedure TVoiceThread.AfterConstruction;
begin
  inherited;
  //Crticial secion to access queue objects
  Self.FCritical := TCriticalSection.Create;

  //Queue to be processed
  Self.FQueue    := TObjectQueue.Create;

  //Trigger
  Self.FEvent    := TEvent.Create(nil,False,True,'_exemploevent');
end;

procedure TVoiceThread.BeforeDestruction;
begin
  inherited;
  Self.FCritical.Free;
  Self.FQueue.Free;
  Self.FEvent.Free;
  Self.HTTPClient.Free;
end;

constructor TVoiceThread.Create(aCallback: TMyCallback);
begin
  inherited Create(true);
  FCallback := aCallback;
  HTTPClient := TIdHTTP.Create(nil);
  SSL := TIdSSLIOHandlerSocketOpenSSL.Create(HTTPClient);
  SSL.SSLOptions.SSLVersions := [sslvTLSv1, sslvTLSv1_1, sslvTLSv1_2];
  HTTPClient.IOHandler := SSL;
  HTTPClient.AllowCookies := True;
  HTTPClient.HandleRedirects := true;
  HTTPClient.Request.ContentType := 'application/json';
end;

procedure TVoiceThread.Execute;
var eEvent : TWaitResult;
    s:string;
    ob:TStrObj;
begin
  inherited;
  while not (Self.Terminated) do  begin
      //wait event trigger
      eEvent := Self.FEvent.WaitFor(INFINITE);

      case eEvent of
      //Process Queue when event was triggered
      wrSignaled: begin
       //Check if exists items in the queue
        if (Self.FQueue.Count > 0) then begin
         //Enter in  Critical Section
          Self.FCritical.Enter;
          try
            while (Self.FQueue.Count > 0) do begin
              Sleep(10);
              //decode stream
              ob:=TStrObj(Self.FQueue.Pop);
              s:=ob.Value;
              GoogleRecognize(s);
              ob.Free;
              //Self.FQueue.Pop.Free;
            end;
          finally
            //Exit Critical section
            Self.FCritical.Release;
          end;
         end;
      end;

      wrTimeout:begin
       //Timeout
       Continue;
      end;

      wrAbandoned,wrError,wrIOCompletion:begin
       Abort;
      end;

     end;
  end;
end;

procedure TForm1.TheCallback(const msg : String);
begin
  Memo2.Text:=Memo2.Text+msg;
end;

procedure TVoiceThread.GoogleRecognize(base64:string);
var
  s,Response, url: String;
  st:TstringStream;
  Binary: AnsiString;
  Jso,JsoAlt:TJsonObject;
  Jsa:TJsonArray;
begin
    url:='https://speech.googleapis.com/v1p1beta1/speech:recognize?key='+GoogleAPIKey;
    s:='{"config":'+
    '{'+
    '  "encoding":"FLAC",'+
    '  "sampleRateHertz": 16000,'+
    '  "audioChannelCount": 2,'+
    '  "enableSeparateRecognitionPerChannel": true,'+
    '  "languageCode": "pt-BR"'+
    '},'+
    '"audio": {'+
     ' "content":"'+base64  +'"'+
     '}'+
   '}';

    st := TStringStream.Create(s);
    st.Position := 0;
    try
     try
      Response := HTTPClient.Post(url,st);
      try
       Jso := TJSONObject.ParseJSONValue( TEncoding.UTF8.GetBytes(Response), 0) as TJSONObject;
       Jsa := jso.Get('results').JsonValue as TJsonArray;
       JsoAlt := Jsa.Items[0] as TJSONObject; //alternatives
       Jsa := JsoAlt.GetValue<TJsonArray>('alternatives');
       JsoAlt := Jsa.Items[0] as TJSONObject; //prim objeto de alternative
       UpdateTexto(JsoAlt.GetValue<string>('transcript'));
      finally
        Jso.Free;
      end;
     except
       //on e:exception do showmessage(e.Message);
     end;
    finally
      st.free;
    end;
end;

procedure TVoiceThread.UpdateTexto(s: string);
begin
  if not Assigned(FCallback) then
    Exit;
  Self.Queue(  // Executed later in the main thread
    procedure
    begin
      FCallback(s);
    end
  );
end;

{ TStrObj }

constructor TStrObj.Create(s: string);
begin
  Value:=s;
end;

end.
« Last Edit: 10 Sep '18 - 13:21 by Luiz »