`createScriptProcessor` and `inputBuffer` are deprecated, which throws an error.
client.js gives a deprecation error because it uses `inputBuffer` and `createScriptProcessor`.
// Request access to the microphone
// NOTE(review): ScriptProcessorNode (createScriptProcessor) and
// AudioProcessingEvent.inputBuffer are deprecated; the modern replacement
// is AudioWorkletNode — TODO: migrate when feasible.
navigator.mediaDevices.getUserMedia({ audio: true })
.then(stream => {
let audioContext = new AudioContext();
// Feed the live microphone stream into the audio graph.
let source = audioContext.createMediaStreamSource(stream);
// 256-sample buffer, 1 input channel, 1 output channel (deprecated API).
let processor = audioContext.createScriptProcessor(256, 1, 1);
source.connect(processor);
// Connecting to destination keeps the node active so onaudioprocess fires.
processor.connect(audioContext.destination);
mic_available = true; // NOTE(review): assumed to be declared elsewhere in this file — confirm
start_msg()
processor.onaudioprocess = function(e) {
// Float32 samples in [-1, 1] from the first (and only) channel.
let inputData = e.inputBuffer.getChannelData(0);
let outputData = new Int16Array(inputData.length);
// Convert to 16-bit PCM
for (let i = 0; i < inputData.length; i++) {
// Scale to the signed 16-bit range and clamp to avoid overflow at +1.0.
outputData[i] = Math.max(-32768, Math.min(32767, inputData[i] * 32768));
}
// Send the 16-bit PCM data to the server
if (socket.readyState === WebSocket.OPEN) {
// Create a JSON string with metadata
let metadata = JSON.stringify({ sampleRate: audioContext.sampleRate });
// Convert metadata to a byte array
let metadataBytes = new TextEncoder().encode(metadata);
// Create a buffer for metadata length (4 bytes for 32-bit integer)
let metadataLength = new ArrayBuffer(4);
let metadataLengthView = new DataView(metadataLength);
// Set the length of the metadata in the first 4 bytes
metadataLengthView.setInt32(0, metadataBytes.byteLength, true); // true for little-endian
// Combine metadata length, metadata, and audio data into a single message
// Wire format: [4-byte LE length][JSON metadata][Int16 PCM payload].
let combinedData = new Blob([metadataLength, metadataBytes, outputData.buffer]);
socket.send(combinedData);
}
};
This should only be a warning, not an error. The script should work nevertheless (tested on Firefox 130.0 and Chrome 128.0.6613.120, Windows 11 64-bit).
If anybody knows how to get rid of that warning, I'm happy to hear ideas or take PRs.
Couldn't resolve the following error: Could not load library libcudnn_ops_infer.so.8. Error: libcudnn_ops_infer.so.8: cannot open shared object file: No such file or directory
(/home/azureuser/realtime_translator/venv) azureuser@VoiceCloning:~/realtime_translator$ python server.py Starting server, please wait... Initializing RealtimeSTT... RealtimeSTT initialized Server started. Press Ctrl+C to stop the server. /home/azureuser/realtime_translator/server.py:109: DeprecationWarning: There is no current event loop asyncio.get_event_loop().run_until_complete(start_server) /home/azureuser/realtime_translator/server.py:110: DeprecationWarning: There is no current event loop asyncio.get_event_loop().run_forever() Client connected Could not load library libcudnn_ops_infer.so.8. Error: libcudnn_ops_infer.so.8: cannot open shared object file: No such file or directory Aborted (core dumped) (/home/azureuser/realtime_translator/venv) azureuser@VoiceCloning:~/realtime_translator$ nvcc --version nvcc: NVIDIA (R) Cuda compiler driver Copyright (c) 2005-2019 NVIDIA Corporation Built on Sun_Jul_28_19:07:16_PDT_2019 Cuda compilation tools, release 10.1, V10.1.243
OK, it started working. I used the following to resolve it: `sudo apt-get install libcudnn8`
This should only be a warning, not an error. The script should work nevertheless (tested on Firefox 130.0 and Chrome 128.0.6613.120, Windows 11 64-bit).
If anybody knows how to get rid of that warning, I'm happy to hear ideas or take PRs.
I left the language parameter empty and spoke Hindi; sometimes it transcribed correctly and sometimes it produced wrong sentences.
Please tell me which parameters I should set for the best realtime STT in any of the supported languages.
OK, it started working. I used the following to resolve it: `sudo apt-get install libcudnn8`
That should be the correct way to solve that. faster_whisper still needs cuDNN 8 on Linux.
I left the language parameter empty and spoke Hindi; sometimes it transcribed correctly and sometimes it produced wrong sentences.
Please tell me which parameters I should set for the best realtime STT in any of the supported languages.
- First, change your realtime_model_type. The default "tiny.en" is only good for English. For Hindi you need a larger model: try medium, or if that does not work well enough, try large or large-v2/large-v3.
- Set the language parameter to "hi" (Hindi) as default. Whisper performance is far better with a fixed language.
- Maybe try large-v3 for the final transcription, since with a fixed language parameter set this sometimes delivers better performance than large-v2 for the final transcription.
So my first take to improve this would be to open server.py and change recorder_config to:
# Recommended RealtimeSTT configuration for Hindi (goes in server.py).
# NOTE(review): 'on_realtime_transcription_stabilized' references text_detected,
# which must be defined elsewhere in server.py — confirm it exists there.
recorder_config = {
'spinner': False,
'use_microphone': False,  # audio arrives over the websocket, not a local mic
'model': 'large-v3',  # model used for the final transcription pass
'language': 'hi',  # fixed language ("hi" = Hindi) improves Whisper accuracy
'silero_sensitivity': 0.4,  # Silero VAD sensitivity
'webrtc_sensitivity': 2,  # WebRTC VAD aggressiveness
'post_speech_silence_duration': 0.7,  # seconds of silence that end an utterance
'min_length_of_recording': 0,
'min_gap_between_recordings': 0,
'enable_realtime_transcription': True,
'realtime_processing_pause': 0,
'realtime_model_type': 'medium',  # smaller/faster model for the live preview pass
'on_realtime_transcription_stabilized': text_detected,  # callback for stabilized text
}
You can see all available language codes here btw
It's not giving good output for Hindi; can I also use a model checkpoint from Hugging Face?
It should be possible to load any Hugging Face model by specifying the user and model name as "username/modelname".
Please also try higher beam sizes for transcription and float16 precision in recorder_config:
'beam_size': 10,
'beam_size_realtime': 7,
'compute_type': 'float16',
i was trying to get "vasista22/whisper-hindi-medium" from hugging face but it gave following error. i checked, this was working for normal wisper inference code but here it gave me error error: Starting server, please wait... Initializing RealtimeSTT... config.json: 100%|████████████████| 1.29k/1.29k [00:00<00:00, 9.02MB/s] preprocessor_config.json: 100%|██████| 185k/185k [00:00<00:00, 889kB/s] RealTimeSTT: root - ERROR - Error initializing faster_whisper realtime transcription model: Unable to open file 'model.bin' in model '/home/azureuser/.cache/huggingface/hub/models--vasista22--whisper-hindi-medium/snapshots/d53532a4dc1d0d89e484ed8f7acfb2228a7d3785' Traceback (most recent call last): File "/home/azureuser/realtime_translator/RealtimeSTT/audio_recorder.py", line 513, in init self.realtime_model_type = faster_whisper.WhisperModel( File "/home/azureuser/realtime_translator/venv/lib/python3.10/site-packages/faster_whisper/transcribe.py", line 145, in init self.model = ctranslate2.models.Whisper( RuntimeError: Unable to open file 'model.bin' in model '/home/azureuser/.cache/huggingface/hub/models--vasista22--whisper-hindi-medium/snapshots/d53532a4dc1d0d89e484ed8f7acfb2228a7d3785' Exception in thread Thread-1 (recorder_thread): Traceback (most recent call last): File "/home/azureuser/realtime_translator/venv/lib/python3.10/threading.py", line 1009, in _bootstrap_inner self.run() File "/home/azureuser/realtime_translator/venv/lib/python3.10/threading.py", line 946, in run self._target(*self._args, **self._kwargs) File "/home/azureuser/realtime_translator/server.py", line 53, in recorder_thread recorder = AudioToTextRecorder(**recorder_config) File "/home/azureuser/realtime_translator/RealtimeSTT/audio_recorder.py", line 513, in init self.realtime_model_type = faster_whisper.WhisperModel( File "/home/azureuser/realtime_translator/venv/lib/python3.10/site-packages/faster_whisper/transcribe.py", line 145, in init self.model = ctranslate2.models.Whisper( RuntimeError: 
Unable to open file 'model.bin' in model '/home/azureuser/.cache/huggingface/hub/models--vasista22--whisper-hindi-medium/snapshots/d53532a4dc1d0d89e484ed8f7acfb2228a7d3785' Exception in thread Thread-2 (_transcription_worker): Traceback (most recent call last): File "/home/azureuser/realtime_translator/venv/lib/python3.10/threading.py", line 1009, in _bootstrap_inner self.run() File "/home/azureuser/realtime_translator/venv/lib/python3.10/threading.py", line 946, in run self._target(*self._args, **self._kwargs) File "/home/azureuser/realtime_translator/RealtimeSTT/audio_recorder.py", line 775, in _transcription_worker audio, language = conn.recv() File "/home/azureuser/realtime_translator/venv/lib/python3.10/multiprocessing/connection.py", line 255, in recv buf = self._recv_bytes() File "/home/azureuser/realtime_translator/venv/lib/python3.10/multiprocessing/connection.py", line 419, in _recv_bytes buf = self._recv(4) File "/home/azureuser/realtime_translator/venv/lib/python3.10/multiprocessing/connection.py", line 388, in _recv raise EOFError EOFError
Maybe it needs to be converted to CTranslate2 before, not sure. This is out of scope for RealtimeSTT, I'd discuss this in the faster_whisper repo.