useSpeechToText
Speech to text hook. Powered by the Web Speech API for speech recognition.
Experimental: do NOT use this hook in production.
The Web Speech API may not be supported by all browsers. Check the browser
compatibility table before relying on it.
Import
import {useSpeechToText} from '@kaiverse/k/hooks'
Usage
import {useSpeechToText} from '@kaiverse/k/hooks'
// Call the hook once per component: it returns the transcript, the
// availability/listening flags, and the start/stop controls.
const {transcript, isSpeechAPIAvailable, isListening, startListening, stopListening} =
  useSpeechToText({
    lang: 'en',
    onStart: () => console.log('Start listening'),
    onUnMatch: () => console.error('Cannot recognize speech.'),
    onError: (event) => console.error(event.error),
  })

// Show a notice instead of the controls when the Web Speech API is missing.
if (!isSpeechAPIAvailable) {
  return <div>Browser doesn't support the Web Speech API</div>
}

return (
  <div>
    <button type="button" onClick={startListening}>
      Turn on mic
    </button>
    <button type="button" onClick={stopListening}>
      Stop
    </button>
    <p>
      {isListening ? 'Listening...' : 'Not listening'}
      <br />
      {transcript}
    </p>
  </div>
)
import {useSpeechToText, type SpeechToTextHookErrorCode} from '@kaiverse/k/hooks'import {IconCheck, IconCopy, IconMicrophone, IconMicrophoneOff} from '@tabler/icons-react'import {useCallback, useRef, type MouseEventHandler} from 'react'
/**
 * Interactive demo for `useSpeechToText`.
 *
 * Renders a mic toggle, an error line, and a read-only textarea holding the
 * current transcript, plus a button that copies the transcript to the clipboard.
 * Falls back to a plain notice when the Web Speech API is unavailable.
 */
export default function SpeechToTextDemo() {
  const speechErrRef = useRef<HTMLParagraphElement>(null)
  const transcriptInputRef = useRef<HTMLTextAreaElement>(null)

  // Error text is written directly to the <p> node through the ref rather
  // than being kept in component state.
  const writeErr = (err: string) => {
    if (!speechErrRef.current) {
      return
    }

    speechErrRef.current.textContent = err
  }

  const {transcript, isSpeechAPIAvailable, isListening, startListening, stopListening} =
    useSpeechToText({
      lang: 'en',
      // Clear any message left over from a previous session once listening begins.
      onStart: () => {
        if (speechErrRef.current?.textContent) speechErrRef.current.textContent = null
      },
      onUnMatch: () => writeErr('Cannot recognize speech.'),
      // Prefer the friendly message when one is mapped; otherwise show the raw code.
      onError: (event) =>
        writeErr(
          `Error occurred in recognition: ${SPEECH_ERROR_MAPPING[event.error as SpeechToTextHookErrorCode] || event.error}`,
        ),
    })

  const handleCopyClipboard = useCallback<MouseEventHandler<HTMLButtonElement>>((e) => {
    transcriptInputRef.current?.focus()
    const transcriptVal = transcriptInputRef.current?.value.trim()
    if (!transcriptVal) {
      return
    }

    // Grab the class list synchronously; it is used later in the promise callbacks.
    const targetClasses = e.currentTarget.classList
    navigator.clipboard
      .writeText(transcriptVal)
      .then(() => {
        // Show the "copied" look, then revert after 1.5 s.
        targetClasses.add(...BTN_COPIED)
        setTimeout(() => targetClasses.remove(...BTN_COPIED), 1500)
      })
      // A failed clipboard write is an error, so report it on the error channel.
      .catch(console.error)
  }, [])

  if (!isSpeechAPIAvailable) {
    return <div>Browser doesn't support the Web Speech API</div>
  }

  return (
    <>
      <p className="mb-4">
        Language of the Speech Recognition:{' '}
        <strong>
          <code>en</code> - English
        </strong>
      </p>
      <div>
        {isListening ? (
          <div className="flex items-center gap-4">
            <div>Listening...</div>
            <button className="btn btn-neutral" type="button" onClick={stopListening}>
              <IconMicrophoneOff size={20} /> Stop
            </button>
          </div>
        ) : (
          <button className="btn btn-neutral btn-outline" type="button" onClick={startListening}>
            <IconMicrophone size={20} /> Turn on mic
          </button>
        )}

        <p ref={speechErrRef} className="mt-2 min-h-6 text-red-500"></p>
      </div>
      <label className="group relative">
        Transcript
        {/* Controlled + readOnly: the textarea always mirrors the latest transcript. */}
        <textarea
          ref={transcriptInputRef}
          className="block min-h-12 w-full resize-none rounded-md p-2 shadow-sm [field-sizing:content]"
          name="transcript"
          value={transcript}
          wrap="soft"
          placeholder="The transcript will be displayed here"
          readOnly
        />
        {/* The copy button only exists once there is something to copy. */}
        {transcript ? (
          <button
            className="btn btn-ghost btn-outline btn-square btn-sm absolute top-8 right-2 hidden h-fit group-focus-within:block"
            type="button"
            onClick={handleCopyClipboard}
          >
            <IconCopy size={16} />
            <IconCheck className="hidden mx-auto" size={16} />
          </button>
        ) : null}
      </label>
    </>
  )
}
/**
 * User-facing messages for selected speech recognition error codes.
 * Codes without an entry have no friendly message here.
 */
const SPEECH_ERROR_MAPPING: Partial<Record<SpeechToTextHookErrorCode, string>> = {
  'audio-capture': 'Cannot detect your microphone! Please check your bluetooth/cable connection.',
  'no-speech': 'No speech',
  'language-not-supported': 'Language not supported',
}
/**
 * Class names applied to the copy button after a successful copy.
 * The last two entries hide the copy icon and reveal the check icon.
 */
const BTN_COPIED: string[] = [
  'text-green-500',
  'pointer-events-none',
  'opacity-80',
  '[&>.tabler-icon-copy]:hidden',
  '[&>.tabler-icon-check]:block',
]
Type Definition
/**
 * Speech to text hook, powered by the Web Speech API.
 */
function useSpeechToText(options: SpeechToTextHookOptions): {
  /** Flag to check if the Web Speech API is available. */
  isSpeechAPIAvailable: boolean
  /** Whether the service is listening to incoming audio. */
  isListening: boolean
  /** Turn on microphone and start listening. */
  startListening: () => void
  /** Turn off microphone and stop listening. */
  stopListening: () => void
  /** The result of the speech recognition. */
  transcript: string
}
Special Types
Name | Type | Description |
---|---|---|
SpeechToTextHookErrorCode | SpeechRecognitionErrorCode | Possible error codes that can be returned by the Web Speech API. Enum: SpeechRecognitionErrorCode . |
SpeechToTextHookOptions | See useSpeechToText Options below | useSpeechToText options. |
useSpeechToText
Options
Name | Type | Default | Description |
---|---|---|---|
lang | string | HTML lang attribute value | Language of the speech. If not specified, and the HTML lang attribute isn’t set either then the user agent’s language setting will be used. Read more. |
onStart | () => void | — | Callback function that is called when the speech recognition service has begun listening to incoming audio. |
onUnMatch | (event: SpeechRecognitionEvent) => void | — | Fired when the speech recognition service returns a final result with no significant recognition. |
onError | (event: SpeechRecognitionErrorEvent) => void | — | Fired when a speech recognition error occurs. |
onTranscriptChange | (transcript: string) => void | — | Fired when the speech recognition service returns a final result with significant recognition. |
Return Types
Name | Type | Description |
---|---|---|
isSpeechAPIAvailable | boolean | Flag to check if the Web Speech API is available. |
isListening | boolean | Whether the service is currently listening to incoming audio. |
startListening | () => void | Turn on microphone and start listening. |
stopListening | () => void | Turn off microphone and stop listening. |
transcript | string | The result of the speech recognition. |