diff --git a/app/src/main/AndroidManifest.xml b/app/src/main/AndroidManifest.xml
index e1d23d0b..b0544d67 100644
--- a/app/src/main/AndroidManifest.xml
+++ b/app/src/main/AndroidManifest.xml
@@ -76,5 +76,23 @@
+ + + + + + + +
diff --git a/app/src/main/kotlin/org/stypox/dicio/di/SttInputDeviceWrapper.kt b/app/src/main/kotlin/org/stypox/dicio/di/SttInputDeviceWrapper.kt
index d063b748..d645e831 100644
--- a/app/src/main/kotlin/org/stypox/dicio/di/SttInputDeviceWrapper.kt
+++ b/app/src/main/kotlin/org/stypox/dicio/di/SttInputDeviceWrapper.kt
@@ -31,7 +31,9 @@ import javax.inject.Singleton
 interface SttInputDeviceWrapper {
     val uiState: StateFlow
 
-    fun tryLoad(thenStartListeningEventListener: ((InputEvent) -> Unit)?)
+    fun tryLoad(thenStartListeningEventListener: ((InputEvent) -> Unit)?): Boolean
+
+    fun stopListening()
 
     fun onClick(eventListener: (InputEvent) -> Unit)
 }
@@ -98,8 +100,12 @@ class SttInputDeviceWrapperImpl(
     }
 
 
-    override fun tryLoad(thenStartListeningEventListener: ((InputEvent) -> Unit)?) {
-        sttInputDevice?.tryLoad(thenStartListeningEventListener)
+    override fun tryLoad(thenStartListeningEventListener: ((InputEvent) -> Unit)?): Boolean {
+        return sttInputDevice?.tryLoad(thenStartListeningEventListener) ?: false
+    }
+
+    override fun stopListening() {
+        sttInputDevice?.stopListening()
     }
 
     override fun onClick(eventListener: (InputEvent) -> Unit) {
diff --git a/app/src/main/kotlin/org/stypox/dicio/io/input/SttInputDevice.kt b/app/src/main/kotlin/org/stypox/dicio/io/input/SttInputDevice.kt
index be67a900..4c6eb80e 100644
--- a/app/src/main/kotlin/org/stypox/dicio/io/input/SttInputDevice.kt
+++ b/app/src/main/kotlin/org/stypox/dicio/io/input/SttInputDevice.kt
@@ -6,7 +6,9 @@ import org.stypox.dicio.ui.home.SttState
 interface SttInputDevice {
     val uiState: StateFlow
 
-    fun tryLoad(thenStartListeningEventListener: ((InputEvent) -> Unit)?)
+    fun tryLoad(thenStartListeningEventListener: ((InputEvent) -> Unit)?): Boolean
+
+    fun stopListening()
 
     fun onClick(eventListener: (InputEvent) -> Unit)
 
diff --git a/app/src/main/kotlin/org/stypox/dicio/io/input/stt_service/SttService.kt b/app/src/main/kotlin/org/stypox/dicio/io/input/stt_service/SttService.kt
new file mode 100644
index 00000000..9396c939
--- /dev/null
+++ b/app/src/main/kotlin/org/stypox/dicio/io/input/stt_service/SttService.kt
@@ -0,0 +1,156 @@
+package org.stypox.dicio.io.input.stt_service
+
+import android.content.Intent
+import android.os.Build
+import android.os.Bundle
+import android.os.RemoteException
+import android.speech.RecognitionService
+import android.speech.RecognizerIntent
+import android.speech.SpeechRecognizer
+import android.util.Log
+import dagger.hilt.android.AndroidEntryPoint
+import org.stypox.dicio.di.LocaleManager
+import org.stypox.dicio.di.SttInputDeviceWrapper
+import org.stypox.dicio.io.input.InputEvent
+import java.util.Locale
+import javax.inject.Inject
+
+
+// TODO this class is really simple at the moment, but many more things could be implemented, e.g.:
+// - allowing an SttInputDevice to download/support multiple languages
+// - handling more EXTRAs, e.g. EXTRA_LANGUAGE, EXTRA_LANGUAGE_PREFERENCE,
+//   EXTRA_ONLY_RETURN_LANGUAGE_PREFERENCE, EXTRA_LANGUAGE_MODEL, LANGUAGE_MODEL_FREE_FORM,
+//   LANGUAGE_MODEL_WEB_SEARCH, EXTRA_SEGMENTED_SESSION,
+//   EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS,
+//   EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS,
+//   EXTRA_SPEECH_INPUT_MINIMUM_LENGTH_MILLIS, EXTRA_AUDIO_SOURCE, EXTRA_AUDIO_SOURCE_CHANNEL_COUNT,
+//   EXTRA_AUDIO_SOURCE_ENCODING, EXTRA_AUDIO_SOURCE_SAMPLING_RATE, EXTRA_BIASING_STRINGS,
+//   EXTRA_ENABLE_BIASING_DEVICE_CONTEXT
+// - if the SttInputDevice is already busy (e.g. another service is using it, or another part of
+//   Dicio is using it), that needs to be reported with ERROR_BUSY
+/**
+ * A [RecognitionService] backed by the injected [SttInputDeviceWrapper]: the input events produced
+ * by the input device are translated into calls on the recognition
+ * [RecognitionService.Callback].
+ */
+@AndroidEntryPoint
+class SttService : RecognitionService() {
+
+    @Inject
+    lateinit var sttInputDevice: SttInputDeviceWrapper
+
+    @Inject
+    lateinit var localeManager: LocaleManager
+
+    /**
+     * Starts a recognition session. If the intent requests (via
+     * [RecognizerIntent.EXTRA_LANGUAGE]) a language other than the app's current one,
+     * [ERROR_LANGUAGE_UNAVAILABLE] is reported and nothing else happens. Otherwise the input
+     * device is asked to load and listen, and its events are forwarded to [listener].
+     */
+    override fun onStartListening(recognizerIntent: Intent, listener: Callback) {
+        val wantedLanguageExtra = recognizerIntent.getStringExtra(RecognizerIntent.EXTRA_LANGUAGE)
+        // "und" is "Undetermined", see https://www.loc.gov/standards/iso639-2/php/code_list.php
+        if (wantedLanguageExtra != null && wantedLanguageExtra != "und") {
+            val appLanguage = localeManager.locale.value.language
+            // EXTRA_LANGUAGE is an IETF BCP 47 tag (e.g. "en-US"): the Locale(String) constructor
+            // would treat the whole tag as the language code, so a tag with a region never matches
+            val wantedLanguage = Locale.forLanguageTag(wantedLanguageExtra).language
+            if (appLanguage != wantedLanguage) {
+                Log.e(TAG, "Unsupported language: app=$appLanguage wanted=$wantedLanguageExtra")
+                // From the javadoc of ERROR_LANGUAGE_UNAVAILABLE: Requested language is supported,
+                // but not available currently (e.g. not downloaded yet).
+                logRemoteExceptions { listener.error(ERROR_LANGUAGE_UNAVAILABLE) }
+                return
+            }
+        }
+
+        var beginningOfSpeech = true
+        val willStartListening = sttInputDevice.tryLoad { inputEvent ->
+            when (inputEvent) {
+                is InputEvent.Error -> {
+                    logRemoteExceptions { listener.error(SpeechRecognizer.ERROR_SERVER) }
+                }
+
+                is InputEvent.Final -> {
+                    if (beginningOfSpeech) {
+                        logRemoteExceptions { listener.beginningOfSpeech() }
+                        beginningOfSpeech = false
+                    }
+
+                    val results = Bundle()
+                    results.putStringArrayList(
+                        SpeechRecognizer.RESULTS_RECOGNITION,
+                        ArrayList(inputEvent.utterances.map { it.first })
+                    )
+                    results.putFloatArray(
+                        SpeechRecognizer.CONFIDENCE_SCORES,
+                        inputEvent.utterances.map { it.second }.toFloatArray()
+                    )
+
+                    // RecognitionListener expects onEndOfSpeech before the final onResults/onError
+                    logRemoteExceptions { listener.endOfSpeech() }
+                    logRemoteExceptions { listener.results(results) }
+                }
+
+                InputEvent.None -> {
+                    logRemoteExceptions { listener.endOfSpeech() }
+                    logRemoteExceptions { listener.error(SpeechRecognizer.ERROR_SPEECH_TIMEOUT) }
+                }
+
+                is InputEvent.Partial -> {
+                    if (beginningOfSpeech) {
+                        logRemoteExceptions { listener.beginningOfSpeech() }
+                        beginningOfSpeech = false
+                    }
+
+                    val partResult = Bundle()
+                    partResult.putStringArrayList(
+                        SpeechRecognizer.RESULTS_RECOGNITION,
+                        arrayListOf(inputEvent.utterance)
+                    )
+
+                    logRemoteExceptions { listener.partialResults(partResult) }
+                }
+            }
+        }
+
+        if (!willStartListening) {
+            Log.w(TAG, "Could not start STT recognizer")
+            logRemoteExceptions { listener.error(ERROR_LANGUAGE_UNAVAILABLE) }
+        }
+    }
+
+    /** Cancels the session by telling the input device to stop listening. */
+    override fun onCancel(listener: Callback) {
+        sttInputDevice.stopListening()
+    }
+
+    /** Ends the session by telling the input device to stop listening. */
+    override fun onStopListening(listener: Callback) {
+        sttInputDevice.stopListening()
+    }
+
+    companion object {
+        val TAG = SttService::class.simpleName
+
+        /**
+         * From the javadoc of [SpeechRecognizer.ERROR_LANGUAGE_UNAVAILABLE]: Requested language is
+         * supported, but not available currently (e.g. not downloaded yet).
+         */
+        val ERROR_LANGUAGE_UNAVAILABLE = if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) {
+            SpeechRecognizer.ERROR_LANGUAGE_UNAVAILABLE
+        } else {
+            SpeechRecognizer.ERROR_SERVER
+        }
+
+        /** Runs [f], logging (instead of propagating) any [RemoteException] it throws. */
+        fun logRemoteExceptions(f: () -> Unit) {
+            try {
+                f()
+            } catch (e: RemoteException) {
+                Log.e(TAG, "Remote exception", e)
+            }
+        }
+    }
+}
diff --git a/app/src/main/kotlin/org/stypox/dicio/io/input/vosk/VoskInputDevice.kt b/app/src/main/kotlin/org/stypox/dicio/io/input/vosk/VoskInputDevice.kt
index b05fa198..b4f1594a 100644
--- a/app/src/main/kotlin/org/stypox/dicio/io/input/vosk/VoskInputDevice.kt
+++ b/app/src/main/kotlin/org/stypox/dicio/io/input/vosk/VoskInputDevice.kt
@@ -211,13 +211,20 @@ class VoskInputDevice(
      *
      * @param thenStartListeningEventListener if not `null`, causes the [VoskInputDevice] to start
      * listening after it has finished loading, and the received input events are sent there
+     * @return `true` if the input device will start listening (or be ready to do so in case
+     * `thenStartListeningEventListener == null`) at some point,
+     * `false` if manual user intervention is required to start listening
      */
-    override fun tryLoad(thenStartListeningEventListener: ((InputEvent) -> Unit)?) {
+    override fun tryLoad(thenStartListeningEventListener: ((InputEvent) -> Unit)?): Boolean {
         val s = _state.value
         if (s == NotLoaded) {
             load(thenStartListeningEventListener)
+            return true
         } else if (thenStartListeningEventListener != null && s is Loaded) {
             startListening(s.speechService, thenStartListeningEventListener)
+            return true
+        } else {
+            return false
         }
     }
 
@@ -252,6 +259,16 @@ class VoskInputDevice(
         }
     }
 
+    /**
+     * If the recognizer is currently listening, stops listening. Otherwise does nothing.
+     */
+    override fun stopListening() {
+        when (val s = _state.value) {
+            is Listening -> stopListening(s.speechService, s.eventListener, true)
+            else -> {}
+        }
+    }
+
     /**
      * Downloads the model zip file. Sets the state to [Downloading], and periodically updates it
      * with downloading progress, until either [ErrorDownloading] or [Downloaded] are set as state.
diff --git a/app/src/main/res/values/strings.xml b/app/src/main/res/values/strings.xml
index 5abf4568..96a7324f 100644
--- a/app/src/main/res/values/strings.xml
+++ b/app/src/main/res/values/strings.xml
@@ -162,4 +162,5 @@
 Timer %1$s expires in %2$s The last timer expires in %1$s OK, no timer was canceled
+ Dicio offline speech recognition
diff --git a/app/src/main/res/xml/stt_service_metadata.xml b/app/src/main/res/xml/stt_service_metadata.xml
new file mode 100644
index 00000000..d3965b3b
--- /dev/null
+++ b/app/src/main/res/xml/stt_service_metadata.xml
@@ -0,0 +1,5 @@
+ + +