diff --git a/app/src/main/AndroidManifest.xml b/app/src/main/AndroidManifest.xml
index e1d23d0b..b0544d67 100644
--- a/app/src/main/AndroidManifest.xml
+++ b/app/src/main/AndroidManifest.xml
@@ -76,5 +76,23 @@
+
+
+
+
+
+
+
+
diff --git a/app/src/main/kotlin/org/stypox/dicio/di/SttInputDeviceWrapper.kt b/app/src/main/kotlin/org/stypox/dicio/di/SttInputDeviceWrapper.kt
index d063b748..d645e831 100644
--- a/app/src/main/kotlin/org/stypox/dicio/di/SttInputDeviceWrapper.kt
+++ b/app/src/main/kotlin/org/stypox/dicio/di/SttInputDeviceWrapper.kt
@@ -31,7 +31,9 @@ import javax.inject.Singleton
interface SttInputDeviceWrapper {
val uiState: StateFlow
- fun tryLoad(thenStartListeningEventListener: ((InputEvent) -> Unit)?)
+ fun tryLoad(thenStartListeningEventListener: ((InputEvent) -> Unit)?): Boolean
+
+ fun stopListening()
fun onClick(eventListener: (InputEvent) -> Unit)
}
@@ -98,8 +100,12 @@ class SttInputDeviceWrapperImpl(
}
- override fun tryLoad(thenStartListeningEventListener: ((InputEvent) -> Unit)?) {
- sttInputDevice?.tryLoad(thenStartListeningEventListener)
+ override fun tryLoad(thenStartListeningEventListener: ((InputEvent) -> Unit)?): Boolean {
+ return sttInputDevice?.tryLoad(thenStartListeningEventListener) ?: false
+ }
+
+ override fun stopListening() {
+ sttInputDevice?.stopListening()
}
override fun onClick(eventListener: (InputEvent) -> Unit) {
diff --git a/app/src/main/kotlin/org/stypox/dicio/io/input/SttInputDevice.kt b/app/src/main/kotlin/org/stypox/dicio/io/input/SttInputDevice.kt
index be67a900..4c6eb80e 100644
--- a/app/src/main/kotlin/org/stypox/dicio/io/input/SttInputDevice.kt
+++ b/app/src/main/kotlin/org/stypox/dicio/io/input/SttInputDevice.kt
@@ -6,7 +6,9 @@ import org.stypox.dicio.ui.home.SttState
interface SttInputDevice {
val uiState: StateFlow
- fun tryLoad(thenStartListeningEventListener: ((InputEvent) -> Unit)?)
+ fun tryLoad(thenStartListeningEventListener: ((InputEvent) -> Unit)?): Boolean
+
+ fun stopListening()
fun onClick(eventListener: (InputEvent) -> Unit)
diff --git a/app/src/main/kotlin/org/stypox/dicio/io/input/stt_service/SttService.kt b/app/src/main/kotlin/org/stypox/dicio/io/input/stt_service/SttService.kt
new file mode 100644
index 00000000..9396c939
--- /dev/null
+++ b/app/src/main/kotlin/org/stypox/dicio/io/input/stt_service/SttService.kt
@@ -0,0 +1,139 @@
+package org.stypox.dicio.io.input.stt_service
+
+import android.content.Intent
+import android.os.Build
+import android.os.Bundle
+import android.os.RemoteException
+import android.speech.RecognitionService
+import android.speech.RecognizerIntent
+import android.speech.SpeechRecognizer
+import android.util.Log
+import dagger.hilt.android.AndroidEntryPoint
+import org.stypox.dicio.di.LocaleManager
+import org.stypox.dicio.di.SttInputDeviceWrapper
+import org.stypox.dicio.io.input.InputEvent
+import java.util.Locale
+import javax.inject.Inject
+
+
+// TODO this class is really simple at the moment, but many more things could be implemented, e.g.:
+// - allowing an SttInputDevice to download/support multiple languages
+// - handling more EXTRAs, e.g. EXTRA_LANGUAGE, EXTRA_LANGUAGE_PREFERENCE,
+// EXTRA_ONLY_RETURN_LANGUAGE_PREFERENCE, EXTRA_LANGUAGE_MODEL, LANGUAGE_MODEL_FREE_FORM,
+// LANGUAGE_MODEL_WEB_SEARCH, EXTRA_SEGMENTED_SESSION,
+// EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS,
+// EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS,
+// EXTRA_SPEECH_INPUT_MINIMUM_LENGTH_MILLIS, EXTRA_AUDIO_SOURCE, EXTRA_AUDIO_SOURCE_CHANNEL_COUNT,
+// EXTRA_AUDIO_SOURCE_ENCODING, EXTRA_AUDIO_SOURCE_SAMPLING_RATE, EXTRA_BIASING_STRINGS,
+// EXTRA_ENABLE_BIASING_DEVICE_CONTEXT
+// - if the SttInputDevice is already busy (e.g. another service is using it, or another part of
+// Dicio is using it), that needs to be reported with ERROR_BUSY
+@AndroidEntryPoint
+class SttService : RecognitionService() {
+
+ @Inject
+ lateinit var sttInputDevice: SttInputDeviceWrapper
+
+ @Inject
+ lateinit var localeManager: LocaleManager
+
+ override fun onStartListening(recognizerIntent: Intent, listener: Callback) {
+ val wantedLanguageExtra = recognizerIntent.getStringExtra(RecognizerIntent.EXTRA_LANGUAGE)
+ // "und" is "Undetermined", see https://www.loc.gov/standards/iso639-2/php/code_list.php
+ if (wantedLanguageExtra != null && wantedLanguageExtra != "und") {
+ val appLanguage = localeManager.locale.value.language
+ val wantedLanguage = Locale(wantedLanguageExtra).language
+ if (appLanguage != wantedLanguage) {
+ Log.e(TAG, "Unsupported language: app=$appLanguage wanted=$wantedLanguageExtra")
+ // From the javadoc of ERROR_LANGUAGE_UNAVAILABLE: Requested language is supported,
+ // but not available currently (e.g. not downloaded yet).
+ logRemoteExceptions { listener.error(ERROR_LANGUAGE_UNAVAILABLE) }
+ return
+ }
+ }
+
+ var beginningOfSpeech = true
+ val willStartListening = sttInputDevice.tryLoad { inputEvent ->
+ when (inputEvent) {
+ is InputEvent.Error -> {
+ logRemoteExceptions { listener.error(SpeechRecognizer.ERROR_SERVER) }
+ }
+
+ is InputEvent.Final -> {
+ if (beginningOfSpeech) {
+ logRemoteExceptions { listener.beginningOfSpeech() }
+ beginningOfSpeech = false
+ }
+
+ val results = Bundle()
+ results.putStringArrayList(
+ SpeechRecognizer.RESULTS_RECOGNITION,
+ ArrayList(inputEvent.utterances.map { it.first })
+ )
+ results.putFloatArray(
+ SpeechRecognizer.CONFIDENCE_SCORES,
+ inputEvent.utterances.map { it.second }.toFloatArray()
+ )
+
+ logRemoteExceptions { listener.results(results) }
+ logRemoteExceptions { listener.endOfSpeech() }
+ }
+
+ InputEvent.None -> {
+ logRemoteExceptions { listener.error(SpeechRecognizer.ERROR_SPEECH_TIMEOUT) }
+ logRemoteExceptions { listener.endOfSpeech() }
+ }
+
+ is InputEvent.Partial -> {
+ if (beginningOfSpeech) {
+ logRemoteExceptions { listener.beginningOfSpeech() }
+ beginningOfSpeech = false
+ }
+
+ val partResult = Bundle()
+ partResult.putStringArrayList(
+ SpeechRecognizer.RESULTS_RECOGNITION,
+ arrayListOf(inputEvent.utterance)
+ )
+
+ logRemoteExceptions { listener.partialResults(partResult) }
+ }
+ }
+ }
+
+ if (!willStartListening) {
+ Log.w(TAG, "Could not start STT recognizer")
+ logRemoteExceptions { listener.error(ERROR_LANGUAGE_UNAVAILABLE) }
+ }
+ }
+
+ override fun onCancel(listener: Callback) {
+ sttInputDevice.stopListening()
+ }
+
+ override fun onStopListening(listener: Callback) {
+ sttInputDevice.stopListening()
+ }
+
+ companion object {
+ val TAG = SttService::class.simpleName
+
+ /**
+ * From the javadoc of [SpeechRecognizer.ERROR_LANGUAGE_UNAVAILABLE]: Requested language is
+ * supported, but not available currently (e.g. not downloaded yet).
+ */
+ val ERROR_LANGUAGE_UNAVAILABLE = if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) {
+ SpeechRecognizer.ERROR_LANGUAGE_UNAVAILABLE
+ } else {
+ SpeechRecognizer.ERROR_SERVER
+ }
+
+ fun logRemoteExceptions(f: () -> Unit) {
+ try {
+ return f()
+ } catch (e: RemoteException) {
+ Log.e(TAG, "Remote exception", e)
+ }
+ }
+ }
+}
diff --git a/app/src/main/kotlin/org/stypox/dicio/io/input/vosk/VoskInputDevice.kt b/app/src/main/kotlin/org/stypox/dicio/io/input/vosk/VoskInputDevice.kt
index b05fa198..b4f1594a 100644
--- a/app/src/main/kotlin/org/stypox/dicio/io/input/vosk/VoskInputDevice.kt
+++ b/app/src/main/kotlin/org/stypox/dicio/io/input/vosk/VoskInputDevice.kt
@@ -211,13 +211,20 @@ class VoskInputDevice(
*
* @param thenStartListeningEventListener if not `null`, causes the [VoskInputDevice] to start
* listening after it has finished loading, and the received input events are sent there
+ * @return `true` if the input device will start listening (or be ready to do so in case
+ * `thenStartListeningEventListener == null`) at some point,
+ * `false` if manual user intervention is required to start listening
*/
- override fun tryLoad(thenStartListeningEventListener: ((InputEvent) -> Unit)?) {
+ override fun tryLoad(thenStartListeningEventListener: ((InputEvent) -> Unit)?): Boolean {
val s = _state.value
if (s == NotLoaded) {
load(thenStartListeningEventListener)
+ return true
} else if (thenStartListeningEventListener != null && s is Loaded) {
startListening(s.speechService, thenStartListeningEventListener)
+ return true
+ } else {
+ return false
}
}
@@ -252,6 +259,16 @@ class VoskInputDevice(
}
}
+ /**
+ * If the recognizer is currently listening, stops listening. Otherwise does nothing.
+ */
+ override fun stopListening() {
+ when (val s = _state.value) {
+ is Listening -> stopListening(s.speechService, s.eventListener, true)
+ else -> {}
+ }
+ }
+
/**
* Downloads the model zip file. Sets the state to [Downloading], and periodically updates it
* with downloading progress, until either [ErrorDownloading] or [Downloaded] are set as state.
diff --git a/app/src/main/res/values/strings.xml b/app/src/main/res/values/strings.xml
index 5abf4568..96a7324f 100644
--- a/app/src/main/res/values/strings.xml
+++ b/app/src/main/res/values/strings.xml
@@ -162,4 +162,5 @@
Timer %1$s expires in %2$s
The last timer expires in %1$s
OK, no timer was canceled
+ Dicio offline speech recognition
diff --git a/app/src/main/res/xml/stt_service_metadata.xml b/app/src/main/res/xml/stt_service_metadata.xml
new file mode 100644
index 00000000..d3965b3b
--- /dev/null
+++ b/app/src/main/res/xml/stt_service_metadata.xml
@@ -0,0 +1,5 @@
+
+
+