Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add RecognitionService, so Dicio appears under Voice Input #227

Merged
merged 5 commits into from
Jul 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions app/src/main/AndroidManifest.xml
Original file line number Diff line number Diff line change
Expand Up @@ -76,5 +76,23 @@
<action android:name="android.speech.action.RECOGNIZE_SPEECH" />
</intent-filter>
</activity>

<!--
Declares SttService as a speech recognition service that other apps can bind to:
- the intent filter matches the android.speech.RecognitionService action;
- android:exported="true" makes the service reachable from outside the app;
- android:permission requires clients binding to it to hold RECORD_AUDIO;
- the android.speech meta-data points to @xml/stt_service_metadata.
-->
<service
android:name=".io.input.stt_service.SttService"
android:description="@string/stt_service_label"
android:directBootAware="true"
android:exported="true"
android:foregroundServiceType="microphone"
android:icon="@mipmap/ic_launcher"
android:label="@string/stt_service_label"
android:permission="android.permission.RECORD_AUDIO">
<intent-filter>
<action android:name="android.speech.RecognitionService"/>
<category android:name="android.intent.category.DEFAULT" />
</intent-filter>
<meta-data
android:name="android.speech"
android:resource="@xml/stt_service_metadata" />
</service>
</application>
</manifest>
12 changes: 9 additions & 3 deletions app/src/main/kotlin/org/stypox/dicio/di/SttInputDeviceWrapper.kt
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ import javax.inject.Singleton
interface SttInputDeviceWrapper {
val uiState: StateFlow<SttState?>

fun tryLoad(thenStartListeningEventListener: ((InputEvent) -> Unit)?)
fun tryLoad(thenStartListeningEventListener: ((InputEvent) -> Unit)?): Boolean

fun stopListening()

fun onClick(eventListener: (InputEvent) -> Unit)
}
Expand Down Expand Up @@ -98,8 +100,12 @@ class SttInputDeviceWrapperImpl(
}


override fun tryLoad(thenStartListeningEventListener: ((InputEvent) -> Unit)?) {
sttInputDevice?.tryLoad(thenStartListeningEventListener)
/**
 * Forwards the load request to the wrapped input device.
 *
 * @return whatever the wrapped device's `tryLoad` returns, or `false` when
 * `sttInputDevice` is `null`
 */
override fun tryLoad(thenStartListeningEventListener: ((InputEvent) -> Unit)?): Boolean {
    // Without a wrapped device nothing can be loaded, hence nothing will start listening.
    val device = sttInputDevice ?: return false
    return device.tryLoad(thenStartListeningEventListener)
}

/**
 * Asks the wrapped input device to stop listening; does nothing when `sttInputDevice` is `null`.
 */
override fun stopListening() {
    val device = sttInputDevice ?: return
    device.stopListening()
}

override fun onClick(eventListener: (InputEvent) -> Unit) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ import org.stypox.dicio.ui.home.SttState
interface SttInputDevice {
val uiState: StateFlow<SttState>

fun tryLoad(thenStartListeningEventListener: ((InputEvent) -> Unit)?)
fun tryLoad(thenStartListeningEventListener: ((InputEvent) -> Unit)?): Boolean

fun stopListening()

fun onClick(eventListener: (InputEvent) -> Unit)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
package org.stypox.dicio.io.input.stt_service

import android.content.Intent
import android.os.Build
import android.os.Bundle
import android.os.RemoteException
import android.speech.RecognitionService
import android.speech.RecognizerIntent
import android.speech.SpeechRecognizer
import android.util.Log
import dagger.hilt.android.AndroidEntryPoint
import org.stypox.dicio.di.LocaleManager
import org.stypox.dicio.di.SttInputDeviceWrapper
import org.stypox.dicio.io.input.InputEvent
import java.util.Locale
import javax.inject.Inject


// TODO this class is really simple at the moment, but many more things could be implemented, e.g.:
// - allowing an SttInputDevice to download/support multiple languages
// - handling more EXTRAs, e.g. EXTRA_LANGUAGE, EXTRA_LANGUAGE_PREFERENCE,
// EXTRA_ONLY_RETURN_LANGUAGE_PREFERENCE, EXTRA_LANGUAGE_MODEL, LANGUAGE_MODEL_FREE_FORM,
// LANGUAGE_MODEL_WEB_SEARCH, EXTRA_SEGMENTED_SESSION,
// EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS,
// EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS,
// EXTRA_SPEECH_INPUT_MINIMUM_LENGTH_MILLIS, EXTRA_AUDIO_SOURCE, EXTRA_AUDIO_SOURCE_CHANNEL_COUNT,
// EXTRA_AUDIO_SOURCE_ENCODING, EXTRA_AUDIO_SOURCE_SAMPLING_RATE, EXTRA_BIASING_STRINGS,
// EXTRA_ENABLE_BIASING_DEVICE_CONTEXT
// - if the SttInputDevice is already busy (e.g. another service is using it, or another part of
// Dicio is using it), that needs to be reported with ERROR_BUSY
/**
 * Android [RecognitionService] that lets other apps use Dicio's speech-to-text input device.
 *
 * Recognition itself is delegated to the injected [SttInputDeviceWrapper]: every [InputEvent]
 * it emits is translated into the corresponding [RecognitionService.Callback] call. Only the
 * language reported by [LocaleManager] can be served; requests for any other language are
 * rejected with [ERROR_LANGUAGE_UNAVAILABLE].
 */
@AndroidEntryPoint
class SttService : RecognitionService() {

    @Inject
    lateinit var sttInputDevice: SttInputDeviceWrapper

    @Inject
    lateinit var localeManager: LocaleManager

    /**
     * Starts a recognition session and forwards its events to [listener].
     *
     * If [RecognizerIntent.EXTRA_LANGUAGE] names a language other than the app language, or if
     * the input device reports that it will not start listening, the session is aborted by
     * sending [ERROR_LANGUAGE_UNAVAILABLE] to [listener].
     *
     * @param recognizerIntent the intent sent by the client; only `EXTRA_LANGUAGE` is read
     * @param listener the callback on which recognition events are reported
     */
    override fun onStartListening(recognizerIntent: Intent, listener: Callback) {
        val wantedLanguageExtra = recognizerIntent.getStringExtra(RecognizerIntent.EXTRA_LANGUAGE)
        if (wantedLanguageExtra != null) {
            val appLanguage = localeManager.locale.value.language
            // EXTRA_LANGUAGE is an IETF BCP 47 tag such as "en-US", so only its primary language
            // subtag may be compared with the app language. An empty result means the tag was
            // empty, ill-formed or "und" ("Undetermined", see
            // https://www.loc.gov/standards/iso639-2/php/code_list.php): no actual preference.
            val wantedLanguage = primaryLanguageOf(wantedLanguageExtra)
            if (wantedLanguage.isNotEmpty() && appLanguage != wantedLanguage) {
                Log.e(TAG, "Unsupported language: app=$appLanguage wanted=$wantedLanguageExtra")
                // From the javadoc of ERROR_LANGUAGE_UNAVAILABLE: Requested language is supported,
                // but not available currently (e.g. not downloaded yet).
                logRemoteExceptions { listener.error(ERROR_LANGUAGE_UNAVAILABLE) }
                return
            }
        }

        // beginningOfSpeech() must be reported at most once per session, right before the first
        // partial or final result is delivered.
        var speechBegun = false
        fun reportBeginningOfSpeechOnce() {
            if (!speechBegun) {
                logRemoteExceptions { listener.beginningOfSpeech() }
                speechBegun = true
            }
        }

        val willStartListening = sttInputDevice.tryLoad { inputEvent ->
            when (inputEvent) {
                is InputEvent.Error -> {
                    logRemoteExceptions { listener.error(SpeechRecognizer.ERROR_SERVER) }
                }

                is InputEvent.Final -> {
                    reportBeginningOfSpeechOnce()

                    val results = Bundle().apply {
                        putStringArrayList(
                            SpeechRecognizer.RESULTS_RECOGNITION,
                            ArrayList(inputEvent.utterances.map { it.first })
                        )
                        putFloatArray(
                            SpeechRecognizer.CONFIDENCE_SCORES,
                            inputEvent.utterances.map { it.second }.toFloatArray()
                        )
                    }

                    // results() is the terminal callback of the session, so the end of speech
                    // is reported before it rather than after.
                    logRemoteExceptions { listener.endOfSpeech() }
                    logRemoteExceptions { listener.results(results) }
                }

                InputEvent.None -> {
                    // error() is terminal as well, so endOfSpeech() goes first here too.
                    logRemoteExceptions { listener.endOfSpeech() }
                    logRemoteExceptions { listener.error(SpeechRecognizer.ERROR_SPEECH_TIMEOUT) }
                }

                is InputEvent.Partial -> {
                    reportBeginningOfSpeechOnce()

                    val partResult = Bundle().apply {
                        putStringArrayList(
                            SpeechRecognizer.RESULTS_RECOGNITION,
                            arrayListOf(inputEvent.utterance)
                        )
                    }

                    logRemoteExceptions { listener.partialResults(partResult) }
                }
            }
        }

        if (!willStartListening) {
            Log.w(TAG, "Could not start STT recognizer")
            logRemoteExceptions { listener.error(ERROR_LANGUAGE_UNAVAILABLE) }
        }
    }

    /** Cancels the current session by asking the input device to stop listening. */
    override fun onCancel(listener: Callback) {
        sttInputDevice.stopListening()
    }

    /** Ends audio capture by asking the input device to stop listening. */
    override fun onStopListening(listener: Callback) {
        sttInputDevice.stopListening()
    }

    companion object {
        val TAG = SttService::class.simpleName

        /**
         * From the javadoc of [SpeechRecognizer.ERROR_LANGUAGE_UNAVAILABLE]: Requested language is
         * supported, but not available currently (e.g. not downloaded yet).
         *
         * That constant is only used from Android S onwards; on older versions this falls back
         * to [SpeechRecognizer.ERROR_SERVER].
         */
        val ERROR_LANGUAGE_UNAVAILABLE = if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) {
            SpeechRecognizer.ERROR_LANGUAGE_UNAVAILABLE
        } else {
            SpeechRecognizer.ERROR_SERVER
        }

        /**
         * Runs [f], logging (instead of propagating) any [RemoteException] it throws, so that a
         * failure to reach the client does not crash the service.
         */
        fun logRemoteExceptions(f: () -> Unit) {
            try {
                f()
            } catch (e: RemoteException) {
                Log.e(TAG, "Remote exception", e)
            }
        }

        /**
         * Extracts the primary language subtag from a BCP 47 language tag, e.g. `"en"` from
         * `"en-US"`. Underscore-separated tags such as `"en_US"` are accepted too.
         *
         * @return the lowercase language code, or an empty string if [languageTag] is empty,
         * ill-formed or `"und"`
         */
        private fun primaryLanguageOf(languageTag: String): String =
            Locale.forLanguageTag(languageTag.replace('_', '-')).language
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -211,13 +211,20 @@ class VoskInputDevice(
*
* @param thenStartListeningEventListener if not `null`, causes the [VoskInputDevice] to start
* listening after it has finished loading, and the received input events are sent there
* @return `true` if the input device will start listening (or be ready to do so in case
* `thenStartListeningEventListener == null`) at some point,
* `false` if manual user intervention is required to start listening
*/
override fun tryLoad(thenStartListeningEventListener: ((InputEvent) -> Unit)?) {
override fun tryLoad(thenStartListeningEventListener: ((InputEvent) -> Unit)?): Boolean {
    val currentState = _state.value
    return when {
        // Nothing loaded yet: start loading, optionally listening once loading is done.
        currentState == NotLoaded -> {
            load(thenStartListeningEventListener)
            true
        }

        // Already loaded and a listener was provided: start listening right away.
        thenStartListeningEventListener != null && currentState is Loaded -> {
            startListening(currentState.speechService, thenStartListeningEventListener)
            true
        }

        // Any other combination of state and listener: nothing is started from here.
        else -> false
    }
}

Expand Down Expand Up @@ -252,6 +259,16 @@ class VoskInputDevice(
}
}

/**
 * Stops the recognizer if, and only if, the current state is [Listening]. In every other
 * state this is a no-op.
 */
override fun stopListening() {
    val currentState = _state.value
    if (currentState is Listening) {
        stopListening(currentState.speechService, currentState.eventListener, true)
    }
}

/**
* Downloads the model zip file. Sets the state to [Downloading], and periodically updates it
* with downloading progress, until either [ErrorDownloading] or [Downloaded] are set as state.
Expand Down
1 change: 1 addition & 0 deletions app/src/main/res/values/strings.xml
Original file line number Diff line number Diff line change
Expand Up @@ -162,4 +162,5 @@
<string name="skill_timer_query_name">Timer %1$s expires in %2$s</string>
<string name="skill_timer_query_last">The last timer expires in %1$s</string>
<string name="skill_timer_none_canceled">OK, no timer was canceled</string>
<string name="stt_service_label">Dicio offline speech recognition</string>
</resources>
5 changes: 5 additions & 0 deletions app/src/main/res/xml/stt_service_metadata.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
Metadata for the speech recognition service, referenced from the manifest through the
"android.speech" meta-data entry.
TODO actually set a proper settingsActivity: it currently points to MainActivity.
-->
<recognition-service
xmlns:android="http://schemas.android.com/apk/res/android"
android:settingsActivity="org.stypox.dicio.MainActivity" />