Skip to content

Commit

Permalink
Merge pull request #227 from Stypox/stt-service-kt
Browse files Browse the repository at this point in the history
Add RecognitionService, so Dicio appears under Voice Input
  • Loading branch information
Stypox authored Jul 28, 2024
2 parents fa2bc9f + d2229d3 commit cf5dac2
Show file tree
Hide file tree
Showing 7 changed files with 193 additions and 5 deletions.
18 changes: 18 additions & 0 deletions app/src/main/AndroidManifest.xml
Original file line number Diff line number Diff line change
Expand Up @@ -76,5 +76,23 @@
<action android:name="android.speech.action.RECOGNIZE_SPEECH" />
</intent-filter>
</activity>

<!--
    Declares SttService as a speech recognition service. The intent filter on the
    android.speech.RecognitionService action is how the service is discovered as a recognizer,
    android:permission restricts binding to clients holding RECORD_AUDIO, and the
    "android.speech" meta-data entry points at the xml/stt_service_metadata resource.
    Note that the description and the label currently reuse the same string resource.
-->
<service
android:name=".io.input.stt_service.SttService"
android:description="@string/stt_service_label"
android:directBootAware="true"
android:exported="true"
android:foregroundServiceType="microphone"
android:icon="@mipmap/ic_launcher"
android:label="@string/stt_service_label"
android:permission="android.permission.RECORD_AUDIO">
<intent-filter>
<action android:name="android.speech.RecognitionService"/>
<category android:name="android.intent.category.DEFAULT" />
</intent-filter>
<meta-data
android:name="android.speech"
android:resource="@xml/stt_service_metadata" />
</service>
</application>
</manifest>
12 changes: 9 additions & 3 deletions app/src/main/kotlin/org/stypox/dicio/di/SttInputDeviceWrapper.kt
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ import javax.inject.Singleton
interface SttInputDeviceWrapper {
val uiState: StateFlow<SttState?>

fun tryLoad(thenStartListeningEventListener: ((InputEvent) -> Unit)?)
fun tryLoad(thenStartListeningEventListener: ((InputEvent) -> Unit)?): Boolean

fun stopListening()

fun onClick(eventListener: (InputEvent) -> Unit)
}
Expand Down Expand Up @@ -98,8 +100,12 @@ class SttInputDeviceWrapperImpl(
}


override fun tryLoad(thenStartListeningEventListener: ((InputEvent) -> Unit)?) {
sttInputDevice?.tryLoad(thenStartListeningEventListener)
override fun tryLoad(thenStartListeningEventListener: ((InputEvent) -> Unit)?): Boolean {
    // Without a wrapped input device there is nothing to load, so report failure
    val device = sttInputDevice ?: return false
    // Otherwise the wrapped device decides whether listening will start
    return device.tryLoad(thenStartListeningEventListener)
}

override fun stopListening() {
    // Nothing to stop when no input device is currently wrapped
    val device = sttInputDevice ?: return
    device.stopListening()
}

override fun onClick(eventListener: (InputEvent) -> Unit) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,9 @@ import org.stypox.dicio.ui.home.SttState
interface SttInputDevice {
val uiState: StateFlow<SttState>

fun tryLoad(thenStartListeningEventListener: ((InputEvent) -> Unit)?)
fun tryLoad(thenStartListeningEventListener: ((InputEvent) -> Unit)?): Boolean

fun stopListening()

fun onClick(eventListener: (InputEvent) -> Unit)

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
package org.stypox.dicio.io.input.stt_service

import android.content.Intent
import android.os.Build
import android.os.Bundle
import android.os.RemoteException
import android.speech.RecognitionService
import android.speech.RecognizerIntent
import android.speech.SpeechRecognizer
import android.util.Log
import dagger.hilt.android.AndroidEntryPoint
import org.stypox.dicio.di.LocaleManager
import org.stypox.dicio.di.SttInputDeviceWrapper
import org.stypox.dicio.io.input.InputEvent
import java.util.Locale
import javax.inject.Inject


// TODO this class is really simple at the moment, but many more things could be implemented, e.g.:
// - allowing an SttInputDevice to download/support multiple languages
// - handling more EXTRAs, e.g. EXTRA_LANGUAGE, EXTRA_LANGUAGE_PREFERENCE,
// EXTRA_ONLY_RETURN_LANGUAGE_PREFERENCE, EXTRA_LANGUAGE_MODEL, LANGUAGE_MODEL_FREE_FORM,
// LANGUAGE_MODEL_WEB_SEARCH, EXTRA_SEGMENTED_SESSION,
// EXTRA_SPEECH_INPUT_COMPLETE_SILENCE_LENGTH_MILLIS,
// EXTRA_SPEECH_INPUT_POSSIBLY_COMPLETE_SILENCE_LENGTH_MILLIS,
// EXTRA_SPEECH_INPUT_MINIMUM_LENGTH_MILLIS, EXTRA_AUDIO_SOURCE, EXTRA_AUDIO_SOURCE_CHANNEL_COUNT,
// EXTRA_AUDIO_SOURCE_ENCODING, EXTRA_AUDIO_SOURCE_SAMPLING_RATE, EXTRA_BIASING_STRINGS,
// EXTRA_ENABLE_BIASING_DEVICE_CONTEXT
// - if the SttInputDevice is already busy (e.g. another service is using it, or another part of
// Dicio is using it), that needs to be reported with ERROR_BUSY
/**
 * Exposes the injected [SttInputDeviceWrapper] through Android's [RecognitionService] API, by
 * translating the [InputEvent]s produced by the input device into calls on the recognition
 * [RecognitionService.Callback].
 */
@AndroidEntryPoint
class SttService : RecognitionService() {

    @Inject
    lateinit var sttInputDevice: SttInputDeviceWrapper

    @Inject
    lateinit var localeManager: LocaleManager

    /**
     * Starts a recognition session. If the intent requests a language (via
     * [RecognizerIntent.EXTRA_LANGUAGE]) different from the current app language, or if the input
     * device reports that it will not start listening, [ERROR_LANGUAGE_UNAVAILABLE] is sent to
     * [listener]. Otherwise input events are forwarded to [listener] as they arrive.
     *
     * @param recognizerIntent the intent with the recognition request; only
     * [RecognizerIntent.EXTRA_LANGUAGE] is read from it at the moment
     * @param listener the callback that receives recognition progress, results and errors
     */
    override fun onStartListening(recognizerIntent: Intent, listener: Callback) {
        val wantedLanguageExtra = recognizerIntent.getStringExtra(RecognizerIntent.EXTRA_LANGUAGE)
        // "und" is "Undetermined", see https://www.loc.gov/standards/iso639-2/php/code_list.php
        if (wantedLanguageExtra != null && wantedLanguageExtra != "und") {
            val appLanguage = localeManager.locale.value.language
            // EXTRA_LANGUAGE is an IETF BCP 47 tag (e.g. "en-US"), so it has to be parsed as a
            // language tag: Locale(String) would treat the whole tag as the language code and
            // yield "en-us", which never matches the app language. Underscores are normalized
            // first so that "en_US"-style values are understood, too.
            val wantedLanguage = Locale
                .forLanguageTag(wantedLanguageExtra.replace('_', '-'))
                .language
            if (appLanguage != wantedLanguage) {
                Log.e(TAG, "Unsupported language: app=$appLanguage wanted=$wantedLanguageExtra")
                // From the javadoc of ERROR_LANGUAGE_UNAVAILABLE: Requested language is supported,
                // but not available currently (e.g. not downloaded yet).
                logRemoteExceptions { listener.error(ERROR_LANGUAGE_UNAVAILABLE) }
                return
            }
        }

        // beginningOfSpeech() is reported lazily, right before the first (partial or final) result
        var beginningOfSpeechPending = true
        fun reportBeginningOfSpeechOnce() {
            if (beginningOfSpeechPending) {
                logRemoteExceptions { listener.beginningOfSpeech() }
                beginningOfSpeechPending = false
            }
        }

        val willStartListening = sttInputDevice.tryLoad { inputEvent ->
            when (inputEvent) {
                is InputEvent.Error -> {
                    logRemoteExceptions { listener.error(SpeechRecognizer.ERROR_SERVER) }
                }

                is InputEvent.Final -> {
                    reportBeginningOfSpeechOnce()

                    val results = Bundle().apply {
                        putStringArrayList(
                            SpeechRecognizer.RESULTS_RECOGNITION,
                            ArrayList(inputEvent.utterances.map { it.first }),
                        )
                        putFloatArray(
                            SpeechRecognizer.CONFIDENCE_SCORES,
                            inputEvent.utterances.map { it.second }.toFloatArray(),
                        )
                    }

                    // results() is the terminal callback of a session, so the end of speech has
                    // to be reported before it, not after
                    logRemoteExceptions { listener.endOfSpeech() }
                    logRemoteExceptions { listener.results(results) }
                }

                InputEvent.None -> {
                    // error() is the terminal callback of a session, so the end of speech has to
                    // be reported before it, not after
                    logRemoteExceptions { listener.endOfSpeech() }
                    logRemoteExceptions { listener.error(SpeechRecognizer.ERROR_SPEECH_TIMEOUT) }
                }

                is InputEvent.Partial -> {
                    reportBeginningOfSpeechOnce()

                    val partialResult = Bundle().apply {
                        putStringArrayList(
                            SpeechRecognizer.RESULTS_RECOGNITION,
                            arrayListOf(inputEvent.utterance),
                        )
                    }

                    logRemoteExceptions { listener.partialResults(partialResult) }
                }
            }
        }

        if (!willStartListening) {
            // tryLoad() returned false, i.e. the input device will not start listening on its own
            Log.w(TAG, "Could not start STT recognizer")
            logRemoteExceptions { listener.error(ERROR_LANGUAGE_UNAVAILABLE) }
        }
    }

    /**
     * Cancels the current recognition by asking the input device to stop listening.
     */
    override fun onCancel(listener: Callback) {
        sttInputDevice.stopListening()
    }

    /**
     * Stops capturing audio by asking the input device to stop listening.
     */
    override fun onStopListening(listener: Callback) {
        sttInputDevice.stopListening()
    }

    companion object {
        val TAG = SttService::class.simpleName

        /**
         * From the javadoc of [SpeechRecognizer.ERROR_LANGUAGE_UNAVAILABLE]: Requested language is
         * supported, but not available currently (e.g. not downloaded yet).
         *
         * That constant is only used on API level S and above, on older versions this falls back
         * to [SpeechRecognizer.ERROR_SERVER].
         */
        val ERROR_LANGUAGE_UNAVAILABLE = if (Build.VERSION.SDK_INT >= Build.VERSION_CODES.S) {
            SpeechRecognizer.ERROR_LANGUAGE_UNAVAILABLE
        } else {
            SpeechRecognizer.ERROR_SERVER
        }

        /**
         * Runs [f] and logs any [RemoteException] it throws instead of propagating it, so that a
         * failed call to the remote listener does not prevent the following ones.
         *
         * @param f the action to run, usually a single call on a recognition callback
         */
        fun logRemoteExceptions(f: () -> Unit) {
            try {
                f()
            } catch (e: RemoteException) {
                Log.e(TAG, "Remote exception", e)
            }
        }
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -211,13 +211,20 @@ class VoskInputDevice(
*
* @param thenStartListeningEventListener if not `null`, causes the [VoskInputDevice] to start
* listening after it has finished loading, and the received input events are sent there
* @return `true` if the input device will start listening (or be ready to do so in case
* `thenStartListeningEventListener == null`) at some point,
* `false` if manual user intervention is required to start listening
*/
override fun tryLoad(thenStartListeningEventListener: ((InputEvent) -> Unit)?) {
override fun tryLoad(thenStartListeningEventListener: ((InputEvent) -> Unit)?): Boolean {
    val currentState = _state.value
    return when {
        // nothing has been loaded yet: start loading (and then possibly listening)
        currentState == NotLoaded -> {
            load(thenStartListeningEventListener)
            true
        }

        // already loaded and the caller wants to listen: start listening right away
        thenStartListeningEventListener != null && currentState is Loaded -> {
            startListening(currentState.speechService, thenStartListeningEventListener)
            true
        }

        // any other combination of state and listener: nothing is started from here
        else -> false
    }
}

Expand Down Expand Up @@ -252,6 +259,16 @@ class VoskInputDevice(
}
}

/**
 * Stops the recognizer, but only if the current state is [Listening]. In any other state this
 * is a no-op.
 */
override fun stopListening() {
    val currentState = _state.value
    if (currentState is Listening) {
        stopListening(currentState.speechService, currentState.eventListener, true)
    }
}

/**
* Downloads the model zip file. Sets the state to [Downloading], and periodically updates it
* with downloading progress, until either [ErrorDownloading] or [Downloaded] are set as state.
Expand Down
1 change: 1 addition & 0 deletions app/src/main/res/values/strings.xml
Original file line number Diff line number Diff line change
Expand Up @@ -162,4 +162,5 @@
<string name="skill_timer_query_name">Timer %1$s expires in %2$s</string>
<string name="skill_timer_query_last">The last timer expires in %1$s</string>
<string name="skill_timer_none_canceled">OK, no timer was canceled</string>
<string name="stt_service_label">Dicio offline speech recognition</string>
</resources>
5 changes: 5 additions & 0 deletions app/src/main/res/xml/stt_service_metadata.xml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
<?xml version="1.0" encoding="utf-8"?>
<!--
    Metadata of the speech recognition service, referenced from the "android.speech" meta-data
    entry of the SttService declaration in the manifest.
    TODO actually set a proper settingsActivity (it currently points to MainActivity)
-->
<recognition-service
xmlns:android="http://schemas.android.com/apk/res/android"
android:settingsActivity="org.stypox.dicio.MainActivity" />

0 comments on commit cf5dac2

Please sign in to comment.