Skip to content

Commit 6a34546

Browse files
authored
Wake word detection within Assist conversation (#6497)
* Introduce VoiceAudioRecorder and set sample rate to 16kHz
* Make AssistActivity support wake word using AssistAudioStrategy
* Make sure enabling wake word sets the wake word model
* Pick Okay Nabu as default
* Prefer 16kHz and fall back to downsampling if not available
1 parent e2961a1 commit 6a34546

File tree

29 files changed

+1969
-497
lines changed

29 files changed

+1969
-497
lines changed

app/src/main/kotlin/io/homeassistant/companion/android/assist/AssistActivity.kt

Lines changed: 18 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ import androidx.core.content.ContextCompat
1515
import androidx.core.content.getSystemService
1616
import androidx.lifecycle.lifecycleScope
1717
import dagger.hilt.android.AndroidEntryPoint
18+
import dagger.hilt.android.lifecycle.withCreationCallback
1819
import io.homeassistant.companion.android.BaseActivity
1920
import io.homeassistant.companion.android.assist.service.AssistVoiceInteractionService
2021
import io.homeassistant.companion.android.assist.ui.AssistSheetView
@@ -23,12 +24,26 @@ import io.homeassistant.companion.android.common.data.servers.ServerManager
2324
import io.homeassistant.companion.android.launch.LaunchActivity
2425
import io.homeassistant.companion.android.util.compose.HomeAssistantAppTheme
2526
import io.homeassistant.companion.android.webview.WebViewActivity
27+
import javax.inject.Inject
2628
import kotlinx.coroutines.launch
2729

2830
@AndroidEntryPoint
2931
class AssistActivity : BaseActivity() {
3032

31-
private val viewModel: AssistViewModel by viewModels()
33+
@Inject
34+
lateinit var audioStrategyFactory: AssistAudioStrategyFactory
35+
36+
private val wakeWordPhrase: String? by lazy {
37+
intent.getStringExtra(EXTRA_FROM_WAKE_WORD_PHRASE)
38+
}
39+
40+
private val viewModel: AssistViewModel by viewModels(
41+
extrasProducer = {
42+
defaultViewModelCreationExtras.withCreationCallback<AssistViewModel.Factory> { factory ->
43+
factory.create(audioStrategyFactory.create(applicationContext, wakeWordPhrase))
44+
}
45+
},
46+
)
3247

3348
private var contextIsLocked = true
3449

@@ -157,6 +172,8 @@ class AssistActivity : BaseActivity() {
157172
override fun onDestroy() {
158173
super.onDestroy()
159174
viewModel.onDestroy()
175+
// This is a safety net: if the listener did not properly start, we still want to
176+
// resume.
160177
AssistVoiceInteractionService.resumeListening(this)
161178
}
162179

Lines changed: 58 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,58 @@
1+
package io.homeassistant.companion.android.assist
2+
3+
import android.content.Context
4+
import androidx.core.content.getSystemService
5+
import io.homeassistant.companion.android.assist.service.AssistVoiceInteractionService
6+
import io.homeassistant.companion.android.assist.wakeword.WakeWordListenerFactory
7+
import io.homeassistant.companion.android.common.assist.AssistAudioStrategy
8+
import io.homeassistant.companion.android.common.assist.DefaultAssistAudioStrategy
9+
import io.homeassistant.companion.android.common.util.VoiceAudioRecorder
10+
import io.homeassistant.companion.android.settings.assist.AssistConfigManager
11+
import javax.inject.Inject
12+
13+
/**
14+
* Creates [AssistAudioStrategy] instances based on whether wake word detection is needed.
15+
*
16+
* Encapsulates the strategy construction logic.
17+
*/
18+
class AssistAudioStrategyFactory @Inject constructor(
19+
private val voiceAudioRecorder: VoiceAudioRecorder,
20+
private val wakeWordListenerFactory: WakeWordListenerFactory,
21+
private val assistConfigManager: AssistConfigManager,
22+
) {
23+
24+
/**
25+
* Creates an [AssistAudioStrategy] based on the requested mode.
26+
*
27+
* When [wakeWordPhrase] is not null, creates a [WakeWordAssistAudioStrategy] that listens for
28+
* wake words before starting the Assist pipeline. The strategy's `onListenerStopped` callback
29+
* automatically resumes the background wake word service via
30+
* [AssistVoiceInteractionService.resumeListening].
31+
*
32+
* When [wakeWordPhrase] is null, creates a [DefaultAssistAudioStrategy] that streams audio
33+
* directly to the pipeline with audio focus management.
34+
*
35+
* @param context Used to obtain the system [android.media.AudioManager] and to resume the
36+
* background wake word listener when the strategy stops
37+
* @param wakeWordPhrase Wake word phrase from an external source (e.g. Intent extra). When
38+
* provided, a [WakeWordAssistAudioStrategy] is created and the phrase is used to resolve
39+
* the matching wake word model from available models
40+
* @return The configured [AssistAudioStrategy]
41+
*/
42+
fun create(context: Context, wakeWordPhrase: String?): AssistAudioStrategy = if (wakeWordPhrase != null) {
43+
WakeWordAssistAudioStrategy(
44+
voiceAudioRecorder = voiceAudioRecorder,
45+
wakeWordListenerFactory = wakeWordListenerFactory,
46+
assistConfigManager = assistConfigManager,
47+
wakeWordPhrase = wakeWordPhrase,
48+
onListenerStopped = {
49+
AssistVoiceInteractionService.resumeListening(context)
50+
},
51+
)
52+
} else {
53+
DefaultAssistAudioStrategy(
54+
voiceAudioRecorder = voiceAudioRecorder,
55+
audioManager = context.getSystemService(),
56+
)
57+
}
58+
}

app/src/main/kotlin/io/homeassistant/companion/android/assist/AssistViewModel.kt

Lines changed: 38 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -8,17 +8,19 @@ import androidx.compose.runtime.mutableStateListOf
88
import androidx.compose.runtime.mutableStateOf
99
import androidx.compose.runtime.setValue
1010
import androidx.lifecycle.viewModelScope
11+
import dagger.assisted.Assisted
12+
import dagger.assisted.AssistedFactory
13+
import dagger.assisted.AssistedInject
1114
import dagger.hilt.android.lifecycle.HiltViewModel
1215
import io.homeassistant.companion.android.assist.ui.AssistMessage
1316
import io.homeassistant.companion.android.assist.ui.AssistUiPipeline
1417
import io.homeassistant.companion.android.common.R as commonR
18+
import io.homeassistant.companion.android.common.assist.AssistAudioStrategy
1519
import io.homeassistant.companion.android.common.assist.AssistEvent
1620
import io.homeassistant.companion.android.common.assist.AssistViewModelBase
1721
import io.homeassistant.companion.android.common.data.servers.ServerManager
1822
import io.homeassistant.companion.android.common.data.websocket.impl.entities.AssistPipelineResponse
19-
import io.homeassistant.companion.android.common.util.AudioRecorder
2023
import io.homeassistant.companion.android.common.util.AudioUrlPlayer
21-
import javax.inject.Inject
2224
import kotlin.time.Duration.Companion.seconds
2325
import kotlinx.coroutines.CancellationException
2426
import kotlinx.coroutines.Job
@@ -29,13 +31,29 @@ import timber.log.Timber
2931
@VisibleForTesting
3032
internal val CLOSE_INACTIVE = 30.seconds
3133

32-
@HiltViewModel
33-
class AssistViewModel @Inject constructor(
34-
val serverManager: ServerManager,
35-
private val audioRecorder: AudioRecorder,
34+
@HiltViewModel(assistedFactory = AssistViewModel.Factory::class)
35+
class AssistViewModel @AssistedInject constructor(
36+
serverManager: ServerManager,
37+
@Assisted initialAudioStrategy: AssistAudioStrategy,
3638
audioUrlPlayer: AudioUrlPlayer,
3739
application: Application,
38-
) : AssistViewModelBase(serverManager, audioRecorder, audioUrlPlayer, application) {
40+
) : AssistViewModelBase(serverManager, initialAudioStrategy, audioUrlPlayer, application) {
41+
42+
@AssistedFactory
43+
interface Factory {
44+
fun create(audioStrategy: AssistAudioStrategy): AssistViewModel
45+
}
46+
47+
init {
48+
viewModelScope.launch {
49+
audioStrategy.wakeWordDetected.collect { detectedPhrase ->
50+
if (inputMode != AssistInputMode.VOICE_ACTIVE) {
51+
wakeWordPhrase = detectedPhrase
52+
onMicrophoneInput()
53+
}
54+
}
55+
}
56+
}
3957

4058
private var filteredServerId: Int? = null
4159
private val allPipelines = mutableMapOf<Int, List<AssistPipelineResponse>>()
@@ -379,27 +397,24 @@ class AssistViewModel @Inject constructor(
379397

380398
stopPlayback()
381399

382-
val recording = try {
383-
recorderProactive || audioRecorder.startRecording()
384-
} catch (e: Exception) {
385-
Timber.e(e, "Exception while starting recording")
386-
false
387-
}
388-
389-
if (recording) {
390-
if (!recorderProactive) setupRecorder()
391-
inputMode = AssistInputMode.VOICE_ACTIVE
392-
if (proactive == true) _conversation.add(AssistMessage.placeholder(isInput = true))
393-
if (proactive != true) runAssistPipeline(null)
394-
} else {
395-
_conversation.add(
396-
AssistMessage(app.getString(commonR.string.assist_error), isInput = false, isError = true),
400+
if (!recorderProactive) {
401+
audioStrategy.requestFocus()
402+
setupRecorder(
403+
onError = {
404+
stopRecording()
405+
_conversation.add(
406+
AssistMessage(app.getString(commonR.string.assist_error), isInput = false, isError = true),
407+
)
408+
},
397409
)
398410
}
411+
inputMode = AssistInputMode.VOICE_ACTIVE
412+
if (proactive == true) _conversation.add(AssistMessage.placeholder(isInput = true))
413+
if (proactive != true) runAssistPipeline(null)
399414

400415
restartInactivityTimer()
401416

402-
recorderProactive = recording && proactive == true
417+
recorderProactive = proactive == true
403418
}
404419

405420
private fun runAssistPipeline(text: String?) {
Lines changed: 99 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,99 @@
1+
package io.homeassistant.companion.android.assist
2+
3+
import android.annotation.SuppressLint
4+
import android.media.AudioManager
5+
import io.homeassistant.companion.android.assist.wakeword.MicroWakeWordModelConfig
6+
import io.homeassistant.companion.android.assist.wakeword.WakeWordListenerFactory
7+
import io.homeassistant.companion.android.common.assist.AssistAudioFocus
8+
import io.homeassistant.companion.android.common.assist.AssistAudioFocusImpl
9+
import io.homeassistant.companion.android.common.assist.AssistAudioStrategy
10+
import io.homeassistant.companion.android.common.util.VoiceAudioRecorder
11+
import io.homeassistant.companion.android.settings.assist.AssistConfigManager
12+
import kotlinx.coroutines.channels.Channel
13+
import kotlinx.coroutines.channels.consumeEach
14+
import kotlinx.coroutines.flow.Flow
15+
import kotlinx.coroutines.flow.callbackFlow
16+
import timber.log.Timber
17+
18+
/**
 * Audio strategy that uses a shared [VoiceAudioRecorder] for both the Assist pipeline
 * and wake word detection.
 *
 * The same [voiceAudioRecorder] instance is shared with the [WakeWordListener][io.homeassistant.companion.android.assist.wakeword.WakeWordListener]
 * (via the [WakeWordListenerFactory]). [VoiceAudioRecorder.audioData] returns a shared flow
 * backed by a single [android.media.AudioRecord], so both the wake word listener and the
 * pipeline can collect concurrently without creating multiple recorder instances.
 *
 * The [wakeWordDetected] flow controls the listener's lifecycle: collecting it starts
 * the listener, and cancelling the collection cancels the scope the listener was started in.
 *
 * @param voiceAudioRecorder Shared voice recorder
 * @param wakeWordListenerFactory Factory to create the wake word listener
 * @param assistConfigManager Assist configuration manager
 * @param wakeWordPhrase Wake word phrase from an external source. The model whose
 *  [MicroWakeWordModelConfig.wakeWord] matches this phrase is used for detection; when no
 *  model matches, the first available model is used instead.
 * @param audioManager System audio manager for focus management. When `null`, focus
 *  requests are no-ops.
 * @param onListenerStopped Called when the listener is fully stopped. Callers can use
 *  this to resume other audio operations (e.g. a background wake word service).
 */
class WakeWordAssistAudioStrategy(
    private val voiceAudioRecorder: VoiceAudioRecorder,
    wakeWordListenerFactory: WakeWordListenerFactory,
    private val assistConfigManager: AssistConfigManager,
    private val wakeWordPhrase: String,
    audioManager: AudioManager? = null,
    onListenerStopped: () -> Unit = {},
) : AssistAudioStrategy,
    AssistAudioFocus by AssistAudioFocusImpl(audioManager) {

    // Bridges the listener callback to the flow. Conflated: only the most recent
    // detection is kept if the collector has not picked up the previous one yet.
    private val wakeWordChannel = Channel<String>(Channel.CONFLATED)

    // Created on first collection of [wakeWordDetected] and reused afterwards.
    private val listener by lazy {
        wakeWordListenerFactory.create(
            onWakeWordDetected = { modelConfig ->
                wakeWordChannel.trySend(modelConfig.wakeWord)
            },
            onListenerStopped = onListenerStopped,
        )
    }

    override suspend fun audioData(): Flow<ShortArray> = voiceAudioRecorder.audioData()

    /**
     * Cold flow that starts the wake word listener when collected and emits the phrase of
     * every detected wake word.
     *
     * The collector's [kotlinx.coroutines.CoroutineScope] (from [callbackFlow]) is used as
     * the listener's scope, so that scope is cancelled when the collection is cancelled.
     *
     * The flow can be collected again after a previous collection was cancelled: the
     * internal channel is iterated without being consumed, so cancellation of one collector
     * does not cancel the channel for the next one.
     */
    @SuppressLint("MissingPermission")
    override val wakeWordDetected: Flow<String> = callbackFlow {
        val model = resolveModelFromPhrase()

        // Drop a detection left over from a previous collection (the conflated channel holds
        // at most one element) so a new collector never reacts to a stale wake word.
        wakeWordChannel.tryReceive()

        listener.start(coroutineScope = this, modelConfig = model)

        // A plain `for` loop is used on purpose: `consumeEach` would cancel
        // `wakeWordChannel` when this collection is cancelled, which would make every later
        // collection fail. The loop suspends until this collection is cancelled, which keeps
        // the callbackFlow block alive.
        for (detectedPhrase in wakeWordChannel) {
            send(detectedPhrase)
        }
    }

    /**
     * Resolves the [MicroWakeWordModelConfig] to use for wake word detection.
     *
     * Searches the available models for one whose [MicroWakeWordModelConfig.wakeWord] equals
     * [wakeWordPhrase]. If no match is found, falls back to the first available model.
     *
     * The available models are fetched once so the lookup and the fallback operate on the
     * same snapshot.
     *
     * @throws NoSuchElementException if there is no matching model and no model is available
     */
    private suspend fun resolveModelFromPhrase(): MicroWakeWordModelConfig {
        val availableModels = assistConfigManager.getAvailableModels()

        val matchingModel = availableModels.find { it.wakeWord == wakeWordPhrase }
        if (matchingModel != null) {
            Timber.d("Resolved wake word model from phrase: '${matchingModel.wakeWord}'")
            return matchingModel
        }

        val firstModel = availableModels.first()
        Timber.w("Could not resolve wake word model for phrase: '$wakeWordPhrase' falling back to $firstModel")
        return firstModel
    }
}

app/src/main/kotlin/io/homeassistant/companion/android/assist/service/AssistVoiceInteractionService.kt

Lines changed: 2 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -7,6 +7,7 @@ import android.app.NotificationChannel
77
import android.app.NotificationManager
88
import android.app.PendingIntent
99
import android.content.BroadcastReceiver
10+
import android.content.ComponentName
1011
import android.content.Context
1112
import android.content.Intent
1213
import android.content.IntentFilter
@@ -27,9 +28,6 @@ import io.homeassistant.companion.android.common.R as commonR
2728
import io.homeassistant.companion.android.common.util.CHANNEL_ASSIST_LISTENING
2829
import io.homeassistant.companion.android.settings.assist.AssistConfigManager
2930
import javax.inject.Inject
30-
import kotlin.time.Clock
31-
import kotlin.time.Duration.Companion.seconds
32-
import kotlin.time.Instant
3331
import kotlinx.coroutines.CoroutineScope
3432
import kotlinx.coroutines.Dispatchers
3533
import kotlinx.coroutines.SupervisorJob
@@ -54,9 +52,6 @@ import timber.log.Timber
5452
*/
5553
@AndroidEntryPoint
5654
class AssistVoiceInteractionService : VoiceInteractionService() {
57-
@Inject
58-
lateinit var clock: Clock
59-
6055
@Inject
6156
lateinit var assistConfigManager: AssistConfigManager
6257

@@ -72,7 +67,6 @@ class AssistVoiceInteractionService : VoiceInteractionService() {
7267
onListenerFailed = ::onListenerFailed,
7368
)
7469
}
75-
private var lastTriggerTime: Instant? = null
7670
private var isServiceReady = false
7771

7872
private val actionReceiver = object : BroadcastReceiver() {
@@ -218,21 +212,10 @@ class AssistVoiceInteractionService : VoiceInteractionService() {
218212
}
219213

220214
private fun onWakeWordDetected(model: MicroWakeWordModelConfig) {
221-
// Always broadcast for observers (e.g. settings test mode) regardless of debounce
222215
sendBroadcast(
223216
Intent(ACTION_WAKE_WORD_DETECTED).setPackage(packageName),
224217
)
225218

226-
val now = clock.now()
227-
val lastTrigger = lastTriggerTime
228-
229-
// Debounce: only trigger if enough time has passed since last detection
230-
if (lastTrigger != null && (now - lastTrigger) <= DEBOUNCE_DURATION) {
231-
Timber.d("Wake word detected but within debounce period, ignoring")
232-
return
233-
}
234-
235-
lastTriggerTime = now
236219
Timber.i("Wake word '${model.wakeWord}' detected, launching Assist")
237220

238221
// Stop the listener before launching Assist to release the microphone.
@@ -340,13 +323,11 @@ class AssistVoiceInteractionService : VoiceInteractionService() {
340323

341324
private const val ACTION_WAKE_WORD_DETECTED = "io.homeassistant.companion.android.WAKE_WORD_DETECTED"
342325

343-
private val DEBOUNCE_DURATION = 3.seconds
344-
345326
/**
346327
* Check if this VoiceInteractionService is currently the active system assistant.
347328
*/
348329
fun isActiveService(context: Context): Boolean {
349-
val componentName = android.content.ComponentName(
330+
val componentName = ComponentName(
350331
context,
351332
AssistVoiceInteractionService::class.java,
352333
)

0 commit comments

Comments
 (0)