fix: voice engine rewrite — continuous=false restart-per-utterance, _scheduleRecStart, 12s heartbeat, clean TTS handoff

This commit is contained in:
2026-05-31 19:34:00 +00:00
parent af11d80216
commit 2ddce52c9a
+36 -30
View File
@@ -1012,13 +1012,21 @@ function showApp(name, greeting, silent = false) {
exitVoiceMode(); exitVoiceMode();
} }
}, 60000); }, 60000);
// Watchdog: reset isSpeaking if stuck (TTS error left it true) // Watchdog: reset isSpeaking if stuck; heartbeat keeps mic alive
setInterval(() => { setInterval(() => {
if (isSpeaking && !_ttsAudio && !window.speechSynthesis?.speaking) { if (isSpeaking && !_ttsAudio && !window.speechSynthesis?.speaking) {
isSpeaking = false; isSpeaking = false;
if (isListening) try { recognition?.start(); } catch(_) {} if (isListening) _scheduleRecStart(200);
} }
}, 5000); }, 4000);
// Heartbeat: if mic should be on but recognition has gone quiet, nudge it
setInterval(() => {
if (isListening && !isSpeaking) {
try {
recognition.start(); // throws if already running — that's fine
} catch(_) {}
}
}, 12000);
startListening(); startListening();
loadNetwork(); loadNetwork();
loadHA(); loadHA();
@@ -1759,32 +1767,29 @@ function initVoice() {
return; return;
} }
recognition = new SR(); recognition = new SR();
recognition.continuous = true; recognition.continuous = false; // restart-per-utterance — most reliable in Chrome
recognition.interimResults = false; recognition.interimResults = false;
recognition.lang = 'en-US'; recognition.lang = 'en-US';
recognition.maxAlternatives = 1;
recognition.onresult = (e) => { recognition.onresult = (e) => {
if (isSpeaking) return; // ignore mic during TTS playback if (isSpeaking) return;
const result = e.results[e.results.length - 1]; const transcript = (e.results[0][0].transcript || '').trim();
if (!result.isFinal) return;
const transcript = result[0].transcript.trim();
if (!transcript) return; if (!transcript) return;
const lc = transcript.toLowerCase(); const lc = transcript.toLowerCase();
if (!voiceMode) { if (!voiceMode) {
// Sleeping — full wake phrase required
if (WAKE_PHRASES.some(p => lc.includes(p))) enterVoiceMode(); if (WAKE_PHRASES.some(p => lc.includes(p))) enterVoiceMode();
} else if (!voiceMuted) { } else if (!voiceMuted) {
// Awake — "Jarvis <cmd>" triggers command; active window allows free speech voiceLastCmd = Date.now();
voiceLastCmd = Date.now(); // any detected speech resets 30-min sleep timer
const inWindow = voiceActive > 0 && (Date.now() - voiceActive) < VOICE_ACTIVE_MS; const inWindow = voiceActive > 0 && (Date.now() - voiceActive) < VOICE_ACTIVE_MS;
let cmd = null; let cmd = null;
if (lc.startsWith(CMD_PREFIX)) { if (lc.startsWith(CMD_PREFIX)) {
cmd = transcript.substring(CMD_PREFIX.length).trim(); cmd = transcript.substring(CMD_PREFIX.length).trim();
} else if (inWindow) { } else if (inWindow) {
cmd = transcript; // active window: no prefix needed cmd = transcript;
} }
if (cmd) { if (cmd) {
voiceActive = Date.now(); // reset 17s window voiceActive = Date.now();
document.getElementById('textInput').value = cmd; document.getElementById('textInput').value = cmd;
sendMessage(); sendMessage();
} }
@@ -1792,9 +1797,9 @@ function initVoice() {
}; };
recognition.onend = () => { recognition.onend = () => {
// Only restart when not speaking — _resumeMic() handles restart after TTS // Restart immediately unless TTS is playing or mic is off
if (isListening && !isSpeaking) { if (isListening && !isSpeaking) {
setTimeout(() => { try { recognition.start(); } catch(_) {} }, 150); _scheduleRecStart(100);
} }
}; };
@@ -1808,7 +1813,7 @@ function initVoice() {
updateMicBtn(); updateMicBtn();
addMessage('system', 'No microphone detected. Please connect a microphone and try again.'); addMessage('system', 'No microphone detected. Please connect a microphone and try again.');
} }
// no-speech and aborted are normal — onend will restart // no-speech / aborted / network: onend will fire and restart
}; };
} }
@@ -1861,6 +1866,16 @@ function toggleVoice() {
} }
} }
// Handle of the single pending recognition-start timer (null when none is pending).
let _recTimer = null;

/**
 * Schedule a (re)start of speech recognition after a short delay.
 *
 * At most one start is pending at any time: every call cancels the previously
 * scheduled one, so overlapping callers (recognition.onend, the isSpeaking
 * watchdog, the TTS hand-off, startListening) collapse into one start attempt.
 *
 * The listening/speaking state is evaluated when the timer fires, not when it
 * is scheduled, so a start queued just before TTS begins or the mic is turned
 * off becomes a no-op.
 *
 * @param {number} [ms=100] Delay in milliseconds before attempting the start.
 * @returns {void}
 */
function _scheduleRecStart(ms = 100) {
  clearTimeout(_recTimer);
  _recTimer = setTimeout(() => {
    _recTimer = null; // timer has fired; nothing is pending any more
    if (!isListening || isSpeaking || !recognition) return;
    try {
      recognition.start();
    } catch (err) {
      // start() throws InvalidStateError when a session is already running;
      // that is the expected, harmless case. Anything else is a real failure
      // and is reported instead of being silently discarded.
      if (err?.name !== 'InvalidStateError') {
        console.warn('Speech recognition failed to start:', err);
      }
    }
  }, ms);
}
function startListening() { function startListening() {
if (!recognition) { if (!recognition) {
if (!window.isSecureContext) { if (!window.isSecureContext) {
@@ -1871,7 +1886,7 @@ function startListening() {
return; return;
} }
isListening = true; isListening = true;
try { recognition.start(); } catch(_) {} _scheduleRecStart(50);
} }
function stopListening() { function stopListening() {
@@ -1879,6 +1894,7 @@ function stopListening() {
voiceMode = false; voiceMode = false;
voiceMuted = false; voiceMuted = false;
updateMicBtn(); updateMicBtn();
clearTimeout(_recTimer);
try { recognition.abort(); } catch(_) {} try { recognition.abort(); } catch(_) {}
} }
@@ -1917,13 +1933,8 @@ async function speak(text) {
const _resumeMic = () => { const _resumeMic = () => {
isSpeaking = false; isSpeaking = false;
reactor?.classList.remove('speaking'); reactor?.classList.remove('speaking');
if (isListening) { // onend will fire from the abort we did before TTS, and restart cleanly
// Abort any stale session then restart cleanly if (isListening) _scheduleRecStart(400);
setTimeout(() => {
try { recognition?.abort(); } catch(_) {}
setTimeout(() => { try { recognition?.start(); } catch(_) {} }, 150);
}, 200);
}
}; };
try { try {
const res = await fetch('/api/tts', { const res = await fetch('/api/tts', {
@@ -1956,12 +1967,7 @@ function _speakFallback(text) {
utter.onend = () => { utter.onend = () => {
reactor?.classList.remove('speaking'); reactor?.classList.remove('speaking');
isSpeaking = false; isSpeaking = false;
if (isListening) { if (isListening) _scheduleRecStart(400);
setTimeout(() => {
try { recognition?.abort(); } catch(_) {}
setTimeout(() => { try { recognition?.start(); } catch(_) {} }, 150);
}, 200);
}
}; };
synth.speak(utter); synth.speak(utter);
} }