fix: voice engine rewrite — continuous=false restart-per-utterance, _scheduleRecStart, 12s heartbeat, clean TTS handoff

This commit is contained in:
2026-05-31 19:34:00 +00:00
parent af11d80216
commit 2ddce52c9a
+36 -30
View File
@@ -1012,13 +1012,21 @@ function showApp(name, greeting, silent = false) {
exitVoiceMode();
}
}, 60000);
// Watchdog: reset isSpeaking if stuck (TTS error left it true)
// Watchdog: reset isSpeaking if stuck; heartbeat keeps mic alive
setInterval(() => {
if (isSpeaking && !_ttsAudio && !window.speechSynthesis?.speaking) {
isSpeaking = false;
if (isListening) try { recognition?.start(); } catch(_) {}
if (isListening) _scheduleRecStart(200);
}
}, 5000);
}, 4000);
// Heartbeat: if mic should be on but recognition has gone quiet, nudge it
setInterval(() => {
if (isListening && !isSpeaking) {
try {
recognition.start(); // throws if already running — that's fine
} catch(_) {}
}
}, 12000);
startListening();
loadNetwork();
loadHA();
@@ -1759,32 +1767,29 @@ function initVoice() {
return;
}
recognition = new SR();
recognition.continuous = true;
recognition.continuous = false; // restart-per-utterance — most reliable in Chrome
recognition.interimResults = false;
recognition.lang = 'en-US';
recognition.maxAlternatives = 1;
recognition.onresult = (e) => {
if (isSpeaking) return; // ignore mic during TTS playback
const result = e.results[e.results.length - 1];
if (!result.isFinal) return;
const transcript = result[0].transcript.trim();
if (isSpeaking) return;
const transcript = (e.results[0][0].transcript || '').trim();
if (!transcript) return;
const lc = transcript.toLowerCase();
if (!voiceMode) {
// Sleeping — full wake phrase required
if (WAKE_PHRASES.some(p => lc.includes(p))) enterVoiceMode();
} else if (!voiceMuted) {
// Awake — "Jarvis <cmd>" triggers command; active window allows free speech
voiceLastCmd = Date.now(); // any detected speech resets 30-min sleep timer
voiceLastCmd = Date.now();
const inWindow = voiceActive > 0 && (Date.now() - voiceActive) < VOICE_ACTIVE_MS;
let cmd = null;
if (lc.startsWith(CMD_PREFIX)) {
cmd = transcript.substring(CMD_PREFIX.length).trim();
} else if (inWindow) {
cmd = transcript; // active window: no prefix needed
cmd = transcript;
}
if (cmd) {
voiceActive = Date.now(); // reset 17s window
voiceActive = Date.now();
document.getElementById('textInput').value = cmd;
sendMessage();
}
@@ -1792,9 +1797,9 @@ function initVoice() {
};
// End-of-session handler: decides whether the recognizer is started again.
recognition.onend = () => {
  // Nothing to do while the mic is off; while TTS is speaking the restart is
  // left to the code that runs when speech finishes.
  if (isListening && !isSpeaking) {
    // Use the shared scheduler as the only restart path: it replaces any
    // pending restart timer and re-checks isListening/isSpeaking when it
    // fires, and stopListening() can cancel it. A raw setTimeout here could
    // not be cancelled and would start the mic even after it was turned off.
    _scheduleRecStart(100);
  }
};
@@ -1808,7 +1813,7 @@ function initVoice() {
updateMicBtn();
addMessage('system', 'No microphone detected. Please connect a microphone and try again.');
}
// no-speech and aborted are normal — onend will restart
// no-speech / aborted / network: onend will fire and restart
};
}
@@ -1861,6 +1866,16 @@ function toggleVoice() {
}
}
// Handle of the pending recognition restart timer, or null when none is queued.
let _recTimer = null;

/**
 * Schedule a (re)start of speech recognition after a delay.
 *
 * Only one restart is ever pending: each call cancels the previously
 * scheduled one. The listening/speaking flags are re-checked when the timer
 * fires, so a restart queued before the mic was turned off (or before TTS
 * began) becomes a no-op.
 *
 * @param {number} [ms=100] Delay in milliseconds before attempting the start.
 */
function _scheduleRecStart(ms = 100) {
  clearTimeout(_recTimer);
  _recTimer = setTimeout(() => {
    _recTimer = null; // the timer has fired; drop the stale handle
    if (!isListening || isSpeaking || !recognition) return;
    try {
      recognition.start();
    } catch (err) {
      // start() throws InvalidStateError when a session is already running;
      // that is the expected "nothing to do" case. Anything else is a real
      // failure and is reported instead of being silently swallowed.
      if (err?.name !== 'InvalidStateError') {
        console.warn('Speech recognition failed to start:', err);
      }
    }
  }, ms);
}
function startListening() {
if (!recognition) {
if (!window.isSecureContext) {
@@ -1871,7 +1886,7 @@ function startListening() {
return;
}
isListening = true;
try { recognition.start(); } catch(_) {}
_scheduleRecStart(50);
}
function stopListening() {
@@ -1879,6 +1894,7 @@ function stopListening() {
voiceMode = false;
voiceMuted = false;
updateMicBtn();
clearTimeout(_recTimer);
try { recognition.abort(); } catch(_) {}
}
@@ -1917,13 +1933,8 @@ async function speak(text) {
// Clears the speaking state and hands the microphone back to recognition.
// NOTE(review): presumably invoked when TTS playback ends or fails — the call
// sites are outside this view.
const _resumeMic = () => {
  isSpeaking = false;
  reactor?.classList.remove('speaking');
  // Single debounced restart. No abort()/start() timer chain here: those
  // timers could not be cancelled by stopListening(), did not re-check the
  // listening state, and the abort triggered onend, which scheduled yet
  // another restart that raced with this one. _scheduleRecStart keeps only
  // the most recently requested restart alive.
  if (isListening) _scheduleRecStart(400);
};
try {
const res = await fetch('/api/tts', {
@@ -1956,12 +1967,7 @@ function _speakFallback(text) {
// When the utterance finishes: clear the speaking state and give the
// microphone back to recognition.
utter.onend = () => {
  reactor?.classList.remove('speaking');
  isSpeaking = false;
  // One restart through the shared scheduler only. An additional untracked
  // abort()/start() timer chain would compete with it: stopListening() could
  // not cancel those timers, and the abort would fire the recognizer's onend
  // handler, queueing a further restart.
  if (isListening) _scheduleRecStart(400);
};
synth.speak(utter);
}