fix: voice engine rewrite — continuous=false restart-per-utterance, _scheduleRecStart, 12s heartbeat, clean TTS handoff

2026-06-30 17:50:23 -05:00 · 2026-05-31 19:34:00 +00:00
parent af11d80216
commit 2ddce52c9a
1 changed files with 36 additions and 30 deletions
@@ -1012,13 +1012,21 @@ function showApp(name, greeting, silent = false) {
      exitVoiceMode();
    }
  }, 60000);
-  // Watchdog: reset isSpeaking if stuck (TTS error left it true)
+  // Watchdog: reset isSpeaking if stuck; heartbeat keeps mic alive
  setInterval(() => {
    if (isSpeaking && !_ttsAudio && !window.speechSynthesis?.speaking) {
      isSpeaking = false;
-      if (isListening) try { recognition?.start(); } catch(_) {}
+      if (isListening) _scheduleRecStart(200);
    }
-  }, 5000);
+  }, 4000);
+  // Heartbeat: if mic should be on but recognition has gone quiet, nudge it
+  setInterval(() => {
+    if (isListening && !isSpeaking) {
+      try {
+        recognition.start(); // throws if already running — that's fine
+      } catch(_) {}
+    }
+  }, 12000);
  startListening();
  loadNetwork();
  loadHA();
@@ -1759,32 +1767,29 @@ function initVoice() {
    return;
  }
  recognition = new SR();
-  recognition.continuous = true;
+  recognition.continuous = false;   // restart-per-utterance — most reliable in Chrome
  recognition.interimResults = false;
  recognition.lang = 'en-US';
+  recognition.maxAlternatives = 1;

  recognition.onresult = (e) => {
-    if (isSpeaking) return; // ignore mic during TTS playback
-    const result = e.results[e.results.length - 1];
-    if (!result.isFinal) return;
-    const transcript = result[0].transcript.trim();
+    if (isSpeaking) return;
+    const transcript = (e.results[0][0].transcript || '').trim();
    if (!transcript) return;
    const lc = transcript.toLowerCase();
    if (!voiceMode) {
-      // Sleeping — full wake phrase required
      if (WAKE_PHRASES.some(p => lc.includes(p))) enterVoiceMode();
    } else if (!voiceMuted) {
-      // Awake — "Jarvis <cmd>" triggers command; active window allows free speech
-      voiceLastCmd = Date.now(); // any detected speech resets 30-min sleep timer
+      voiceLastCmd = Date.now();
      const inWindow = voiceActive > 0 && (Date.now() - voiceActive) < VOICE_ACTIVE_MS;
      let cmd = null;
      if (lc.startsWith(CMD_PREFIX)) {
        cmd = transcript.substring(CMD_PREFIX.length).trim();
      } else if (inWindow) {
-        cmd = transcript; // active window: no prefix needed
+        cmd = transcript;
      }
      if (cmd) {
-        voiceActive = Date.now(); // reset 17s window
+        voiceActive = Date.now();
        document.getElementById('textInput').value = cmd;
        sendMessage();
      }
@@ -1792,9 +1797,9 @@ function initVoice() {
  };

  recognition.onend = () => {
-    // Only restart when not speaking — _resumeMic() handles restart after TTS
+    // Restart after a short delay (debounced) unless TTS is playing or mic is off
    if (isListening && !isSpeaking) {
-      setTimeout(() => { try { recognition.start(); } catch(_) {} }, 150);
+      _scheduleRecStart(100);
    }
  };

@@ -1808,7 +1813,7 @@ function initVoice() {
      updateMicBtn();
      addMessage('system', 'No microphone detected. Please connect a microphone and try again.');
    }
-    // no-speech and aborted are normal — onend will restart
+    // no-speech / aborted / network: onend will fire and restart
  };
 }

@@ -1861,6 +1866,16 @@ function toggleVoice() {
  }
 }

+let _recTimer = null; // id of the pending restart timeout; null until the first schedule
+function _scheduleRecStart(ms = 100) { // debounced recognition start: fires once, `ms` milliseconds after the latest call
+  clearTimeout(_recTimer); // drop any earlier pending start so overlapping callers collapse into one
+  _recTimer = setTimeout(() => {
+    if (isListening && !isSpeaking) { // state is re-checked when the timer fires, not when it was scheduled
+      try { recognition.start(); } catch(_) {} // any error from start() is deliberately ignored (best-effort restart)
+    }
+  }, ms);
+}
+
 function startListening() {
  if (!recognition) {
    if (!window.isSecureContext) {
@@ -1871,7 +1886,7 @@ function startListening() {
    return;
  }
  isListening = true;
-  try { recognition.start(); } catch(_) {}
+  _scheduleRecStart(50);
 }

 function stopListening() {
@@ -1879,6 +1894,7 @@ function stopListening() {
  voiceMode = false;
  voiceMuted = false;
  updateMicBtn();
+  clearTimeout(_recTimer);
  try { recognition.abort(); } catch(_) {}
 }

@@ -1917,13 +1933,8 @@ async function speak(text) {
  const _resumeMic = () => {
    isSpeaking = false;
    reactor?.classList.remove('speaking');
-    if (isListening) {
-      // Abort any stale session then restart cleanly
-      setTimeout(() => {
-        try { recognition?.abort(); } catch(_) {}
-        setTimeout(() => { try { recognition?.start(); } catch(_) {} }, 150);
-      }, 200);
-    }
+    // recognition.onend skips its restart while isSpeaking is true, so schedule the restart here
+    if (isListening) _scheduleRecStart(400);
  };
  try {
    const res = await fetch('/api/tts', {
@@ -1956,12 +1967,7 @@ function _speakFallback(text) {
  utter.onend   = () => {
    reactor?.classList.remove('speaking');
    isSpeaking = false;
-    if (isListening) {
-      setTimeout(() => {
-        try { recognition?.abort(); } catch(_) {}
-        setTimeout(() => { try { recognition?.start(); } catch(_) {} }, 150);
-      }, 200);
-    }
+    if (isListening) _scheduleRecStart(400);
  };
  synth.speak(utter);
 }