livekit · chenghao-mou · Jun 16, 2026 · Jun 17, 2026 · Jun 17, 2026 · Jun 17, 2026
@@ -490,7 +490,13 @@ async def drive_thru_agent(ctx: JobContext) -> None:
             voice="Sarah",
             extra_kwargs={"delivery_mode": "CREATIVE", "speaking_rate": 1.1},
         ),
-        expressive=presets.CUSTOMER_SERVICE,
+        expressive={
+            **presets.CUSTOMER_SERVICE,
+            "backchannel": {
+                "frequency": 0.8,
+                "source": ["mm-hmm", "yep", "got it", "uh huh", "gotcha"],
+            },
+        },
         max_tool_steps=10,
         # Flip user_state to "away" after 10s of mutual silence so we can
         # check whether they're still there (default is 15s).
@@ -577,6 +583,13 @@ def _on_user_state_changed(ev: UserStateChangedEvent) -> None:
     await session.start(agent=DriveThruAgent(userdata=userdata), room=ctx.room)
     await background_audio.start(room=ctx.room, agent_session=session)
 
+    session.generate_reply(
+        instructions=(
+            "Warmly greet the customer with something like "
+            "\"Hey, welcome to McDonald's! What can I get for you?\""
+        )
+    )
+
 
 if __name__ == "__main__":
     cli.run_app(server)
@@ -297,7 +297,7 @@ async def frontdesk_agent(ctx: JobContext):
             voice="Nadia",
             extra_kwargs={"delivery_mode": "CREATIVE", "speaking_rate": 1.1},
         ),
-        expressive=presets.CUSTOMER_SERVICE,
+        expressive={**presets.CUSTOMER_SERVICE, "backchannel": True},
         max_tool_steps=1,
         # Flip user_state to "away" after 10s of mutual silence so we can
         # check whether they're still there (default is 15s).

@@ -763,7 +763,7 @@ async def entrypoint(ctx: JobContext):
             voice="Luna",
             extra_kwargs={"delivery_mode": "CREATIVE", "speaking_rate": 1.1},
         ),
-        expressive=presets.HEALTHCARE,
+        expressive={**presets.HEALTHCARE, "backchannel": True},
         preemptive_generation=True,
         # Flip user_state to "away" after 10s of mutual silence so we can
         # check whether they're still there (default is 15s).

@@ -60,8 +60,6 @@
     tool_use_judge,
 )
 from livekit.agents.voice import UserStateChangedEvent, presets
-from livekit.plugins import silero
-from livekit.plugins.turn_detector.multilingual import MultilingualModel
 
 load_dotenv()
 
@@ -674,9 +672,7 @@ async def hotel_receptionist_agent(ctx: JobContext) -> None:
             voice="Ashley",
             extra_kwargs={"delivery_mode": "CREATIVE", "speaking_rate": 1.1},
         ),
-        expressive=presets.CUSTOMER_SERVICE,
-        turn_detection=MultilingualModel(),
-        vad=silero.VAD.load(),
+        expressive={**presets.CUSTOMER_SERVICE, "backchannel": True},
         max_tool_steps=5,
         # Flip user_state to "away" after 10s of mutual silence so we can
         # check whether they're still there (default is 15s).

@@ -79,7 +79,7 @@ async def entrypoint(ctx: JobContext) -> None:
             voice="Sarah",
             extra_kwargs={"delivery_mode": "CREATIVE"},
         ),
-        expressive=presets.CONVERSATIONAL,
+        expressive={**presets.CONVERSATIONAL, "backchannel": True},
         # Flip user_state to "away" after 10s of mutual silence so we can
         # check whether they're still there (default is 15s).
         user_away_timeout=10.0,

@@ -356,7 +356,7 @@ async def entrypoint(ctx: JobContext):
         tts=inference.TTS(
             "inworld/inworld-tts-2", voice="Nate", extra_kwargs={"delivery_mode": "CREATIVE"}
         ),
-        expressive=presets.CONVERSATIONAL,
+        expressive={**presets.CONVERSATIONAL, "backchannel": True},
         preemptive_generation=True,
         # Flip user_state to "away" after 10s of mutual silence so we can
         # check whether they're still there (default is 15s).

@@ -102,6 +102,7 @@ async def entrypoint(ctx: JobContext) -> None:
             "filter_markdown",
             text_transforms.replace({"LiveKit": "<<ˈ|l|aɪ|v|k|ɪ|t>>"}),
         ],
+        expressive={"backchannel": True},
     )
 
     @session.on("metrics_collected")

@@ -84,6 +84,8 @@
     AgentStateChangedEvent,
     AgentTask,
     AudioRecognition,
+    BackchannelConfig,
+    BackchannelOptions,
     CloseEvent,
     CloseReason,
     ConversationItemAddedEvent,
@@ -223,6 +225,8 @@ def __getattr__(name: str) -> typing.Any:
     "BackgroundAudioPlayer",
     "BuiltinAudioClip",
     "AudioConfig",
+    "BackchannelConfig",
+    "BackchannelOptions",
     "PlayHandle",
     "FlushSentinel",
     "LanguageCode",

@@ -7,6 +7,7 @@
     VoiceActivityVideoSampler,
 )
 from .audio_recognition import AudioRecognition
+from .backchannel import BackchannelConfig, BackchannelOptions
 from .events import (
     AgentEvent,
     AgentFalseInterruptionEvent,
@@ -40,6 +41,8 @@
     "Agent",
     "ModelSettings",
     "ExpressiveOptions",
+    "BackchannelConfig",
+    "BackchannelOptions",
     "presets",
     "AgentTask",
     "SpeechHandle",

@@ -94,6 +94,7 @@
 if TYPE_CHECKING:
     from ..llm import mcp
     from .agent_session import AgentSession, ExpressiveOptions
+    from .backchannel import _BackchannelEmitter
 
 
 _AgentActivityContextVar = contextvars.ContextVar["AgentActivity"]("agents_activity")
@@ -251,6 +252,9 @@ def __init__(self, agent: Agent, sess: AgentSession) -> None:
             self._interruption_by_audio_activity_enabled
         )
 
+        # short acknowledgments emitted during the user's pauses (None when disabled)
+        self._backchannel_emitter = self._resolve_backchannel_emitter()
+
         # speeches that audio playout finished but not done because of tool calls
         self._background_speeches: set[SpeechHandle] = set()
 
@@ -2073,9 +2077,8 @@ def on_eot_prediction(self, ev: EotPredictionEvent) -> None:
             host._on_eot_prediction(ev)
 
     def on_agent_backchannel_opportunity(self, ev: _AgentBackchannelOpportunityEvent) -> None:
-        # TODO: consume the backchannel opportunity internally (e.g. trigger a
-        # backchannel phrase). Kept internal for now — not surfaced as a public event.
-        pass
+        if self._backchannel_emitter is not None:  # None when disabled or unsupported
+            self._backchannel_emitter.maybe_emit(ev, self)
 
     def on_end_of_turn(self, info: _EndOfTurnInfo) -> bool:
         # IMPORTANT: This method is sync to avoid it being cancelled by the AudioRecognition
@@ -2380,6 +2383,33 @@ def retrieve_chat_ctx(self) -> llm.ChatContext:
 
     # endregion
 
+    def _resolve_backchannel_emitter(self) -> _BackchannelEmitter | None:
+        """Build the per-activity backchannel emitter, or ``None`` when expressive/backchannel
+        is disabled or the turn detector is not an ``inference.TurnDetector``."""
+        from .backchannel import _BackchannelEmitter, resolve_backchannel_options
+
+        expr = self._agent.expressive  # agent-level value overrides the session's
+        if not utils.is_given(expr):
+            expr = self._session.options.expressive  # fall back to the session-level value
+        if not expr:
+            return None  # expressive off → no backchannel
+        # a non-dict expressive value carries no "backchannel" key, so treat it as not given
+        backchannel = expr.get("backchannel", NOT_GIVEN) if isinstance(expr, dict) else NOT_GIVEN
+        options = resolve_backchannel_options(backchannel)
+        if options is None:
+            return None  # resolver gave no options (presumably backchannel=False — confirm)
+
+        # only the cloud turn detector supplies the backchannel signal; the local
+        # mini model never emits it, so the opportunity hook would never fire
+        if not isinstance(self._turn_detection, inference.TurnDetector):
+            logger.warning(
+                "backchannel is enabled but the active turn detector does not provide a "
+                "backchannel signal (requires the LiveKit cloud turn detector); disabling it"
+            )
+            return None
+
+        return _BackchannelEmitter(options)
+
     def _resolve_expressive_options(self) -> ExpressiveOptions | None:
         """Resolve expressive from agent (overrides session). Returns None if disabled."""
         from . import presets

@@ -83,6 +83,8 @@
     from ..cli.tcp_console import TcpAudioInput, TcpAudioOutput
     from ..inference import LLMModels, STTModels, TTSModels
     from ..llm import mcp
+    from .backchannel import BackchannelConfig, BackchannelOptions
+    from .background_audio import AudioSource
     from .presets import Preset
     from .transcription.text_transforms import TextTransforms
 
@@ -160,6 +162,11 @@ class ExpressiveOptions(TypedDict, total=False):
     tts_instructions_template: Instructions | str
     tts_instructions_append: str
     audio_recognition_instructions_template: Instructions | str
+    backchannel: NotGivenOr[bool | list[str | AudioSource | BackchannelConfig] | BackchannelOptions]
+    """Short acknowledgments ("mm-hmm", "yeah") emitted during the user's pauses.
+    ``NOT_GIVEN``/``True`` → default two-tier set; ``False`` → off; a list → custom
+    clips; a ``BackchannelOptions`` → full control. Requires the LiveKit cloud turn
+    detector (it supplies the backchannel signal)."""
 
 
 DEFAULT_EXPRESSIVE_OPTIONS: ExpressiveOptions = ExpressiveOptions(

@@ -1511,17 +1511,34 @@ async def _bounce_eou_task(
                             if (
                                 backchannel_probability is not None
                                 and backchannel_threshold is not None
-                                and backchannel_probability >= backchannel_threshold
                             ):
-                                self._hooks.on_agent_backchannel_opportunity(
-                                    _AgentBackchannelOpportunityEvent(
-                                        probability=backchannel_probability,
-                                        threshold=backchannel_threshold,
-                                        end_of_turn_probability=end_of_turn_probability,
-                                        end_of_turn_threshold=unlikely_threshold,
-                                        language=self._last_language,
+                                if backchannel_probability >= backchannel_threshold:
+                                    logger.debug(
+                                        "backchannel opportunity",
+                                        extra={
+                                            "backchannel_probability": backchannel_probability,
+                                            "backchannel_threshold": backchannel_threshold,
+                                            "end_of_turn_probability": end_of_turn_probability,
+                                            "end_of_turn_threshold": unlikely_threshold,
+                                        },
+                                    )
+                                    self._hooks.on_agent_backchannel_opportunity(
+                                        _AgentBackchannelOpportunityEvent(
+                                            probability=backchannel_probability,
+                                            threshold=backchannel_threshold,
+                                            end_of_turn_probability=end_of_turn_probability,
+                                            end_of_turn_threshold=unlikely_threshold,
+                                            language=self._last_language,
+                                        )
+                                    )
+                                else:
+                                    logger.debug(
+                                        "backchannel skipped: below threshold",
+                                        extra={
+                                            "backchannel_probability": backchannel_probability,
+                                            "backchannel_threshold": backchannel_threshold,
+                                        },
                                     )
-                                )
                         if (
                             prediction_event is not None
                             and prediction_event.detection_delay is not None