From 017eac174c58a0cce1521b13ac2dd7d2018f0e2c Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Wed, 10 Dec 2025 08:38:59 +0530 Subject: [PATCH 01/20] init --- pydantic_ai_slim/pydantic_ai/_agent_graph.py | 35 +++++++++--- pydantic_ai_slim/pydantic_ai/_run_context.py | 2 + pydantic_ai_slim/pydantic_ai/_tool_manager.py | 20 +++++++ .../pydantic_ai/agent/__init__.py | 6 ++ pydantic_ai_slim/pydantic_ai/tools.py | 3 + .../pydantic_ai/toolsets/abstract.py | 2 + .../pydantic_ai/toolsets/combined.py | 1 + .../pydantic_ai/toolsets/function.py | 15 +++++ tests/test_tools.py | 56 +++++++++++++++++++ 9 files changed, 133 insertions(+), 7 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/_agent_graph.py b/pydantic_ai_slim/pydantic_ai/_agent_graph.py index 043c27c4f5..4400443920 100644 --- a/pydantic_ai_slim/pydantic_ai/_agent_graph.py +++ b/pydantic_ai_slim/pydantic_ai/_agent_graph.py @@ -95,6 +95,7 @@ class GraphAgentState: retries: int = 0 run_step: int = 0 run_id: str = dataclasses.field(default_factory=lambda: str(uuid.uuid4())) + tool_usage: dict[str, int] = dataclasses.field(default_factory=dict) def increment_retries( self, @@ -821,6 +822,7 @@ def build_run_context(ctx: GraphRunContext[GraphAgentState, GraphAgentDeps[DepsT else DEFAULT_INSTRUMENTATION_VERSION, run_step=ctx.state.run_step, run_id=ctx.state.run_id, + tool_usage=ctx.state.tool_usage, ) validation_context = build_validation_context(ctx.deps.validation_context, run_context) run_context = replace(run_context, validation_context=validation_context) @@ -1020,14 +1022,33 @@ async def _call_tools( projected_usage.tool_calls += len(tool_calls) usage_limits.check_before_tool_call(projected_usage) + calls_to_run: list[_messages.ToolCallPart] = [] + + # For each tool, check how many calls are going to be made and if it is over the limit + tool_call_counts: defaultdict[str, int] = defaultdict(int) + for call in tool_calls: + tool_call_counts[call.tool_name] += 1 + for call in tool_calls: - yield _messages.FunctionToolCallEvent(call) + current_tool_use = tool_manager.get_current_use_of_tool(call.tool_name) + max_tool_use = tool_manager.get_max_use_of_tool(call.tool_name) + if max_tool_use is not None and current_tool_use + tool_call_counts[call.tool_name] > max_tool_use: + return_part = _messages.ToolReturnPart( + tool_name=call.tool_name, + content=f'Tool call limit reached for tool "{call.tool_name}".', + tool_call_id=call.tool_call_id, + ) + output_parts.append(return_part) + yield _messages.FunctionToolResultEvent(return_part) + else: + yield _messages.FunctionToolCallEvent(call) + calls_to_run.append(call) with tracer.start_as_current_span( 'running tools', attributes={ - 'tools': [call.tool_name for call in tool_calls], - 'logfire.msg': f'running {len(tool_calls)} tool{"" if len(tool_calls) == 1 else "s"}', + 'tools': [call.tool_name for call in calls_to_run], + 'logfire.msg': f'running {len(calls_to_run)} tool{"" if len(calls_to_run) == 1 else "s"}', }, ): @@ -1061,8 +1082,8 @@ async def handle_call_or_result( return _messages.FunctionToolResultEvent(tool_part, content=tool_user_content) - if tool_manager.should_call_sequentially(tool_calls): - for index, call in enumerate(tool_calls): + if tool_manager.should_call_sequentially(calls_to_run): + for index, call in enumerate(calls_to_run): if event := await handle_call_or_result( _call_tool(tool_manager, call, tool_call_results.get(call.tool_call_id)), index, @@ -1075,7 +1096,7 @@ async def handle_call_or_result( _call_tool(tool_manager, call, tool_call_results.get(call.tool_call_id)), 
name=call.tool_name, ) - for call in tool_calls + for call in calls_to_run ] pending = tasks @@ -1092,7 +1113,7 @@ async def handle_call_or_result( output_parts.extend([user_parts_by_index[k] for k in sorted(user_parts_by_index)]) _populate_deferred_calls( - tool_calls, deferred_calls_by_index, deferred_metadata_by_index, output_deferred_calls, output_deferred_metadata + calls_to_run, deferred_calls_by_index, deferred_metadata_by_index, output_deferred_calls, output_deferred_metadata ) diff --git a/pydantic_ai_slim/pydantic_ai/_run_context.py b/pydantic_ai_slim/pydantic_ai/_run_context.py index b605bd8b54..d2013d055e 100644 --- a/pydantic_ai_slim/pydantic_ai/_run_context.py +++ b/pydantic_ai_slim/pydantic_ai/_run_context.py @@ -48,6 +48,8 @@ class RunContext(Generic[RunContextAgentDepsT]): """Instrumentation settings version, if instrumentation is enabled.""" retries: dict[str, int] = field(default_factory=dict) """Number of retries for each tool so far.""" + tool_usage: dict[str, int] = field(default_factory=dict) + """Number of calls for each tool so far.""" tool_call_id: str | None = None """The ID of the tool call.""" tool_name: str | None = None diff --git a/pydantic_ai_slim/pydantic_ai/_tool_manager.py b/pydantic_ai_slim/pydantic_ai/_tool_manager.py index 9a9f93e1ff..8e6f5ad21d 100644 --- a/pydantic_ai_slim/pydantic_ai/_tool_manager.py +++ b/pydantic_ai_slim/pydantic_ai/_tool_manager.py @@ -161,6 +161,8 @@ async def _call_tool( partial_output=allow_partial, ) + self.ctx.tool_usage[name] = self.ctx.tool_usage.get(name, 0) + 1 + pyd_allow_partial = 'trailing-strings' if allow_partial else 'off' validator = tool.args_validator if isinstance(call.args, str): @@ -274,3 +276,21 @@ async def _call_function_tool( ) return tool_result + + def get_max_use_of_tool(self, tool_name: str) -> int | None: + """Get the maximum number of uses allowed for a given tool, or `None` if unlimited.""" + if self.tools is None: + raise ValueError('ToolManager has not been prepared for a run step yet') # pragma: no cover + + tool = self.tools.get(tool_name, None) + if tool is None: + return None + + return tool.max_uses + + def get_current_use_of_tool(self, tool_name: str) -> int: + """Get the current number of uses of a given tool.""" + if self.ctx is None: + raise ValueError('ToolManager has not been prepared for a run step yet') # pragma: no cover + + return self.ctx.tool_usage.get(tool_name, 0) \ No newline at end of file diff --git a/pydantic_ai_slim/pydantic_ai/agent/__init__.py b/pydantic_ai_slim/pydantic_ai/agent/__init__.py index 19edb4a619..aed4a0811a 100644 --- a/pydantic_ai_slim/pydantic_ai/agent/__init__.py +++ b/pydantic_ai_slim/pydantic_ai/agent/__init__.py @@ -1032,6 +1032,7 @@ def tool( sequential: bool = False, requires_approval: bool = False, metadata: dict[str, Any] | None = None, + max_uses: int | None = None, ) -> Callable[[ToolFuncContext[AgentDepsT, ToolParams]], ToolFuncContext[AgentDepsT, ToolParams]]: ... def tool( @@ -1050,6 +1051,7 @@ def tool( sequential: bool = False, requires_approval: bool = False, metadata: dict[str, Any] | None = None, + max_uses: int | None = None, ) -> Any: """Decorator to register a tool function which takes [`RunContext`][pydantic_ai.tools.RunContext] as its first argument. 
@@ -1119,6 +1121,7 @@ def tool_decorator( sequential=sequential, requires_approval=requires_approval, metadata=metadata, + max_uses=max_uses, ) return func_ @@ -1143,6 +1146,7 @@ def tool_plain( sequential: bool = False, requires_approval: bool = False, metadata: dict[str, Any] | None = None, + max_uses: int | None = None, ) -> Callable[[ToolFuncPlain[ToolParams]], ToolFuncPlain[ToolParams]]: ... def tool_plain( @@ -1161,6 +1165,7 @@ def tool_plain( sequential: bool = False, requires_approval: bool = False, metadata: dict[str, Any] | None = None, + max_uses: int | None = None, ) -> Any: """Decorator to register a tool function which DOES NOT take `RunContext` as an argument. @@ -1228,6 +1233,7 @@ def tool_decorator(func_: ToolFuncPlain[ToolParams]) -> ToolFuncPlain[ToolParams sequential=sequential, requires_approval=requires_approval, metadata=metadata, + max_uses=max_uses, ) return func_ diff --git a/pydantic_ai_slim/pydantic_ai/tools.py b/pydantic_ai_slim/pydantic_ai/tools.py index dcd860b019..a801fc886d 100644 --- a/pydantic_ai_slim/pydantic_ai/tools.py +++ b/pydantic_ai_slim/pydantic_ai/tools.py @@ -264,6 +264,7 @@ class Tool(Generic[ToolAgentDepsT]): function: ToolFuncEither[ToolAgentDepsT] takes_ctx: bool max_retries: int | None + max_uses: int | None name: str description: str | None prepare: ToolPrepareFunc[ToolAgentDepsT] | None @@ -286,6 +287,7 @@ def __init__( *, takes_ctx: bool | None = None, max_retries: int | None = None, + max_uses: int | None = None, name: str | None = None, description: str | None = None, prepare: ToolPrepareFunc[ToolAgentDepsT] | None = None, @@ -364,6 +366,7 @@ async def prep_my_tool( ) self.takes_ctx = self.function_schema.takes_ctx self.max_retries = max_retries + self.max_uses = max_uses self.name = name or function.__name__ self.description = description or self.function_schema.description self.prepare = prepare diff --git a/pydantic_ai_slim/pydantic_ai/toolsets/abstract.py b/pydantic_ai_slim/pydantic_ai/toolsets/abstract.py index 98d9cd224f..a75bcdbd30 100644 --- a/pydantic_ai_slim/pydantic_ai/toolsets/abstract.py +++ b/pydantic_ai_slim/pydantic_ai/toolsets/abstract.py @@ -52,6 +52,8 @@ class ToolsetTool(Generic[AgentDepsT]): """The tool definition for this tool, including the name, description, and parameters.""" max_retries: int """The maximum number of retries to attempt if the tool call fails.""" + max_uses: int + """The maximum number of uses allowed for this tool.""" args_validator: SchemaValidator | SchemaValidatorProt """The Pydantic Core validator for the tool's arguments. 
diff --git a/pydantic_ai_slim/pydantic_ai/toolsets/combined.py b/pydantic_ai_slim/pydantic_ai/toolsets/combined.py index e095e4aa1f..0c4e9bd98b 100644 --- a/pydantic_ai_slim/pydantic_ai/toolsets/combined.py +++ b/pydantic_ai_slim/pydantic_ai/toolsets/combined.py @@ -77,6 +77,7 @@ async def get_tools(self, ctx: RunContext[AgentDepsT]) -> dict[str, ToolsetTool[ toolset=tool_toolset, tool_def=tool.tool_def, max_retries=tool.max_retries, + max_uses=tool.max_uses, args_validator=tool.args_validator, source_toolset=toolset, source_tool=tool, diff --git a/pydantic_ai_slim/pydantic_ai/toolsets/function.py b/pydantic_ai_slim/pydantic_ai/toolsets/function.py index e185ed0273..2c4f6aaebb 100644 --- a/pydantic_ai_slim/pydantic_ai/toolsets/function.py +++ b/pydantic_ai_slim/pydantic_ai/toolsets/function.py @@ -35,6 +35,7 @@ class FunctionToolset(AbstractToolset[AgentDepsT]): tools: dict[str, Tool[Any]] max_retries: int + max_uses: int | None _id: str | None docstring_format: DocstringFormat require_parameter_descriptions: bool @@ -45,6 +46,7 @@ def __init__( tools: Sequence[Tool[AgentDepsT] | ToolFuncEither[AgentDepsT, ...]] = [], *, max_retries: int = 1, + max_uses: int | None = None, docstring_format: DocstringFormat = 'auto', require_parameter_descriptions: bool = False, schema_generator: type[GenerateJsonSchema] = GenerateToolJsonSchema, @@ -60,6 +62,8 @@ def __init__( tools: The tools to add to the toolset. max_retries: The maximum number of retries for each tool during a run. Applies to all tools, unless overridden when adding a tool. + max_uses: The maximum number of uses allowed for each tool during a run. + Applies to all tools, unless overridden when adding a tool. docstring_format: Format of tool docstring, see [`DocstringFormat`][pydantic_ai.tools.DocstringFormat]. Defaults to `'auto'`, such that the format is inferred from the structure of the docstring. Applies to all tools, unless overridden when adding a tool. @@ -81,6 +85,7 @@ def __init__( """ self.max_retries = max_retries self._id = id + self.max_uses = max_uses self.docstring_format = docstring_format self.require_parameter_descriptions = require_parameter_descriptions self.schema_generator = schema_generator @@ -111,6 +116,7 @@ def tool( name: str | None = None, description: str | None = None, retries: int | None = None, + max_uses: int | None = None, prepare: ToolPrepareFunc[AgentDepsT] | None = None, docstring_format: DocstringFormat | None = None, require_parameter_descriptions: bool | None = None, @@ -129,6 +135,7 @@ def tool( name: str | None = None, description: str | None = None, retries: int | None = None, + max_uses: int | None = None, prepare: ToolPrepareFunc[AgentDepsT] | None = None, docstring_format: DocstringFormat | None = None, require_parameter_descriptions: bool | None = None, @@ -205,6 +212,7 @@ def tool_decorator( name=name, description=description, retries=retries, + max_uses=max_uses, prepare=prepare, docstring_format=docstring_format, require_parameter_descriptions=require_parameter_descriptions, @@ -225,6 +233,7 @@ def add_function( name: str | None = None, description: str | None = None, retries: int | None = None, + max_uses: int | None = None, prepare: ToolPrepareFunc[AgentDepsT] | None = None, docstring_format: DocstringFormat | None = None, require_parameter_descriptions: bool | None = None, @@ -248,6 +257,7 @@ def add_function( description: The description of the tool, defaults to the function docstring. 
retries: The number of retries to allow for this tool, defaults to the agent's default retries, which defaults to 1. + max_uses: The maximum number of uses allowed for this tool during a run. Defaults to None (unlimited). prepare: custom method to prepare the tool definition for each step, return `None` to omit this tool from a given step. This is useful if you want to customise a tool at call time, or omit it completely from a step. See [`ToolPrepareFunc`][pydantic_ai.tools.ToolPrepareFunc]. @@ -287,6 +297,7 @@ def add_function( name=name, description=description, max_retries=retries, + max_uses=max_uses, prepare=prepare, docstring_format=docstring_format, require_parameter_descriptions=require_parameter_descriptions, @@ -308,6 +319,8 @@ def add_tool(self, tool: Tool[AgentDepsT]) -> None: raise UserError(f'Tool name conflicts with existing tool: {tool.name!r}') if tool.max_retries is None: tool.max_retries = self.max_retries + if tool.max_uses is None: + tool.max_uses = self.max_uses if self.metadata is not None: tool.metadata = self.metadata | (tool.metadata or {}) self.tools[tool.name] = tool @@ -316,6 +329,7 @@ async def get_tools(self, ctx: RunContext[AgentDepsT]) -> dict[str, ToolsetTool[ tools: dict[str, ToolsetTool[AgentDepsT]] = {} for original_name, tool in self.tools.items(): max_retries = tool.max_retries if tool.max_retries is not None else self.max_retries + max_uses = tool.max_uses if tool.max_uses is not None else self.max_uses run_context = replace( ctx, tool_name=original_name, @@ -337,6 +351,7 @@ async def get_tools(self, ctx: RunContext[AgentDepsT]) -> dict[str, ToolsetTool[ toolset=self, tool_def=tool_def, max_retries=max_retries, + max_uses=max_uses, args_validator=tool.function_schema.validator, call_func=tool.function_schema.call, is_async=tool.function_schema.is_async, diff --git a/tests/test_tools.py b/tests/test_tools.py index bcdf537994..c577b35022 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -1309,6 +1309,62 @@ def infinite_retry_tool(ctx: RunContext[None]) -> int: assert call_last_attempt == snapshot([False, False, False, False, False, True]) +def test_tool_max_uses(): + agent = Agent(TestModel(), output_type=[str, DeferredToolRequests]) + + @agent.tool(max_uses=1) + def tool_with_max_use(ctx: RunContext[None]) -> str: + return 'Used' + + # Force the agent to use this tool now + + result = agent.run_sync('Hello') + assert result.output == snapshot('{"tool_with_max_use":"Used"}') + messages = result.all_messages() + assert messages == snapshot( + [ + ModelRequest( + parts=[ + UserPromptPart( + content='Hello', + timestamp=IsDatetime(), + ) + ], + run_id=IsStr(), + ), + ModelResponse( + parts=[ + ToolCallPart( + tool_name='tool_with_max_use', args={}, tool_call_id='pyd_ai_tool_call_id__tool_with_max_use' + ) + ], + usage=RequestUsage(input_tokens=51, output_tokens=2), + model_name='test', + timestamp=IsDatetime(), + run_id=IsStr(), + ), + ModelRequest( + parts=[ + ToolReturnPart( + tool_name='tool_with_max_use', + content='Used', + tool_call_id='pyd_ai_tool_call_id__tool_with_max_use', + timestamp=IsDatetime(), + ) + ], + run_id=IsStr(), + ), + ModelResponse( + parts=[TextPart(content='{"tool_with_max_use":"Used"}')], + usage=RequestUsage(input_tokens=52, output_tokens=6), + model_name='test', + timestamp=IsDatetime(), + run_id=IsStr(), + ), + ] + ) + + def test_tool_raises_call_deferred(): agent = Agent(TestModel(), output_type=[str, DeferredToolRequests]) From 190e23026b03cdeee57fe1c5382afdb931f79014 Mon Sep 17 00:00:00 2001 From: adtyavrdhn 
Date: Wed, 10 Dec 2025 14:53:37 +0530 Subject: [PATCH 02/20] docstrings --- pydantic_ai_slim/pydantic_ai/agent/__init__.py | 2 ++ pydantic_ai_slim/pydantic_ai/tools.py | 1 + pydantic_ai_slim/pydantic_ai/toolsets/function.py | 1 + tests/test_tools.py | 2 -- 4 files changed, 4 insertions(+), 2 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/agent/__init__.py b/pydantic_ai_slim/pydantic_ai/agent/__init__.py index aed4a0811a..9a8d8ac68c 100644 --- a/pydantic_ai_slim/pydantic_ai/agent/__init__.py +++ b/pydantic_ai_slim/pydantic_ai/agent/__init__.py @@ -1101,6 +1101,7 @@ async def spam(ctx: RunContext[str], y: float) -> float: requires_approval: Whether this tool requires human-in-the-loop approval. Defaults to False. See the [tools documentation](../deferred-tools.md#human-in-the-loop-tool-approval) for more info. metadata: Optional metadata for the tool. This is not sent to the model but can be used for filtering and tool behavior customization. + max_uses: Optional maximum number of times this tool can be used during a run. Defaults to None (unlimited). """ def tool_decorator( @@ -1215,6 +1216,7 @@ async def spam(ctx: RunContext[str]) -> float: requires_approval: Whether this tool requires human-in-the-loop approval. Defaults to False. See the [tools documentation](../deferred-tools.md#human-in-the-loop-tool-approval) for more info. metadata: Optional metadata for the tool. This is not sent to the model but can be used for filtering and tool behavior customization. + max_uses: Optional maximum number of times this tool can be used during a run. Defaults to None (unlimited). """ def tool_decorator(func_: ToolFuncPlain[ToolParams]) -> ToolFuncPlain[ToolParams]: diff --git a/pydantic_ai_slim/pydantic_ai/tools.py b/pydantic_ai_slim/pydantic_ai/tools.py index a801fc886d..bfccf18a97 100644 --- a/pydantic_ai_slim/pydantic_ai/tools.py +++ b/pydantic_ai_slim/pydantic_ai/tools.py @@ -339,6 +339,7 @@ async def prep_my_tool( takes_ctx: Whether the function takes a [`RunContext`][pydantic_ai.tools.RunContext] first argument, this is inferred if unset. max_retries: Maximum number of retries allowed for this tool, set to the agent default if `None`. + max_uses: The maximum number of uses allowed for this tool during a run. Defaults to None (unlimited). name: Name of the tool, inferred from the function if `None`. description: Description of the tool, inferred from the function if `None`. prepare: custom method to prepare the tool definition for each step, return `None` to omit this diff --git a/pydantic_ai_slim/pydantic_ai/toolsets/function.py b/pydantic_ai_slim/pydantic_ai/toolsets/function.py index 2c4f6aaebb..3a766283f7 100644 --- a/pydantic_ai_slim/pydantic_ai/toolsets/function.py +++ b/pydantic_ai_slim/pydantic_ai/toolsets/function.py @@ -181,6 +181,7 @@ async def spam(ctx: RunContext[str], y: float) -> float: description: The description of the tool,defaults to the function docstring. retries: The number of retries to allow for this tool, defaults to the agent's default retries, which defaults to 1. + max_uses: The maximum number of uses allowed for this tool during a run. Defaults to None (unlimited). prepare: custom method to prepare the tool definition for each step, return `None` to omit this tool from a given step. This is useful if you want to customise a tool at call time, or omit it completely from a step. See [`ToolPrepareFunc`][pydantic_ai.tools.ToolPrepareFunc]. 
diff --git a/tests/test_tools.py b/tests/test_tools.py index c577b35022..a96af78cc9 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -1316,8 +1316,6 @@ def test_tool_max_uses(): def tool_with_max_use(ctx: RunContext[None]) -> str: return 'Used' - # Force the agent to use this tool now - result = agent.run_sync('Hello') assert result.output == snapshot('{"tool_with_max_use":"Used"}') messages = result.all_messages() From c9d7aede070c8eb4646a614decbaf828dc3c3060 Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Wed, 10 Dec 2025 15:00:38 +0530 Subject: [PATCH 03/20] docstrings + args --- pydantic_ai_slim/pydantic_ai/agent/__init__.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/agent/__init__.py b/pydantic_ai_slim/pydantic_ai/agent/__init__.py index 9a8d8ac68c..2102f6206c 100644 --- a/pydantic_ai_slim/pydantic_ai/agent/__init__.py +++ b/pydantic_ai_slim/pydantic_ai/agent/__init__.py @@ -1024,6 +1024,7 @@ def tool( name: str | None = None, description: str | None = None, retries: int | None = None, + max_uses: int | None = None, prepare: ToolPrepareFunc[AgentDepsT] | None = None, docstring_format: DocstringFormat = 'auto', require_parameter_descriptions: bool = False, @@ -1032,7 +1033,6 @@ def tool( sequential: bool = False, requires_approval: bool = False, metadata: dict[str, Any] | None = None, - max_uses: int | None = None, ) -> Callable[[ToolFuncContext[AgentDepsT, ToolParams]], ToolFuncContext[AgentDepsT, ToolParams]]: ... def tool( @@ -1043,6 +1043,7 @@ def tool( name: str | None = None, description: str | None = None, retries: int | None = None, + max_uses: int | None = None, prepare: ToolPrepareFunc[AgentDepsT] | None = None, docstring_format: DocstringFormat = 'auto', require_parameter_descriptions: bool = False, @@ -1051,7 +1052,6 @@ def tool( sequential: bool = False, requires_approval: bool = False, metadata: dict[str, Any] | None = None, - max_uses: int | None = None, ) -> Any: """Decorator to register a tool function which takes [`RunContext`][pydantic_ai.tools.RunContext] as its first argument. @@ -1088,6 +1088,7 @@ async def spam(ctx: RunContext[str], y: float) -> float: description: The description of the tool, defaults to the function docstring. retries: The number of retries to allow for this tool, defaults to the agent's default retries, which defaults to 1. + max_uses: The maximum number of uses allowed for this tool during a run. Defaults to None (unlimited). prepare: custom method to prepare the tool definition for each step, return `None` to omit this tool from a given step. This is useful if you want to customise a tool at call time, or omit it completely from a step. See [`ToolPrepareFunc`][pydantic_ai.tools.ToolPrepareFunc]. @@ -1101,7 +1102,6 @@ async def spam(ctx: RunContext[str], y: float) -> float: requires_approval: Whether this tool requires human-in-the-loop approval. Defaults to False. See the [tools documentation](../deferred-tools.md#human-in-the-loop-tool-approval) for more info. metadata: Optional metadata for the tool. This is not sent to the model but can be used for filtering and tool behavior customization. - max_uses: Optional maximum number of times this tool can be used during a run. Defaults to None (unlimited). 
""" def tool_decorator( @@ -1114,6 +1114,7 @@ def tool_decorator( name=name, description=description, retries=retries, + max_uses=max_uses, prepare=prepare, docstring_format=docstring_format, require_parameter_descriptions=require_parameter_descriptions, @@ -1122,7 +1123,6 @@ def tool_decorator( sequential=sequential, requires_approval=requires_approval, metadata=metadata, - max_uses=max_uses, ) return func_ @@ -1139,6 +1139,7 @@ def tool_plain( name: str | None = None, description: str | None = None, retries: int | None = None, + max_uses: int | None = None, prepare: ToolPrepareFunc[AgentDepsT] | None = None, docstring_format: DocstringFormat = 'auto', require_parameter_descriptions: bool = False, @@ -1147,7 +1148,6 @@ def tool_plain( sequential: bool = False, requires_approval: bool = False, metadata: dict[str, Any] | None = None, - max_uses: int | None = None, ) -> Callable[[ToolFuncPlain[ToolParams]], ToolFuncPlain[ToolParams]]: ... def tool_plain( @@ -1158,6 +1158,7 @@ def tool_plain( name: str | None = None, description: str | None = None, retries: int | None = None, + max_uses: int | None = None, prepare: ToolPrepareFunc[AgentDepsT] | None = None, docstring_format: DocstringFormat = 'auto', require_parameter_descriptions: bool = False, @@ -1166,7 +1167,6 @@ def tool_plain( sequential: bool = False, requires_approval: bool = False, metadata: dict[str, Any] | None = None, - max_uses: int | None = None, ) -> Any: """Decorator to register a tool function which DOES NOT take `RunContext` as an argument. @@ -1203,6 +1203,7 @@ async def spam(ctx: RunContext[str]) -> float: description: The description of the tool, defaults to the function docstring. retries: The number of retries to allow for this tool, defaults to the agent's default retries, which defaults to 1. + max_uses: The maximum number of uses allowed for this tool during a run. Defaults to None (unlimited). prepare: custom method to prepare the tool definition for each step, return `None` to omit this tool from a given step. This is useful if you want to customise a tool at call time, or omit it completely from a step. See [`ToolPrepareFunc`][pydantic_ai.tools.ToolPrepareFunc]. @@ -1216,7 +1217,6 @@ async def spam(ctx: RunContext[str]) -> float: requires_approval: Whether this tool requires human-in-the-loop approval. Defaults to False. See the [tools documentation](../deferred-tools.md#human-in-the-loop-tool-approval) for more info. metadata: Optional metadata for the tool. This is not sent to the model but can be used for filtering and tool behavior customization. - max_uses: Optional maximum number of times this tool can be used during a run. Defaults to None (unlimited). 
""" def tool_decorator(func_: ToolFuncPlain[ToolParams]) -> ToolFuncPlain[ToolParams]: @@ -1227,6 +1227,7 @@ def tool_decorator(func_: ToolFuncPlain[ToolParams]) -> ToolFuncPlain[ToolParams name=name, description=description, retries=retries, + max_uses=max_uses, prepare=prepare, docstring_format=docstring_format, require_parameter_descriptions=require_parameter_descriptions, @@ -1235,7 +1236,6 @@ def tool_decorator(func_: ToolFuncPlain[ToolParams]) -> ToolFuncPlain[ToolParams sequential=sequential, requires_approval=requires_approval, metadata=metadata, - max_uses=max_uses, ) return func_ From 909716be502471c1cf58a04176f39004c43ad435 Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Wed, 10 Dec 2025 15:07:15 +0530 Subject: [PATCH 04/20] documentation --- docs/tools-advanced.md | 2 +- docs/tools.md | 2 +- pydantic_ai_slim/pydantic_ai/_tool_manager.py | 1 - 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/docs/tools-advanced.md b/docs/tools-advanced.md index b498f247f6..f304e56f0c 100644 --- a/docs/tools-advanced.md +++ b/docs/tools-advanced.md @@ -379,7 +379,7 @@ If a tool requires sequential/serial execution, you can pass the [`sequential`][ Async functions are run on the event loop, while sync functions are offloaded to threads. To get the best performance, _always_ use an async function _unless_ you're doing blocking I/O (and there's no way to use a non-blocking library instead) or CPU-bound work (like `numpy` or `scikit-learn` operations), so that simple functions are not offloaded to threads unnecessarily. !!! note "Limiting tool executions" - You can cap tool executions within a run using [`UsageLimits(tool_calls_limit=...)`](agents.md#usage-limits). The counter increments only after a successful tool invocation. Output tools (used for [structured output](output.md)) are not counted in the `tool_calls` metric. + You can cap the total number of tool executions within a run using [`UsageLimits(tool_calls_limit=...)`](agents.md#usage-limits). For finer control, you can limit how many times a *specific* tool can be called by setting the `max_uses` parameter when registering the tool (e.g., `@agent.tool(max_uses=3)` or `Tool(func, max_uses=3)`). Once a tool reaches its `max_uses` limit, it is automatically removed from the available tools for subsequent steps in the run. The `tool_calls` counter increments only after a successful tool invocation. Output tools (used for [structured output](output.md)) are not counted in the `tool_calls` metric. 
## See Also diff --git a/docs/tools.md b/docs/tools.md index 40dcf5c810..7d5b3c3211 100644 --- a/docs/tools.md +++ b/docs/tools.md @@ -361,7 +361,7 @@ _(This example is complete, it can be run "as is")_ For more tool features and integrations, see: -- [Advanced Tool Features](tools-advanced.md) - Custom schemas, dynamic tools, tool execution and retries +- [Advanced Tool Features](tools-advanced.md) - Custom schemas, dynamic tools, tool execution, retries, and usage limits - [Toolsets](toolsets.md) - Managing collections of tools - [Builtin Tools](builtin-tools.md) - Native tools provided by LLM providers - [Common Tools](common-tools.md) - Ready-to-use tool implementations diff --git a/pydantic_ai_slim/pydantic_ai/_tool_manager.py b/pydantic_ai_slim/pydantic_ai/_tool_manager.py index 8e6f5ad21d..231b380d3e 100644 --- a/pydantic_ai_slim/pydantic_ai/_tool_manager.py +++ b/pydantic_ai_slim/pydantic_ai/_tool_manager.py @@ -287,7 +287,6 @@ def get_max_use_of_tool(self, tool_name: str) -> int | None: return None return tool.max_uses - def get_current_use_of_tool(self, tool_name: str) -> int: """Get the current number of uses of a given tool.""" if self.ctx is None: From 3880ab690eed249310f4c8371072b8f7d1c7c51a Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Wed, 10 Dec 2025 15:33:04 +0530 Subject: [PATCH 05/20] type --- pydantic_ai_slim/pydantic_ai/_tool_manager.py | 17 ++++++++++++++--- .../pydantic_ai/toolsets/abstract.py | 2 +- 2 files changed, 15 insertions(+), 4 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/_tool_manager.py b/pydantic_ai_slim/pydantic_ai/_tool_manager.py index 231b380d3e..eac3abdc8a 100644 --- a/pydantic_ai_slim/pydantic_ai/_tool_manager.py +++ b/pydantic_ai_slim/pydantic_ai/_tool_manager.py @@ -66,11 +66,22 @@ async def for_run_step(self, ctx: RunContext[AgentDepsT]) -> ToolManager[AgentDe @property def tool_defs(self) -> list[ToolDefinition]: - """The tool definitions for the tools in this tool manager.""" - if self.tools is None: + """The tool definitions for the tools in this tool manager. + + Tools that have reached their `max_uses` limit are filtered out. + """ + if self.tools is None or self.ctx is None: raise ValueError('ToolManager has not been prepared for a run step yet') # pragma: no cover - return [tool.tool_def for tool in self.tools.values()] + result: list[ToolDefinition] = [] + for tool in self.tools.values(): + # Filter out tools that have reached their max_uses limit + if tool.max_uses is not None: + current_uses = self.ctx.tool_usage.get(tool.tool_def.name, 0) + if current_uses >= tool.max_uses: + continue + result.append(tool.tool_def) + return result def should_call_sequentially(self, calls: list[ToolCallPart]) -> bool: """Whether to require sequential tool calls for a list of tool calls.""" diff --git a/pydantic_ai_slim/pydantic_ai/toolsets/abstract.py b/pydantic_ai_slim/pydantic_ai/toolsets/abstract.py index a75bcdbd30..a99f0f49e2 100644 --- a/pydantic_ai_slim/pydantic_ai/toolsets/abstract.py +++ b/pydantic_ai_slim/pydantic_ai/toolsets/abstract.py @@ -52,7 +52,7 @@ class ToolsetTool(Generic[AgentDepsT]): """The tool definition for this tool, including the name, description, and parameters.""" max_retries: int """The maximum number of retries to attempt if the tool call fails.""" - max_uses: int + max_uses: int | None """The maximum number of uses allowed for this tool.""" args_validator: SchemaValidator | SchemaValidatorProt """The Pydantic Core validator for the tool's arguments. 
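A minimal usage sketch of the per-tool cap introduced in the patches above, assuming `max_uses` on `@agent.tool_plain` behaves as described in the PATCH 04 docs note (once the cap is reached, the tool is dropped from the tools offered on subsequent steps); the tool body and prompt are illustrative only:

```python
from pydantic_ai import Agent
from pydantic_ai.models.test import TestModel

agent = Agent(TestModel())


# Illustrative tool: after three successful calls within a single run,
# the per-tool cap should remove it from the tools offered to the model.
@agent.tool_plain(max_uses=3)
def fetch_fact(topic: str) -> str:
    return f'A fact about {topic}'


result = agent.run_sync('Tell me some facts')
print(result.output)
```
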
From 1d62583b049f20826f92c514614fe20544a23299 Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Wed, 10 Dec 2025 15:53:27 +0530 Subject: [PATCH 06/20] fix --- pydantic_ai_slim/pydantic_ai/_agent_graph.py | 22 +++++++++++++++---- pydantic_ai_slim/pydantic_ai/_tool_manager.py | 3 ++- 2 files changed, 20 insertions(+), 5 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/_agent_graph.py b/pydantic_ai_slim/pydantic_ai/_agent_graph.py index 4400443920..9650f7a2ab 100644 --- a/pydantic_ai_slim/pydantic_ai/_agent_graph.py +++ b/pydantic_ai_slim/pydantic_ai/_agent_graph.py @@ -1001,6 +1001,17 @@ async def process_tool_calls( # noqa: C901 output_final_result.append(final_result) +def _projection_count_of_tool_usage( + tool_call_counts: defaultdict[str, int], tool_calls: list[_messages.ToolCallPart] +) -> None: + """Populate a count of tool usage based on the provided tool calls for this run step. + + We will use this to make sure the calls do not exceed tool usage limits. + """ + for call in tool_calls: + tool_call_counts[call.tool_name] += 1 + + async def _call_tools( tool_manager: ToolManager[DepsT], tool_calls: list[_messages.ToolCallPart], @@ -1024,10 +1035,9 @@ async def _call_tools( calls_to_run: list[_messages.ToolCallPart] = [] - # For each tool, check how many calls are going to be made and if it is over the limit + # For each tool, check how many calls are going to be made tool_call_counts: defaultdict[str, int] = defaultdict(int) - for call in tool_calls: - tool_call_counts[call.tool_name] += 1 + _projection_count_of_tool_usage(tool_call_counts, tool_calls) for call in tool_calls: current_tool_use = tool_manager.get_current_use_of_tool(call.tool_name) @@ -1113,7 +1123,11 @@ async def handle_call_or_result( output_parts.extend([user_parts_by_index[k] for k in sorted(user_parts_by_index)]) _populate_deferred_calls( - calls_to_run, deferred_calls_by_index, deferred_metadata_by_index, output_deferred_calls, output_deferred_metadata + calls_to_run, + deferred_calls_by_index, + deferred_metadata_by_index, + output_deferred_calls, + output_deferred_metadata, ) diff --git a/pydantic_ai_slim/pydantic_ai/_tool_manager.py b/pydantic_ai_slim/pydantic_ai/_tool_manager.py index eac3abdc8a..a9959da8fe 100644 --- a/pydantic_ai_slim/pydantic_ai/_tool_manager.py +++ b/pydantic_ai_slim/pydantic_ai/_tool_manager.py @@ -298,9 +298,10 @@ def get_max_use_of_tool(self, tool_name: str) -> int | None: return None return tool.max_uses + def get_current_use_of_tool(self, tool_name: str) -> int: """Get the current number of uses of a given tool.""" if self.ctx is None: raise ValueError('ToolManager has not been prepared for a run step yet') # pragma: no cover - return self.ctx.tool_usage.get(tool_name, 0) \ No newline at end of file + return self.ctx.tool_usage.get(tool_name, 0) From 114d2ebf15ca11be1c8de8c3d66efb8e37c234bf Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Wed, 10 Dec 2025 16:07:17 +0530 Subject: [PATCH 07/20] better test --- tests/test_tools.py | 78 +++++++++++++++++++++++++++++++++++++-------- 1 file changed, 65 insertions(+), 13 deletions(-) diff --git a/tests/test_tools.py b/tests/test_tools.py index a96af78cc9..8a744df002 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -23,6 +23,7 @@ PrefixedToolset, RetryPromptPart, RunContext, + RunUsage, TextPart, Tool, ToolCallPart, @@ -1310,14 +1311,40 @@ def infinite_retry_tool(ctx: RunContext[None]) -> int: def test_tool_max_uses(): - agent = Agent(TestModel(), output_type=[str, DeferredToolRequests]) + """Test that a tool with max_uses=2 
can only be called twice, and the third call is rejected.""" + call_count = 0 + + def my_model(messages: list[ModelMessage], info: AgentInfo) -> ModelResponse: + nonlocal call_count + call_count += 1 + + if call_count == 1: + # First round: call the tool twice (will succeed, uses up the limit) + return ModelResponse( + parts=[ + ToolCallPart(tool_name='tool_with_max_use', args={}, tool_call_id='call_1'), + ToolCallPart(tool_name='tool_with_max_use', args={}, tool_call_id='call_2'), + ] + ) + elif call_count == 2: + # Second round: try to call the tool again (should be rejected) + return ModelResponse( + parts=[ + ToolCallPart(tool_name='tool_with_max_use', args={}, tool_call_id='call_3'), + ] + ) + else: + # Third round: return final output + return ModelResponse(parts=[TextPart(content='Done')]) + + agent = Agent(FunctionModel(my_model), output_type=str) - @agent.tool(max_uses=1) + @agent.tool(max_uses=2) def tool_with_max_use(ctx: RunContext[None]) -> str: return 'Used' result = agent.run_sync('Hello') - assert result.output == snapshot('{"tool_with_max_use":"Used"}') + assert result.output == snapshot('Done') messages = result.all_messages() assert messages == snapshot( [ @@ -1332,12 +1359,11 @@ def tool_with_max_use(ctx: RunContext[None]) -> str: ), ModelResponse( parts=[ - ToolCallPart( - tool_name='tool_with_max_use', args={}, tool_call_id='pyd_ai_tool_call_id__tool_with_max_use' - ) + ToolCallPart(tool_name='tool_with_max_use', args={}, tool_call_id='call_1'), + ToolCallPart(tool_name='tool_with_max_use', args={}, tool_call_id='call_2'), ], - usage=RequestUsage(input_tokens=51, output_tokens=2), - model_name='test', + usage=RequestUsage(input_tokens=51, output_tokens=4), + model_name=IsStr(), timestamp=IsDatetime(), run_id=IsStr(), ), @@ -1346,16 +1372,42 @@ def tool_with_max_use(ctx: RunContext[None]) -> str: ToolReturnPart( tool_name='tool_with_max_use', content='Used', - tool_call_id='pyd_ai_tool_call_id__tool_with_max_use', + tool_call_id='call_1', timestamp=IsDatetime(), - ) + ), + ToolReturnPart( + tool_name='tool_with_max_use', + content='Used', + tool_call_id='call_2', + timestamp=IsDatetime(), + ), ], run_id=IsStr(), ), ModelResponse( - parts=[TextPart(content='{"tool_with_max_use":"Used"}')], - usage=RequestUsage(input_tokens=52, output_tokens=6), - model_name='test', + parts=[ + ToolCallPart(tool_name='tool_with_max_use', args={}, tool_call_id='call_3'), + ], + usage=RequestUsage(input_tokens=53, output_tokens=6), + model_name=IsStr(), + timestamp=IsDatetime(), + run_id=IsStr(), + ), + ModelRequest( + parts=[ + ToolReturnPart( + tool_name='tool_with_max_use', + content='Tool call limit reached for tool "tool_with_max_use".', + tool_call_id='call_3', + timestamp=IsDatetime(), + ), + ], + run_id=IsStr(), + ), + ModelResponse( + parts=[TextPart(content='Done')], + usage=RequestUsage(input_tokens=61, output_tokens=7), + model_name=IsStr(), timestamp=IsDatetime(), run_id=IsStr(), ), From 64f359d06af22c42060413ce92bbd85545ffb754 Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Wed, 10 Dec 2025 16:07:52 +0530 Subject: [PATCH 08/20] better test --- tests/test_tools.py | 1 - 1 file changed, 1 deletion(-) diff --git a/tests/test_tools.py b/tests/test_tools.py index 8a744df002..264893db6a 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -23,7 +23,6 @@ PrefixedToolset, RetryPromptPart, RunContext, - RunUsage, TextPart, Tool, ToolCallPart, From 8ba5cc815ef5bf054a0baa0952b883a50e73447a Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Wed, 10 Dec 2025 18:00:56 +0530 Subject: 
[PATCH 09/20] passing max_uses --- pydantic_ai_slim/pydantic_ai/_output.py | 1 + pydantic_ai_slim/pydantic_ai/mcp.py | 1 + pydantic_ai_slim/pydantic_ai/toolsets/external.py | 1 + pydantic_ai_slim/pydantic_ai/toolsets/fastmcp.py | 1 + 4 files changed, 4 insertions(+) diff --git a/pydantic_ai_slim/pydantic_ai/_output.py b/pydantic_ai_slim/pydantic_ai/_output.py index e3adfbd190..df1a67fc12 100644 --- a/pydantic_ai_slim/pydantic_ai/_output.py +++ b/pydantic_ai_slim/pydantic_ai/_output.py @@ -972,6 +972,7 @@ async def get_tools(self, ctx: RunContext[AgentDepsT]) -> dict[str, ToolsetTool[ toolset=self, tool_def=tool_def, max_retries=self.max_retries, + max_uses=None, args_validator=self.processors[tool_def.name].validator, ) for tool_def in self._tool_defs diff --git a/pydantic_ai_slim/pydantic_ai/mcp.py b/pydantic_ai_slim/pydantic_ai/mcp.py index 227b8e1399..cc682c6fd3 100644 --- a/pydantic_ai_slim/pydantic_ai/mcp.py +++ b/pydantic_ai_slim/pydantic_ai/mcp.py @@ -590,6 +590,7 @@ def tool_for_tool_def(self, tool_def: ToolDefinition) -> ToolsetTool[Any]: toolset=self, tool_def=tool_def, max_retries=self.max_retries, + max_uses=None, args_validator=TOOL_SCHEMA_VALIDATOR, ) diff --git a/pydantic_ai_slim/pydantic_ai/toolsets/external.py b/pydantic_ai_slim/pydantic_ai/toolsets/external.py index 9ec4a0e0c7..adf4080bd8 100644 --- a/pydantic_ai_slim/pydantic_ai/toolsets/external.py +++ b/pydantic_ai_slim/pydantic_ai/toolsets/external.py @@ -36,6 +36,7 @@ async def get_tools(self, ctx: RunContext[AgentDepsT]) -> dict[str, ToolsetTool[ toolset=self, tool_def=replace(tool_def, kind='external'), max_retries=0, + max_uses=None, args_validator=TOOL_SCHEMA_VALIDATOR, ) for tool_def in self.tool_defs diff --git a/pydantic_ai_slim/pydantic_ai/toolsets/fastmcp.py b/pydantic_ai_slim/pydantic_ai/toolsets/fastmcp.py index 2d907266fd..4c5a72c21d 100644 --- a/pydantic_ai_slim/pydantic_ai/toolsets/fastmcp.py +++ b/pydantic_ai_slim/pydantic_ai/toolsets/fastmcp.py @@ -170,6 +170,7 @@ def tool_for_tool_def(self, tool_def: ToolDefinition) -> ToolsetTool[AgentDepsT] tool_def=tool_def, toolset=self, max_retries=self.max_retries, + max_uses=None, args_validator=TOOL_SCHEMA_VALIDATOR, ) From ad40e365b4699d7c905054b70ec42a2c92def9a6 Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Tue, 16 Dec 2025 20:13:49 +0530 Subject: [PATCH 10/20] Adding hard / soft limit enforcement on UsageLimit directly, not sure if it should be somewhere else altogether to cover granular limit exceeded as well? 
--- pydantic_ai_slim/pydantic_ai/_agent_graph.py | 6 ++++-- pydantic_ai_slim/pydantic_ai/tools.py | 4 ++-- pydantic_ai_slim/pydantic_ai/usage.py | 18 +++++++++++++++--- 3 files changed, 21 insertions(+), 7 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/_agent_graph.py b/pydantic_ai_slim/pydantic_ai/_agent_graph.py index ea3a72dbb1..b307818e7d 100644 --- a/pydantic_ai_slim/pydantic_ai/_agent_graph.py +++ b/pydantic_ai_slim/pydantic_ai/_agent_graph.py @@ -1047,10 +1047,12 @@ async def _call_tools( deferred_calls_by_index: dict[int, Literal['external', 'unapproved']] = {} deferred_metadata_by_index: dict[int, dict[str, Any] | None] = {} + can_make_tool_calls: bool = True + if usage_limits.tool_calls_limit is not None: projected_usage = deepcopy(usage) projected_usage.tool_calls += len(tool_calls) - usage_limits.check_before_tool_call(projected_usage) + can_make_tool_calls = cast(bool, usage_limits.check_before_tool_call(projected_usage)) calls_to_run: list[_messages.ToolCallPart] = [] @@ -1061,7 +1063,7 @@ async def _call_tools( for call in tool_calls: current_tool_use = tool_manager.get_current_use_of_tool(call.tool_name) max_tool_use = tool_manager.get_max_use_of_tool(call.tool_name) - if max_tool_use is not None and current_tool_use + tool_call_counts[call.tool_name] > max_tool_use: + if (max_tool_use is not None and current_tool_use + tool_call_counts[call.tool_name] > max_tool_use) or not can_make_tool_calls: return_part = _messages.ToolReturnPart( tool_name=call.tool_name, content=f'Tool call limit reached for tool "{call.tool_name}".', diff --git a/pydantic_ai_slim/pydantic_ai/tools.py b/pydantic_ai_slim/pydantic_ai/tools.py index 64e0b4ea66..3cc37ba822 100644 --- a/pydantic_ai_slim/pydantic_ai/tools.py +++ b/pydantic_ai_slim/pydantic_ai/tools.py @@ -9,10 +9,10 @@ from pydantic_core import SchemaValidator, core_schema from typing_extensions import ParamSpec, Self, TypeVar -from . import _function_schema, _utils +from . import ToolReturnPart, _function_schema, _utils from ._run_context import AgentDepsT, RunContext from .builtin_tools import AbstractBuiltinTool -from .exceptions import ModelRetry +from .exceptions import ModelRetry, UsageLimitExceeded from .messages import RetryPromptPart, ToolCallPart, ToolReturn __all__ = ( diff --git a/pydantic_ai_slim/pydantic_ai/usage.py b/pydantic_ai_slim/pydantic_ai/usage.py index 586af8dcfc..95a8510aba 100644 --- a/pydantic_ai_slim/pydantic_ai/usage.py +++ b/pydantic_ai_slim/pydantic_ai/usage.py @@ -3,13 +3,13 @@ import dataclasses from copy import copy from dataclasses import dataclass, fields -from typing import Annotated, Any +from typing import Annotated, Any, Literal from genai_prices.data_snapshot import get_snapshot from pydantic import AliasChoices, BeforeValidator, Field from typing_extensions import deprecated, overload -from . import _utils +from . import ToolReturnPart, _utils from .exceptions import UsageLimitExceeded __all__ = 'RequestUsage', 'RunUsage', 'Usage', 'UsageLimits' @@ -259,6 +259,11 @@ class UsageLimits: """The maximum number of requests allowed to the model.""" tool_calls_limit: int | None = None """The maximum number of successful tool calls allowed to be executed.""" + tool_calls_limit_enforcement: Literal['soft', 'hard'] = 'soft' + """ Whether to enforce the tool calls limit in a 'soft' or 'hard' manner. + Hard limits will raise UsageLimitExceeded before making a tool call that would exceed the limit. 
+ Soft limits will return a ToolReturnPart indicating the limit would be exceeded so the tool call cannot be made. You can customize this response in PromptConfig + """ input_tokens_limit: int | None = None """The maximum number of input/prompt tokens allowed.""" output_tokens_limit: int | None = None @@ -290,6 +295,7 @@ def __init__( output_tokens_limit: int | None = None, total_tokens_limit: int | None = None, count_tokens_before_request: bool = False, + tool_calls_limit_enforcement: Literal['soft', 'hard'] = 'soft', ) -> None: self.request_limit = request_limit self.tool_calls_limit = tool_calls_limit @@ -297,6 +303,7 @@ def __init__( self.output_tokens_limit = output_tokens_limit self.total_tokens_limit = total_tokens_limit self.count_tokens_before_request = count_tokens_before_request + self.tool_calls_limit_enforcement = tool_calls_limit_enforcement @overload @deprecated( @@ -328,6 +335,7 @@ def __init__( output_tokens_limit: int | None = None, total_tokens_limit: int | None = None, count_tokens_before_request: bool = False, + # deprecated: request_tokens_limit: int | None = None, response_tokens_limit: int | None = None, @@ -385,13 +393,17 @@ def check_tokens(self, usage: RunUsage) -> None: if self.total_tokens_limit is not None and total_tokens > self.total_tokens_limit: raise UsageLimitExceeded(f'Exceeded the total_tokens_limit of {self.total_tokens_limit} ({total_tokens=})') - def check_before_tool_call(self, projected_usage: RunUsage) -> None: + def check_before_tool_call(self, projected_usage: RunUsage) -> None | bool: """Raises a `UsageLimitExceeded` exception if the next tool call(s) would exceed the tool call limit.""" tool_calls_limit = self.tool_calls_limit tool_calls = projected_usage.tool_calls if tool_calls_limit is not None and tool_calls > tool_calls_limit: + if self.tool_calls_limit_enforcement == 'soft': + return False raise UsageLimitExceeded( f'The next tool call(s) would exceed the tool_calls_limit of {tool_calls_limit} ({tool_calls=}).' 
) + + return True __repr__ = _utils.dataclasses_no_defaults_repr From 7554e0da81fdda4b615a6e4c60d49df427122a23 Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Tue, 16 Dec 2025 20:19:50 +0530 Subject: [PATCH 11/20] lint --- pydantic_ai_slim/pydantic_ai/_agent_graph.py | 5 ++++- pydantic_ai_slim/pydantic_ai/tools.py | 4 ++-- pydantic_ai_slim/pydantic_ai/usage.py | 5 ++--- 3 files changed, 8 insertions(+), 6 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/_agent_graph.py b/pydantic_ai_slim/pydantic_ai/_agent_graph.py index b307818e7d..c28f042d33 100644 --- a/pydantic_ai_slim/pydantic_ai/_agent_graph.py +++ b/pydantic_ai_slim/pydantic_ai/_agent_graph.py @@ -1063,11 +1063,14 @@ async def _call_tools( for call in tool_calls: current_tool_use = tool_manager.get_current_use_of_tool(call.tool_name) max_tool_use = tool_manager.get_max_use_of_tool(call.tool_name) - if (max_tool_use is not None and current_tool_use + tool_call_counts[call.tool_name] > max_tool_use) or not can_make_tool_calls: + if ( + max_tool_use is not None and current_tool_use + tool_call_counts[call.tool_name] > max_tool_use + ) or not can_make_tool_calls: return_part = _messages.ToolReturnPart( tool_name=call.tool_name, content=f'Tool call limit reached for tool "{call.tool_name}".', tool_call_id=call.tool_call_id, + # TODO: Add return kind and prompt_config here once supported by #3656 ) output_parts.append(return_part) yield _messages.FunctionToolResultEvent(return_part) diff --git a/pydantic_ai_slim/pydantic_ai/tools.py b/pydantic_ai_slim/pydantic_ai/tools.py index 3cc37ba822..64e0b4ea66 100644 --- a/pydantic_ai_slim/pydantic_ai/tools.py +++ b/pydantic_ai_slim/pydantic_ai/tools.py @@ -9,10 +9,10 @@ from pydantic_core import SchemaValidator, core_schema from typing_extensions import ParamSpec, Self, TypeVar -from . import ToolReturnPart, _function_schema, _utils +from . import _function_schema, _utils from ._run_context import AgentDepsT, RunContext from .builtin_tools import AbstractBuiltinTool -from .exceptions import ModelRetry, UsageLimitExceeded +from .exceptions import ModelRetry from .messages import RetryPromptPart, ToolCallPart, ToolReturn __all__ = ( diff --git a/pydantic_ai_slim/pydantic_ai/usage.py b/pydantic_ai_slim/pydantic_ai/usage.py index 95a8510aba..8532ce5590 100644 --- a/pydantic_ai_slim/pydantic_ai/usage.py +++ b/pydantic_ai_slim/pydantic_ai/usage.py @@ -9,7 +9,7 @@ from pydantic import AliasChoices, BeforeValidator, Field from typing_extensions import deprecated, overload -from . import ToolReturnPart, _utils +from . import _utils from .exceptions import UsageLimitExceeded __all__ = 'RequestUsage', 'RunUsage', 'Usage', 'UsageLimits' @@ -335,7 +335,6 @@ def __init__( output_tokens_limit: int | None = None, total_tokens_limit: int | None = None, count_tokens_before_request: bool = False, - # deprecated: request_tokens_limit: int | None = None, response_tokens_limit: int | None = None, @@ -403,7 +402,7 @@ def check_before_tool_call(self, projected_usage: RunUsage) -> None | bool: raise UsageLimitExceeded( f'The next tool call(s) would exceed the tool_calls_limit of {tool_calls_limit} ({tool_calls=}).' 
) - + return True __repr__ = _utils.dataclasses_no_defaults_repr From 3352ab3f088699f7da4793f67f26533336bcc5be Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Tue, 16 Dec 2025 20:31:39 +0530 Subject: [PATCH 12/20] casting to none --- pydantic_ai_slim/pydantic_ai/_agent_graph.py | 2 +- pydantic_ai_slim/pydantic_ai/usage.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/_agent_graph.py b/pydantic_ai_slim/pydantic_ai/_agent_graph.py index c28f042d33..deee95b9ae 100644 --- a/pydantic_ai_slim/pydantic_ai/_agent_graph.py +++ b/pydantic_ai_slim/pydantic_ai/_agent_graph.py @@ -1052,7 +1052,7 @@ async def _call_tools( if usage_limits.tool_calls_limit is not None: projected_usage = deepcopy(usage) projected_usage.tool_calls += len(tool_calls) - can_make_tool_calls = cast(bool, usage_limits.check_before_tool_call(projected_usage)) + can_make_tool_calls = bool(usage_limits.check_before_tool_call(projected_usage)) calls_to_run: list[_messages.ToolCallPart] = [] diff --git a/pydantic_ai_slim/pydantic_ai/usage.py b/pydantic_ai_slim/pydantic_ai/usage.py index 8532ce5590..b4c985ae88 100644 --- a/pydantic_ai_slim/pydantic_ai/usage.py +++ b/pydantic_ai_slim/pydantic_ai/usage.py @@ -262,7 +262,7 @@ class UsageLimits: tool_calls_limit_enforcement: Literal['soft', 'hard'] = 'soft' """ Whether to enforce the tool calls limit in a 'soft' or 'hard' manner. Hard limits will raise UsageLimitExceeded before making a tool call that would exceed the limit. - Soft limits will return a ToolReturnPart indicating the limit would be exceeded so the tool call cannot be made. You can customize this response in PromptConfig + Soft limits will return a ToolReturnPart indicating the limit would be exceeded so the tool call cannot be made. You can customize this response using PromptTemplates. """ input_tokens_limit: int | None = None """The maximum number of input/prompt tokens allowed.""" From 89265a4d2872c32f4d14dc2bb9a338da6363ef8c Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Tue, 16 Dec 2025 20:35:07 +0530 Subject: [PATCH 13/20] default should be hard as per the existing behaviour --- pydantic_ai_slim/pydantic_ai/usage.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydantic_ai_slim/pydantic_ai/usage.py b/pydantic_ai_slim/pydantic_ai/usage.py index b4c985ae88..9928278e1f 100644 --- a/pydantic_ai_slim/pydantic_ai/usage.py +++ b/pydantic_ai_slim/pydantic_ai/usage.py @@ -259,7 +259,7 @@ class UsageLimits: """The maximum number of requests allowed to the model.""" tool_calls_limit: int | None = None """The maximum number of successful tool calls allowed to be executed.""" - tool_calls_limit_enforcement: Literal['soft', 'hard'] = 'soft' + tool_calls_limit_enforcement: Literal['soft', 'hard'] = 'hard' """ Whether to enforce the tool calls limit in a 'soft' or 'hard' manner. Hard limits will raise UsageLimitExceeded before making a tool call that would exceed the limit. Soft limits will return a ToolReturnPart indicating the limit would be exceeded so the tool call cannot be made. You can customize this response using PromptTemplates. 
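A minimal sketch of the run-level limit with the enforcement mode added above, assuming `tool_calls_limit_enforcement='soft'` behaves as the docstring describes (an over-limit call is answered with a "Tool call limit reached" tool return instead of raising `UsageLimitExceeded`); the tool and prompt mirror the test added later in the series and are illustrative only:

```python
from pydantic_ai import Agent
from pydantic_ai.models.test import TestModel
from pydantic_ai.usage import UsageLimits

agent = Agent(TestModel())


@agent.tool_plain
def ret_a(x: str) -> str:
    return f'{x}-apple'


# With a soft limit of 0 tool calls, the run still completes: the attempted
# call receives the "limit reached" message and the model produces a final
# response from that, rather than the run aborting with UsageLimitExceeded.
result = agent.run_sync(
    'Hello',
    usage_limits=UsageLimits(tool_calls_limit=0, tool_calls_limit_enforcement='soft'),
)
print(result.output)
```
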
From 776e542cf4b4d5cea7e748c36ef61d00b7175477 Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Tue, 16 Dec 2025 20:40:12 +0530 Subject: [PATCH 14/20] Adding test to cover soft limit on global tool usage limit exceeded --- pydantic_ai_slim/pydantic_ai/usage.py | 2 ++ tests/test_usage_limits.py | 49 ++++++++++++++++++++++++++- 2 files changed, 50 insertions(+), 1 deletion(-) diff --git a/pydantic_ai_slim/pydantic_ai/usage.py b/pydantic_ai_slim/pydantic_ai/usage.py index 9928278e1f..092aefadc4 100644 --- a/pydantic_ai_slim/pydantic_ai/usage.py +++ b/pydantic_ai_slim/pydantic_ai/usage.py @@ -335,6 +335,7 @@ def __init__( output_tokens_limit: int | None = None, total_tokens_limit: int | None = None, count_tokens_before_request: bool = False, + tool_calls_limit_enforcement: Literal['soft', 'hard'] = 'hard', # deprecated: request_tokens_limit: int | None = None, response_tokens_limit: int | None = None, @@ -345,6 +346,7 @@ def __init__( self.output_tokens_limit = output_tokens_limit or response_tokens_limit self.total_tokens_limit = total_tokens_limit self.count_tokens_before_request = count_tokens_before_request + self.tool_calls_limit_enforcement = tool_calls_limit_enforcement def has_token_limits(self) -> bool: """Returns `True` if this instance places any limits on token counts. diff --git a/tests/test_usage_limits.py b/tests/test_usage_limits.py index ac17fd0be5..654458067f 100644 --- a/tests/test_usage_limits.py +++ b/tests/test_usage_limits.py @@ -17,6 +17,7 @@ ModelRequest, ModelResponse, RunContext, + TextPart, ToolCallPart, ToolReturnPart, UsageLimitExceeded, @@ -28,7 +29,7 @@ from pydantic_ai.output import ToolOutput from pydantic_ai.usage import RequestUsage, RunUsage, UsageLimits -from .conftest import IsNow, IsStr +from .conftest import IsDatetime, IsNow, IsStr pytestmark = pytest.mark.anyio @@ -270,6 +271,52 @@ async def ret_a(x: str) -> str: assert result.usage() == snapshot(RunUsage(requests=2, input_tokens=103, output_tokens=14, tool_calls=1)) +async def test_tool_call_limit_soft() -> None: + test_agent = Agent(TestModel()) + + @test_agent.tool_plain + async def ret_a(x: str) -> str: + return f'{x}-apple' + + result = await test_agent.run( + 'Hello', usage_limits=UsageLimits(tool_calls_limit=0, tool_calls_limit_enforcement='soft') + ) + + assert result.all_messages() == snapshot( + [ + ModelRequest( + parts=[UserPromptPart(content='Hello', timestamp=IsDatetime())], + run_id=IsStr(), + ), + ModelResponse( + parts=[ToolCallPart(tool_name='ret_a', args={'x': 'a'}, tool_call_id=IsStr())], + usage=RequestUsage(input_tokens=51, output_tokens=5), + model_name='test', + timestamp=IsDatetime(), + run_id=IsStr(), + ), + ModelRequest( + parts=[ + ToolReturnPart( + tool_name='ret_a', + content='Tool call limit reached for tool "ret_a".', + tool_call_id=IsStr(), + timestamp=IsDatetime(), + ) + ], + run_id=IsStr(), + ), + ModelResponse( + parts=[TextPart(content='{"ret_a":"Tool call limit reached for tool \\"ret_a\\"."}')], + usage=RequestUsage(input_tokens=59, output_tokens=16), + model_name='test', + timestamp=IsDatetime(), + run_id=IsStr(), + ), + ] + ) + + async def test_output_tool_not_counted() -> None: """Test that output tools are not counted in tool_calls usage metric.""" test_agent = Agent(TestModel()) From f981f991d0bd083423ab28f63ba21881226a577a Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Tue, 16 Dec 2025 20:40:41 +0530 Subject: [PATCH 15/20] renaming test --- tests/test_usage_limits.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/test_usage_limits.py 
b/tests/test_usage_limits.py index 654458067f..96ff306a19 100644 --- a/tests/test_usage_limits.py +++ b/tests/test_usage_limits.py @@ -271,7 +271,7 @@ async def ret_a(x: str) -> str: assert result.usage() == snapshot(RunUsage(requests=2, input_tokens=103, output_tokens=14, tool_calls=1)) -async def test_tool_call_limit_soft() -> None: +async def test_tool_call_soft_limit() -> None: test_agent = Agent(TestModel()) @test_agent.tool_plain From 34b830a2ba9135ed5d912904bd31fff37f17ca4c Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Tue, 16 Dec 2025 20:56:42 +0530 Subject: [PATCH 16/20] Fixing inconsitency in one of the docstrings --- pydantic_ai_slim/pydantic_ai/toolsets/function.py | 2 -- pydantic_ai_slim/pydantic_ai/usage.py | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/toolsets/function.py b/pydantic_ai_slim/pydantic_ai/toolsets/function.py index 30b6ec8291..d611d907d6 100644 --- a/pydantic_ai_slim/pydantic_ai/toolsets/function.py +++ b/pydantic_ai_slim/pydantic_ai/toolsets/function.py @@ -73,8 +73,6 @@ def __init__( Applies to all tools, unless overridden when adding a tool. max_uses: The maximum number of uses allowed for each tool during a run. Applies to all tools, unless overridden when adding a tool. - max_uses: The maximum number of uses allowed for each tool during a run. - Applies to all tools, unless overridden when adding a tool. timeout: Timeout in seconds for tool execution. If a tool takes longer than this, a retry prompt is returned to the model. Individual tools can override this with their own timeout. Defaults to None (no timeout). diff --git a/pydantic_ai_slim/pydantic_ai/usage.py b/pydantic_ai_slim/pydantic_ai/usage.py index 092aefadc4..0a07fab953 100644 --- a/pydantic_ai_slim/pydantic_ai/usage.py +++ b/pydantic_ai_slim/pydantic_ai/usage.py @@ -295,7 +295,7 @@ def __init__( output_tokens_limit: int | None = None, total_tokens_limit: int | None = None, count_tokens_before_request: bool = False, - tool_calls_limit_enforcement: Literal['soft', 'hard'] = 'soft', + tool_calls_limit_enforcement: Literal['soft', 'hard'] = 'hard', ) -> None: self.request_limit = request_limit self.tool_calls_limit = tool_calls_limit From 364295b4debd711bc3a4ded7f58ae6446219681a Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Tue, 16 Dec 2025 21:45:09 +0530 Subject: [PATCH 17/20] Adding doc and max_uses to runContext for use of the users --- docs/tools-advanced.md | 30 +++++++++++++++++++ pydantic_ai_slim/pydantic_ai/_run_context.py | 2 ++ pydantic_ai_slim/pydantic_ai/_tool_manager.py | 1 + .../pydantic_ai/toolsets/function.py | 1 + 4 files changed, 34 insertions(+) diff --git a/docs/tools-advanced.md b/docs/tools-advanced.md index 57f16a2abc..bd7bfcd0d1 100644 --- a/docs/tools-advanced.md +++ b/docs/tools-advanced.md @@ -413,6 +413,36 @@ Async functions are run on the event loop, while sync functions are offloaded to !!! note "Limiting tool executions" You can cap the total number of tool executions within a run using [`UsageLimits(tool_calls_limit=...)`](agents.md#usage-limits). For finer control, you can limit how many times a *specific* tool can be called by setting the `max_uses` parameter when registering the tool (e.g., `@agent.tool(max_uses=3)` or `Tool(func, max_uses=3)`). Once a tool reaches its `max_uses` limit, it is automatically removed from the available tools for subsequent steps in the run. The `tool_calls` counter increments only after a successful tool invocation. 
Output tools (used for [structured output](output.md)) are not counted in the `tool_calls` metric. +#### Raising Hard Errors on Tool Usage Limits + +By default, when a tool reaches its `max_uses` limit, it is silently removed from the available tools. If you want to raise an error instead, you can use a [`prepare`](#tool-prepare) function to check the tool usage and raise a [`UsageLimitExceeded`][pydantic_ai.exceptions.UsageLimitExceeded] exception: + +```python {title="tool_max_uses_hard_error.py"} +from typing import Any + +from pydantic_ai import Agent, RunContext, ToolDefinition +from pydantic_ai.exceptions import UsageLimitExceeded + +agent = Agent('test') + + +async def raise_on_limit( + ctx: RunContext[Any], tool_def: ToolDefinition +) -> ToolDefinition | None: + if ctx.max_uses and ctx.tool_usage.get(tool_def.name, 0) >= ctx.max_uses: + raise UsageLimitExceeded( + f'Tool "{tool_def.name}" has reached its usage limit of {ctx.max_uses}.' + ) + return tool_def + + +@agent.tool(max_uses=2, prepare=raise_on_limit) +def limited_tool(ctx: RunContext[None]) -> str: + return 'Tool executed' +``` + +In this example, when `limited_tool` is called more than twice, a `UsageLimitExceeded` error will be raised instead of silently removing the tool. + #### Output Tool Calls When a model calls an [output tool](output.md#tool-output) in parallel with other tools, the agent's [`end_strategy`][pydantic_ai.agent.Agent.end_strategy] parameter controls how these tool calls are executed. diff --git a/pydantic_ai_slim/pydantic_ai/_run_context.py b/pydantic_ai_slim/pydantic_ai/_run_context.py index d2013d055e..ac9440c46a 100644 --- a/pydantic_ai_slim/pydantic_ai/_run_context.py +++ b/pydantic_ai_slim/pydantic_ai/_run_context.py @@ -58,6 +58,8 @@ class RunContext(Generic[RunContextAgentDepsT]): """Number of retries of this tool so far.""" max_retries: int = 0 """The maximum number of retries of this tool.""" + max_uses: int = 0 + """The maximum number of times this tool can be used in the run.""" run_step: int = 0 """The current step in the run.""" tool_call_approved: bool = False diff --git a/pydantic_ai_slim/pydantic_ai/_tool_manager.py b/pydantic_ai_slim/pydantic_ai/_tool_manager.py index cc5ff90a7f..d1724cd55a 100644 --- a/pydantic_ai_slim/pydantic_ai/_tool_manager.py +++ b/pydantic_ai_slim/pydantic_ai/_tool_manager.py @@ -170,6 +170,7 @@ async def _call_tool( max_retries=tool.max_retries, tool_call_approved=approved, partial_output=allow_partial, + max_uses=tool.max_uses, ) self.ctx.tool_usage[name] = self.ctx.tool_usage.get(name, 0) + 1 diff --git a/pydantic_ai_slim/pydantic_ai/toolsets/function.py b/pydantic_ai_slim/pydantic_ai/toolsets/function.py index d611d907d6..ff4442e232 100644 --- a/pydantic_ai_slim/pydantic_ai/toolsets/function.py +++ b/pydantic_ai_slim/pydantic_ai/toolsets/function.py @@ -358,6 +358,7 @@ async def get_tools(self, ctx: RunContext[AgentDepsT]) -> dict[str, ToolsetTool[ tool_name=original_name, retry=ctx.retries.get(original_name, 0), max_retries=max_retries, + max_uses=max_uses if max_uses is not None else 0, ) tool_def = await tool.prepare_tool_def(run_context) if not tool_def: From 283a3f3acc742b3eec9067e66e6b840ae79beb94 Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Tue, 16 Dec 2025 22:35:15 +0530 Subject: [PATCH 18/20] Adding max_tool_calls, soft limit to agent. 
Using tool manager to check and enforce --- pydantic_ai_slim/pydantic_ai/_agent_graph.py | 9 ++-- pydantic_ai_slim/pydantic_ai/_run_context.py | 2 + pydantic_ai_slim/pydantic_ai/_tool_manager.py | 16 ++++++ .../pydantic_ai/agent/__init__.py | 11 +++++ .../pydantic_ai/agent/abstract.py | 31 ++++++++++++ pydantic_ai_slim/pydantic_ai/agent/wrapper.py | 5 ++ .../pydantic_ai/durable_exec/dbos/_agent.py | 28 +++++++++++ .../durable_exec/prefect/_agent.py | 25 ++++++++++ .../durable_exec/temporal/_agent.py | 25 ++++++++++ pydantic_ai_slim/pydantic_ai/usage.py | 17 +------ tests/test_tools.py | 48 ++++++++++++++++++ tests/test_usage_limits.py | 49 +------------------ 12 files changed, 200 insertions(+), 66 deletions(-) diff --git a/pydantic_ai_slim/pydantic_ai/_agent_graph.py b/pydantic_ai_slim/pydantic_ai/_agent_graph.py index deee95b9ae..d5c78926f8 100644 --- a/pydantic_ai_slim/pydantic_ai/_agent_graph.py +++ b/pydantic_ai_slim/pydantic_ai/_agent_graph.py @@ -136,6 +136,7 @@ class GraphAgentDeps(Generic[DepsT, OutputDataT]): model_settings: ModelSettings | None usage_limits: _usage.UsageLimits max_result_retries: int + max_tool_calls: int | None end_strategy: EndStrategy get_instructions: Callable[[RunContext[DepsT]], Awaitable[str | None]] @@ -818,6 +819,7 @@ def build_run_context(ctx: GraphRunContext[GraphAgentState, GraphAgentDeps[DepsT run_step=ctx.state.run_step, run_id=ctx.state.run_id, tool_usage=ctx.state.tool_usage, + max_tool_calls=ctx.deps.max_tool_calls, ) validation_context = build_validation_context(ctx.deps.validation_context, run_context) run_context = replace(run_context, validation_context=validation_context) @@ -1047,12 +1049,13 @@ async def _call_tools( deferred_calls_by_index: dict[int, Literal['external', 'unapproved']] = {} deferred_metadata_by_index: dict[int, dict[str, Any] | None] = {} - can_make_tool_calls: bool = True - if usage_limits.tool_calls_limit is not None: projected_usage = deepcopy(usage) projected_usage.tool_calls += len(tool_calls) - can_make_tool_calls = bool(usage_limits.check_before_tool_call(projected_usage)) + usage_limits.check_before_tool_call(projected_usage) + + # Checks for soft limits(if any set on total tools) + can_make_tool_calls = tool_manager.can_make_tool_calls(len(tool_calls), deepcopy(usage)) calls_to_run: list[_messages.ToolCallPart] = [] diff --git a/pydantic_ai_slim/pydantic_ai/_run_context.py b/pydantic_ai_slim/pydantic_ai/_run_context.py index ac9440c46a..a017d3d9c5 100644 --- a/pydantic_ai_slim/pydantic_ai/_run_context.py +++ b/pydantic_ai_slim/pydantic_ai/_run_context.py @@ -68,6 +68,8 @@ class RunContext(Generic[RunContextAgentDepsT]): """Whether the output passed to an output validator is partial.""" run_id: str | None = None """"Unique identifier for the agent run.""" + max_tool_calls: int | None = None + """The maximum number of tool calls allowed during this run, or `None` if unlimited.""" @property def last_attempt(self) -> bool: diff --git a/pydantic_ai_slim/pydantic_ai/_tool_manager.py b/pydantic_ai_slim/pydantic_ai/_tool_manager.py index d1724cd55a..50188e7a72 100644 --- a/pydantic_ai_slim/pydantic_ai/_tool_manager.py +++ b/pydantic_ai_slim/pydantic_ai/_tool_manager.py @@ -304,3 +304,19 @@ def get_current_use_of_tool(self, tool_name: str) -> int: raise ValueError('ToolManager has not been prepared for a run step yet') # pragma: no cover return self.ctx.tool_usage.get(tool_name, 0) + + def _get_max_tool_calls(self) -> int | None: + """Get the maximum number of tool calls allowed during this run, or `None` if unlimited.""" 
+ if self.ctx is None: + raise ValueError('ToolManager has not been prepared for a run step yet') # pragma: no cover + + return self.ctx.max_tool_calls + + def can_make_tool_calls(self, num_tool_calls: int, usage: RunUsage) -> bool: + """Check if the tool calls can be made within max_tool_calls limit if it is set.""" + max_tool_calls = self._get_max_tool_calls() + if max_tool_calls is not None: + usage.tool_calls += num_tool_calls + if usage.tool_calls > max_tool_calls: + return False + return True diff --git a/pydantic_ai_slim/pydantic_ai/agent/__init__.py b/pydantic_ai_slim/pydantic_ai/agent/__init__.py index 45edae7a1d..5a63a4695d 100644 --- a/pydantic_ai_slim/pydantic_ai/agent/__init__.py +++ b/pydantic_ai_slim/pydantic_ai/agent/__init__.py @@ -152,6 +152,7 @@ class Agent(AbstractAgent[AgentDepsT, OutputDataT]): _prepare_output_tools: ToolsPrepareFunc[AgentDepsT] | None = dataclasses.field(repr=False) _max_result_retries: int = dataclasses.field(repr=False) _max_tool_retries: int = dataclasses.field(repr=False) + _max_tool_calls: int | None = dataclasses.field(repr=False) _tool_timeout: float | None = dataclasses.field(repr=False) _validation_context: Any | Callable[[RunContext[AgentDepsT]], Any] = dataclasses.field(repr=False) @@ -175,6 +176,7 @@ def __init__( retries: int = 1, validation_context: Any | Callable[[RunContext[AgentDepsT]], Any] = None, output_retries: int | None = None, + max_tool_calls: int | None = None, tools: Sequence[Tool[AgentDepsT] | ToolFuncEither[AgentDepsT, ...]] = (), builtin_tools: Sequence[AbstractBuiltinTool | BuiltinToolFunc[AgentDepsT]] = (), prepare_tools: ToolsPrepareFunc[AgentDepsT] | None = None, @@ -203,6 +205,7 @@ def __init__( retries: int = 1, validation_context: Any | Callable[[RunContext[AgentDepsT]], Any] = None, output_retries: int | None = None, + max_tool_calls: int | None = None, tools: Sequence[Tool[AgentDepsT] | ToolFuncEither[AgentDepsT, ...]] = (), builtin_tools: Sequence[AbstractBuiltinTool | BuiltinToolFunc[AgentDepsT]] = (), prepare_tools: ToolsPrepareFunc[AgentDepsT] | None = None, @@ -229,6 +232,7 @@ def __init__( retries: int = 1, validation_context: Any | Callable[[RunContext[AgentDepsT]], Any] = None, output_retries: int | None = None, + max_tool_calls: int | None = None, tools: Sequence[Tool[AgentDepsT] | ToolFuncEither[AgentDepsT, ...]] = (), builtin_tools: Sequence[AbstractBuiltinTool | BuiltinToolFunc[AgentDepsT]] = (), prepare_tools: ToolsPrepareFunc[AgentDepsT] | None = None, @@ -264,6 +268,7 @@ def __init__( For model request retries, see the [HTTP Request Retries](../retries.md) documentation. validation_context: Pydantic [validation context](https://docs.pydantic.dev/latest/concepts/validators/#validation-context) used to validate tool arguments and outputs. output_retries: The maximum number of retries to allow for output validation, defaults to `retries`. + max_tool_calls: Optional maximum number of tool calls allowed during runs. If `None`, no limit is applied. tools: Tools to register with the agent, you can also register tools via the decorators [`@agent.tool`][pydantic_ai.agent.Agent.tool] and [`@agent.tool_plain`][pydantic_ai.agent.Agent.tool_plain]. builtin_tools: The builtin tools that the agent will use. 
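As an aside on the new `max_tool_calls` constructor parameter documented above: a minimal sketch of the soft-limit behavior it enables, using the built-in `'test'` model and a throwaway tool (both are placeholders, not taken from this patch):

```python
from pydantic_ai import Agent

agent = Agent('test', max_tool_calls=1)  # soft cap on tool executions for every run


@agent.tool_plain
def lookup(term: str) -> str:
    return f'definition of {term}'


result = agent.run_sync('Look up a couple of terms')
# Calls beyond the cap are not executed; the model instead receives a tool return such as
#   'Tool call limit reached for tool "lookup".'
# and decides how to finish the run, so no UsageLimitExceeded is raised.
print(result.output)
```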
This depends on the model, as some models may not @@ -330,6 +335,7 @@ def __init__( self._max_result_retries = output_retries if output_retries is not None else retries self._max_tool_retries = retries + self._max_tool_calls = max_tool_calls self._tool_timeout = tool_timeout self._validation_context = validation_context @@ -440,6 +446,7 @@ def iter( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -459,6 +466,7 @@ def iter( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -478,6 +486,7 @@ async def iter( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -554,6 +563,7 @@ async def main(): deps: Optional dependencies to use for this run. model_settings: Optional settings to use for this model's request. usage_limits: Optional limits on model request count or token usage. + max_tool_calls: Optional maximum number of tool calls allowed during this run. usage: Optional usage to start with, useful for resuming a conversation or agents used in tools. infer_name: Whether to try to infer the agent name from the call frame if it's not set. toolsets: Optional additional toolsets for this run. @@ -632,6 +642,7 @@ async def get_instructions(run_context: RunContext[AgentDepsT]) -> str | None: model_settings=model_settings, usage_limits=usage_limits, max_result_retries=self._max_result_retries, + max_tool_calls=self._max_tool_calls, end_strategy=self.end_strategy, output_schema=output_schema, output_validators=output_validators, diff --git a/pydantic_ai_slim/pydantic_ai/agent/abstract.py b/pydantic_ai_slim/pydantic_ai/agent/abstract.py index cc99f80e74..e4982d43d4 100644 --- a/pydantic_ai_slim/pydantic_ai/agent/abstract.py +++ b/pydantic_ai_slim/pydantic_ai/agent/abstract.py @@ -161,6 +161,7 @@ async def run( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -181,6 +182,7 @@ async def run( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -200,6 +202,7 @@ async def run( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -234,6 +237,7 @@ async def main(): deps: Optional dependencies to use for this run. model_settings: Optional settings to use for this model's request. 
usage_limits: Optional limits on model request count or token usage. + max_tool_calls: Optional maximum number of tool calls allowed during this run. usage: Optional usage to start with, useful for resuming a conversation or agents used in tools. infer_name: Whether to try to infer the agent name from the call frame if it's not set. toolsets: Optional additional toolsets for this run. @@ -258,6 +262,7 @@ async def main(): deps=deps, model_settings=model_settings, usage_limits=usage_limits, + max_tool_calls=max_tool_calls, usage=usage, toolsets=toolsets, builtin_tools=builtin_tools, @@ -285,6 +290,7 @@ def run_sync( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -305,6 +311,7 @@ def run_sync( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -324,6 +331,7 @@ def run_sync( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -357,6 +365,7 @@ def run_sync( deps: Optional dependencies to use for this run. model_settings: Optional settings to use for this model's request. usage_limits: Optional limits on model request count or token usage. + max_tool_calls: Optional maximum number of tool calls allowed during this run. usage: Optional usage to start with, useful for resuming a conversation or agents used in tools. infer_name: Whether to try to infer the agent name from the call frame if it's not set. toolsets: Optional additional toolsets for this run. @@ -380,6 +389,7 @@ def run_sync( deps=deps, model_settings=model_settings, usage_limits=usage_limits, + max_tool_calls=max_tool_calls, usage=usage, infer_name=False, toolsets=toolsets, @@ -401,6 +411,7 @@ def run_stream( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -421,6 +432,7 @@ def run_stream( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -441,6 +453,7 @@ async def run_stream( # noqa: C901 deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -482,6 +495,7 @@ async def main(): deps: Optional dependencies to use for this run. model_settings: Optional settings to use for this model's request. usage_limits: Optional limits on model request count or token usage. + max_tool_calls: Optional maximum number of tool calls allowed during this run. 
usage: Optional usage to start with, useful for resuming a conversation or agents used in tools. infer_name: Whether to try to infer the agent name from the call frame if it's not set. toolsets: Optional additional toolsets for this run. @@ -511,6 +525,7 @@ async def main(): instructions=instructions, model_settings=model_settings, usage_limits=usage_limits, + max_tool_calls=max_tool_calls, usage=usage, infer_name=False, toolsets=toolsets, @@ -633,6 +648,7 @@ def run_stream_sync( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -652,6 +668,7 @@ def run_stream_sync( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -670,6 +687,7 @@ def run_stream_sync( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -713,6 +731,7 @@ def main(): deps: Optional dependencies to use for this run. model_settings: Optional settings to use for this model's request. usage_limits: Optional limits on model request count or token usage. + max_tool_calls: Optional maximum number of tool calls allowed during this run. usage: Optional usage to start with, useful for resuming a conversation or agents used in tools. infer_name: Whether to try to infer the agent name from the call frame if it's not set. toolsets: Optional additional toolsets for this run. @@ -737,6 +756,7 @@ async def _consume_stream(): deps=deps, model_settings=model_settings, usage_limits=usage_limits, + max_tool_calls=max_tool_calls, usage=usage, infer_name=infer_name, toolsets=toolsets, @@ -761,6 +781,7 @@ def run_stream_events( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -780,6 +801,7 @@ def run_stream_events( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -798,6 +820,7 @@ def run_stream_events( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -848,6 +871,7 @@ async def main(): deps: Optional dependencies to use for this run. model_settings: Optional settings to use for this model's request. usage_limits: Optional limits on model request count or token usage. + max_tool_calls: Optional maximum number of tool calls allowed during this run. usage: Optional usage to start with, useful for resuming a conversation or agents used in tools. 
infer_name: Whether to try to infer the agent name from the call frame if it's not set. toolsets: Optional additional toolsets for this run. @@ -873,6 +897,7 @@ async def main(): deps=deps, model_settings=model_settings, usage_limits=usage_limits, + max_tool_calls=max_tool_calls, usage=usage, toolsets=toolsets, builtin_tools=builtin_tools, @@ -890,6 +915,7 @@ async def _run_stream_events( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, builtin_tools: Sequence[AbstractBuiltinTool | BuiltinToolFunc[AgentDepsT]] | None = None, @@ -916,6 +942,7 @@ async def run_agent() -> AgentRunResult[Any]: deps=deps, model_settings=model_settings, usage_limits=usage_limits, + max_tool_calls=max_tool_calls, usage=usage, infer_name=False, toolsets=toolsets, @@ -945,6 +972,7 @@ def iter( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -964,6 +992,7 @@ def iter( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -984,6 +1013,7 @@ async def iter( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -1060,6 +1090,7 @@ async def main(): deps: Optional dependencies to use for this run. model_settings: Optional settings to use for this model's request. usage_limits: Optional limits on model request count or token usage. + max_tool_calls: Optional maximum number of tool calls allowed during this run. usage: Optional usage to start with, useful for resuming a conversation or agents used in tools. infer_name: Whether to try to infer the agent name from the call frame if it's not set. toolsets: Optional additional toolsets for this run. 
diff --git a/pydantic_ai_slim/pydantic_ai/agent/wrapper.py b/pydantic_ai_slim/pydantic_ai/agent/wrapper.py index f363b5d990..b324f205af 100644 --- a/pydantic_ai_slim/pydantic_ai/agent/wrapper.py +++ b/pydantic_ai_slim/pydantic_ai/agent/wrapper.py @@ -85,6 +85,7 @@ def iter( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -104,6 +105,7 @@ def iter( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -123,6 +125,7 @@ async def iter( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -199,6 +202,7 @@ async def main(): deps: Optional dependencies to use for this run. model_settings: Optional settings to use for this model's request. usage_limits: Optional limits on model request count or token usage. + max_tool_calls: Optional maximum number of tool calls allowed during this run. usage: Optional usage to start with, useful for resuming a conversation or agents used in tools. infer_name: Whether to try to infer the agent name from the call frame if it's not set. toolsets: Optional additional toolsets for this run. @@ -217,6 +221,7 @@ async def main(): deps=deps, model_settings=model_settings, usage_limits=usage_limits, + max_tool_calls=max_tool_calls, usage=usage, infer_name=infer_name, toolsets=toolsets, diff --git a/pydantic_ai_slim/pydantic_ai/durable_exec/dbos/_agent.py b/pydantic_ai_slim/pydantic_ai/durable_exec/dbos/_agent.py index c5adf5221d..132f0fc83f 100644 --- a/pydantic_ai_slim/pydantic_ai/durable_exec/dbos/_agent.py +++ b/pydantic_ai_slim/pydantic_ai/durable_exec/dbos/_agent.py @@ -136,6 +136,7 @@ async def wrapped_run_workflow( deps: AgentDepsT, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -154,6 +155,7 @@ async def wrapped_run_workflow( deps=deps, model_settings=model_settings, usage_limits=usage_limits, + max_tool_calls=max_tool_calls, usage=usage, infer_name=infer_name, toolsets=toolsets, @@ -177,6 +179,7 @@ def wrapped_run_sync_workflow( model_settings: ModelSettings | None = None, instructions: Instructions[AgentDepsT] = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -195,6 +198,7 @@ def wrapped_run_sync_workflow( deps=deps, model_settings=model_settings, usage_limits=usage_limits, + max_tool_calls=max_tool_calls, usage=usage, infer_name=infer_name, toolsets=toolsets, @@ -269,6 +273,7 @@ async def run( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool 
= True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -289,6 +294,7 @@ async def run( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -308,6 +314,7 @@ async def run( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -343,6 +350,7 @@ async def main(): deps: Optional dependencies to use for this run. model_settings: Optional settings to use for this model's request. usage_limits: Optional limits on model request count or token usage. + max_tool_calls: Optional maximum number of tool calls allowed during this run. usage: Optional usage to start with, useful for resuming a conversation or agents used in tools. infer_name: Whether to try to infer the agent name from the call frame if it's not set. toolsets: Optional additional toolsets for this run. @@ -366,6 +374,7 @@ async def main(): deps=deps, model_settings=model_settings, usage_limits=usage_limits, + max_tool_calls=max_tool_calls, usage=usage, infer_name=infer_name, toolsets=toolsets, @@ -387,6 +396,7 @@ def run_sync( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -407,6 +417,7 @@ def run_sync( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -426,6 +437,7 @@ def run_sync( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -460,6 +472,7 @@ def run_sync( deps: Optional dependencies to use for this run. model_settings: Optional settings to use for this model's request. usage_limits: Optional limits on model request count or token usage. + max_tool_calls: Optional maximum number of tool calls allowed during this run. usage: Optional usage to start with, useful for resuming a conversation or agents used in tools. infer_name: Whether to try to infer the agent name from the call frame if it's not set. toolsets: Optional additional toolsets for this run. 
@@ -483,6 +496,7 @@ def run_sync( deps=deps, model_settings=model_settings, usage_limits=usage_limits, + max_tool_calls=max_tool_calls, usage=usage, infer_name=infer_name, toolsets=toolsets, @@ -504,6 +518,7 @@ def run_stream( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -524,6 +539,7 @@ def run_stream( instructions: Instructions[AgentDepsT] = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -544,6 +560,7 @@ async def run_stream( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -576,6 +593,7 @@ async def main(): deps: Optional dependencies to use for this run. model_settings: Optional settings to use for this model's request. usage_limits: Optional limits on model request count or token usage. + max_tool_calls: Optional maximum number of tool calls allowed during this run. usage: Optional usage to start with, useful for resuming a conversation or agents used in tools. infer_name: Whether to try to infer the agent name from the call frame if it's not set. toolsets: Optional additional toolsets for this run. @@ -601,6 +619,7 @@ async def main(): deps=deps, model_settings=model_settings, usage_limits=usage_limits, + max_tool_calls=max_tool_calls, usage=usage, infer_name=infer_name, toolsets=toolsets, @@ -623,6 +642,7 @@ def run_stream_events( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -642,6 +662,7 @@ def run_stream_events( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -660,6 +681,7 @@ def run_stream_events( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -710,6 +732,7 @@ async def main(): deps: Optional dependencies to use for this run. model_settings: Optional settings to use for this model's request. usage_limits: Optional limits on model request count or token usage. + max_tool_calls: Optional maximum number of tool calls allowed during this run. usage: Optional usage to start with, useful for resuming a conversation or agents used in tools. infer_name: Whether to try to infer the agent name from the call frame if it's not set. toolsets: Optional additional toolsets for this run. 
@@ -737,6 +760,7 @@ def iter( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -757,6 +781,7 @@ def iter( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -777,6 +802,7 @@ async def iter( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -854,6 +880,7 @@ async def main(): deps: Optional dependencies to use for this run. model_settings: Optional settings to use for this model's request. usage_limits: Optional limits on model request count or token usage. + max_tool_calls: Optional maximum number of tool calls allowed during this run. usage: Optional usage to start with, useful for resuming a conversation or agents used in tools. infer_name: Whether to try to infer the agent name from the call frame if it's not set. toolsets: Optional additional toolsets for this run. @@ -878,6 +905,7 @@ async def main(): deps=deps, model_settings=model_settings, usage_limits=usage_limits, + max_tool_calls=max_tool_calls, usage=usage, infer_name=infer_name, toolsets=toolsets, diff --git a/pydantic_ai_slim/pydantic_ai/durable_exec/prefect/_agent.py b/pydantic_ai_slim/pydantic_ai/durable_exec/prefect/_agent.py index 60c8122686..f448946983 100644 --- a/pydantic_ai_slim/pydantic_ai/durable_exec/prefect/_agent.py +++ b/pydantic_ai_slim/pydantic_ai/durable_exec/prefect/_agent.py @@ -185,6 +185,7 @@ async def run( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -205,6 +206,7 @@ async def run( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -224,6 +226,7 @@ async def run( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -259,6 +262,7 @@ async def main(): deps: Optional dependencies to use for this run. model_settings: Optional settings to use for this model's request. usage_limits: Optional limits on model request count or token usage. + max_tool_calls: Optional maximum number of tool calls allowed during this run. usage: Optional usage to start with, useful for resuming a conversation or agents used in tools. infer_name: Whether to try to infer the agent name from the call frame if it's not set. toolsets: Optional additional toolsets for this run. 
@@ -285,6 +289,7 @@ async def wrapped_run_flow() -> AgentRunResult[Any]: deps=deps, model_settings=model_settings, usage_limits=usage_limits, + max_tool_calls=max_tool_calls, usage=usage, infer_name=infer_name, toolsets=toolsets, @@ -309,6 +314,7 @@ def run_sync( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -329,6 +335,7 @@ def run_sync( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -348,6 +355,7 @@ def run_sync( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -382,6 +390,7 @@ def run_sync( deps: Optional dependencies to use for this run. model_settings: Optional settings to use for this model's request. usage_limits: Optional limits on model request count or token usage. + max_tool_calls: Optional maximum number of tool calls allowed during this run. usage: Optional usage to start with, useful for resuming a conversation or agents used in tools. infer_name: Whether to try to infer the agent name from the call frame if it's not set. toolsets: Optional additional toolsets for this run. @@ -410,6 +419,7 @@ def wrapped_run_sync_flow() -> AgentRunResult[Any]: deps=deps, model_settings=model_settings, usage_limits=usage_limits, + max_tool_calls=max_tool_calls, usage=usage, infer_name=infer_name, toolsets=toolsets, @@ -435,6 +445,7 @@ def run_stream( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -455,6 +466,7 @@ def run_stream( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -475,6 +487,7 @@ async def run_stream( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -507,6 +520,7 @@ async def main(): deps: Optional dependencies to use for this run. model_settings: Optional settings to use for this model's request. usage_limits: Optional limits on model request count or token usage. + max_tool_calls: Optional maximum number of tool calls allowed during this run. usage: Optional usage to start with, useful for resuming a conversation or agents used in tools. infer_name: Whether to try to infer the agent name from the call frame if it's not set. toolsets: Optional additional toolsets for this run. 
@@ -532,6 +546,7 @@ async def main(): deps=deps, model_settings=model_settings, usage_limits=usage_limits, + max_tool_calls=max_tool_calls, usage=usage, infer_name=infer_name, toolsets=toolsets, @@ -554,6 +569,7 @@ def run_stream_events( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -573,6 +589,7 @@ def run_stream_events( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -591,6 +608,7 @@ def run_stream_events( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -641,6 +659,7 @@ async def main(): deps: Optional dependencies to use for this run. model_settings: Optional settings to use for this model's request. usage_limits: Optional limits on model request count or token usage. + max_tool_calls: Optional maximum number of tool calls allowed during this run. usage: Optional usage to start with, useful for resuming a conversation or agents used in tools. infer_name: Whether to try to infer the agent name from the call frame if it's not set. toolsets: Optional additional toolsets for this run. @@ -666,6 +685,7 @@ async def main(): deps=deps, model_settings=model_settings, usage_limits=usage_limits, + max_tool_calls=max_tool_calls, usage=usage, infer_name=infer_name, toolsets=toolsets, @@ -685,6 +705,7 @@ def iter( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -704,6 +725,7 @@ def iter( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -723,6 +745,7 @@ async def iter( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -799,6 +822,7 @@ async def main(): instructions: Optional additional instructions to use for this run. model_settings: Optional settings to use for this model's request. usage_limits: Optional limits on model request count or token usage. + max_tool_calls: Optional maximum number of tool calls allowed during this run. usage: Optional usage to start with, useful for resuming a conversation or agents used in tools. infer_name: Whether to try to infer the agent name from the call frame if it's not set. toolsets: Optional additional toolsets for this run. 
@@ -823,6 +847,7 @@ async def main(): deps=deps, model_settings=model_settings, usage_limits=usage_limits, + max_tool_calls=max_tool_calls, usage=usage, infer_name=infer_name, toolsets=toolsets, diff --git a/pydantic_ai_slim/pydantic_ai/durable_exec/temporal/_agent.py b/pydantic_ai_slim/pydantic_ai/durable_exec/temporal/_agent.py index 42fc2a872e..7f9c89f3ea 100644 --- a/pydantic_ai_slim/pydantic_ai/durable_exec/temporal/_agent.py +++ b/pydantic_ai_slim/pydantic_ai/durable_exec/temporal/_agent.py @@ -268,6 +268,7 @@ async def run( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -288,6 +289,7 @@ async def run( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -307,6 +309,7 @@ async def run( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -342,6 +345,7 @@ async def main(): deps: Optional dependencies to use for this run. model_settings: Optional settings to use for this model's request. usage_limits: Optional limits on model request count or token usage. + max_tool_calls: Optional maximum number of tool calls allowed during this run. usage: Optional usage to start with, useful for resuming a conversation or agents used in tools. infer_name: Whether to try to infer the agent name from the call frame if it's not set. toolsets: Optional additional toolsets for this run. @@ -367,6 +371,7 @@ async def main(): deps=deps, model_settings=model_settings, usage_limits=usage_limits, + max_tool_calls=max_tool_calls, usage=usage, infer_name=infer_name, toolsets=toolsets, @@ -388,6 +393,7 @@ def run_sync( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -408,6 +414,7 @@ def run_sync( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -427,6 +434,7 @@ def run_sync( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -461,6 +469,7 @@ def run_sync( deps: Optional dependencies to use for this run. model_settings: Optional settings to use for this model's request. usage_limits: Optional limits on model request count or token usage. + max_tool_calls: Optional maximum number of tool calls allowed during this run. usage: Optional usage to start with, useful for resuming a conversation or agents used in tools. 
infer_name: Whether to try to infer the agent name from the call frame if it's not set. toolsets: Optional additional toolsets for this run. @@ -485,6 +494,7 @@ def run_sync( deps=deps, model_settings=model_settings, usage_limits=usage_limits, + max_tool_calls=max_tool_calls, usage=usage, infer_name=infer_name, toolsets=toolsets, @@ -506,6 +516,7 @@ def run_stream( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -526,6 +537,7 @@ def run_stream( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -546,6 +558,7 @@ async def run_stream( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -578,6 +591,7 @@ async def main(): deps: Optional dependencies to use for this run. model_settings: Optional settings to use for this model's request. usage_limits: Optional limits on model request count or token usage. + max_tool_calls: Optional maximum number of tool calls allowed during this run. usage: Optional usage to start with, useful for resuming a conversation or agents used in tools. infer_name: Whether to try to infer the agent name from the call frame if it's not set. toolsets: Optional additional toolsets for this run. @@ -603,6 +617,7 @@ async def main(): deps=deps, model_settings=model_settings, usage_limits=usage_limits, + max_tool_calls=max_tool_calls, usage=usage, infer_name=infer_name, toolsets=toolsets, @@ -625,6 +640,7 @@ def run_stream_events( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -644,6 +660,7 @@ def run_stream_events( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -662,6 +679,7 @@ def run_stream_events( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -712,6 +730,7 @@ async def main(): deps: Optional dependencies to use for this run. model_settings: Optional settings to use for this model's request. usage_limits: Optional limits on model request count or token usage. + max_tool_calls: Optional maximum number of tool calls allowed during this run. usage: Optional usage to start with, useful for resuming a conversation or agents used in tools. infer_name: Whether to try to infer the agent name from the call frame if it's not set. toolsets: Optional additional toolsets for this run. 
@@ -737,6 +756,7 @@ async def main(): deps=deps, model_settings=model_settings, usage_limits=usage_limits, + max_tool_calls=max_tool_calls, usage=usage, infer_name=infer_name, toolsets=toolsets, @@ -756,6 +776,7 @@ def iter( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, builtin_tools: Sequence[AbstractBuiltinTool | BuiltinToolFunc[AgentDepsT]] | None = None, @@ -776,6 +797,7 @@ def iter( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -796,6 +818,7 @@ async def iter( deps: AgentDepsT = None, model_settings: ModelSettings | None = None, usage_limits: _usage.UsageLimits | None = None, + max_tool_calls: int | None = None, usage: _usage.RunUsage | None = None, infer_name: bool = True, toolsets: Sequence[AbstractToolset[AgentDepsT]] | None = None, @@ -873,6 +896,7 @@ async def main(): deps: Optional dependencies to use for this run. model_settings: Optional settings to use for this model's request. usage_limits: Optional limits on model request count or token usage. + max_tool_calls: Optional maximum number of tool calls allowed during this run. usage: Optional usage to start with, useful for resuming a conversation or agents used in tools. infer_name: Whether to try to infer the agent name from the call frame if it's not set. toolsets: Optional additional toolsets for this run. @@ -907,6 +931,7 @@ async def main(): deps=deps, model_settings=model_settings, usage_limits=usage_limits, + max_tool_calls=max_tool_calls, usage=usage, infer_name=infer_name, toolsets=toolsets, diff --git a/pydantic_ai_slim/pydantic_ai/usage.py b/pydantic_ai_slim/pydantic_ai/usage.py index 0a07fab953..586af8dcfc 100644 --- a/pydantic_ai_slim/pydantic_ai/usage.py +++ b/pydantic_ai_slim/pydantic_ai/usage.py @@ -3,7 +3,7 @@ import dataclasses from copy import copy from dataclasses import dataclass, fields -from typing import Annotated, Any, Literal +from typing import Annotated, Any from genai_prices.data_snapshot import get_snapshot from pydantic import AliasChoices, BeforeValidator, Field @@ -259,11 +259,6 @@ class UsageLimits: """The maximum number of requests allowed to the model.""" tool_calls_limit: int | None = None """The maximum number of successful tool calls allowed to be executed.""" - tool_calls_limit_enforcement: Literal['soft', 'hard'] = 'hard' - """ Whether to enforce the tool calls limit in a 'soft' or 'hard' manner. - Hard limits will raise UsageLimitExceeded before making a tool call that would exceed the limit. - Soft limits will return a ToolReturnPart indicating the limit would be exceeded so the tool call cannot be made. You can customize this response using PromptTemplates. 
- """ input_tokens_limit: int | None = None """The maximum number of input/prompt tokens allowed.""" output_tokens_limit: int | None = None @@ -295,7 +290,6 @@ def __init__( output_tokens_limit: int | None = None, total_tokens_limit: int | None = None, count_tokens_before_request: bool = False, - tool_calls_limit_enforcement: Literal['soft', 'hard'] = 'hard', ) -> None: self.request_limit = request_limit self.tool_calls_limit = tool_calls_limit @@ -303,7 +297,6 @@ def __init__( self.output_tokens_limit = output_tokens_limit self.total_tokens_limit = total_tokens_limit self.count_tokens_before_request = count_tokens_before_request - self.tool_calls_limit_enforcement = tool_calls_limit_enforcement @overload @deprecated( @@ -335,7 +328,6 @@ def __init__( output_tokens_limit: int | None = None, total_tokens_limit: int | None = None, count_tokens_before_request: bool = False, - tool_calls_limit_enforcement: Literal['soft', 'hard'] = 'hard', # deprecated: request_tokens_limit: int | None = None, response_tokens_limit: int | None = None, @@ -346,7 +338,6 @@ def __init__( self.output_tokens_limit = output_tokens_limit or response_tokens_limit self.total_tokens_limit = total_tokens_limit self.count_tokens_before_request = count_tokens_before_request - self.tool_calls_limit_enforcement = tool_calls_limit_enforcement def has_token_limits(self) -> bool: """Returns `True` if this instance places any limits on token counts. @@ -394,17 +385,13 @@ def check_tokens(self, usage: RunUsage) -> None: if self.total_tokens_limit is not None and total_tokens > self.total_tokens_limit: raise UsageLimitExceeded(f'Exceeded the total_tokens_limit of {self.total_tokens_limit} ({total_tokens=})') - def check_before_tool_call(self, projected_usage: RunUsage) -> None | bool: + def check_before_tool_call(self, projected_usage: RunUsage) -> None: """Raises a `UsageLimitExceeded` exception if the next tool call(s) would exceed the tool call limit.""" tool_calls_limit = self.tool_calls_limit tool_calls = projected_usage.tool_calls if tool_calls_limit is not None and tool_calls > tool_calls_limit: - if self.tool_calls_limit_enforcement == 'soft': - return False raise UsageLimitExceeded( f'The next tool call(s) would exceed the tool_calls_limit of {tool_calls_limit} ({tool_calls=}).' 
) - return True - __repr__ = _utils.dataclasses_no_defaults_repr diff --git a/tests/test_tools.py b/tests/test_tools.py index 695cfbce94..604736d83b 100644 --- a/tests/test_tools.py +++ b/tests/test_tools.py @@ -1431,6 +1431,54 @@ def tool_with_max_use(ctx: RunContext[None]) -> str: ) +def test_max_tool_calls(): + agent = Agent(TestModel(), max_tool_calls=0) + + @agent.tool_plain + def my_tool(x: int) -> int: + return x + 1 + + result = agent.run_sync('Hello') + assert result.all_messages() == snapshot( + [ + ModelRequest( + parts=[ + UserPromptPart( + content='Hello', + timestamp=IsDatetime(), + ) + ], + run_id=IsStr(), + ), + ModelResponse( + parts=[ToolCallPart(tool_name='my_tool', args={'x': 0}, tool_call_id=IsStr())], + usage=RequestUsage(input_tokens=51, output_tokens=4), + model_name='test', + timestamp=IsDatetime(), + run_id=IsStr(), + ), + ModelRequest( + parts=[ + ToolReturnPart( + tool_name='my_tool', + content='Tool call limit reached for tool "my_tool".', + tool_call_id=IsStr(), + timestamp=IsDatetime(), + ) + ], + run_id=IsStr(), + ), + ModelResponse( + parts=[TextPart(content='{"my_tool":"Tool call limit reached for tool \\"my_tool\\"."}')], + usage=RequestUsage(input_tokens=59, output_tokens=15), + model_name='test', + timestamp=IsDatetime(), + run_id=IsStr(), + ), + ] + ) + + def test_tool_raises_call_deferred(): agent = Agent(TestModel(), output_type=[str, DeferredToolRequests]) diff --git a/tests/test_usage_limits.py b/tests/test_usage_limits.py index 96ff306a19..ac17fd0be5 100644 --- a/tests/test_usage_limits.py +++ b/tests/test_usage_limits.py @@ -17,7 +17,6 @@ ModelRequest, ModelResponse, RunContext, - TextPart, ToolCallPart, ToolReturnPart, UsageLimitExceeded, @@ -29,7 +28,7 @@ from pydantic_ai.output import ToolOutput from pydantic_ai.usage import RequestUsage, RunUsage, UsageLimits -from .conftest import IsDatetime, IsNow, IsStr +from .conftest import IsNow, IsStr pytestmark = pytest.mark.anyio @@ -271,52 +270,6 @@ async def ret_a(x: str) -> str: assert result.usage() == snapshot(RunUsage(requests=2, input_tokens=103, output_tokens=14, tool_calls=1)) -async def test_tool_call_soft_limit() -> None: - test_agent = Agent(TestModel()) - - @test_agent.tool_plain - async def ret_a(x: str) -> str: - return f'{x}-apple' - - result = await test_agent.run( - 'Hello', usage_limits=UsageLimits(tool_calls_limit=0, tool_calls_limit_enforcement='soft') - ) - - assert result.all_messages() == snapshot( - [ - ModelRequest( - parts=[UserPromptPart(content='Hello', timestamp=IsDatetime())], - run_id=IsStr(), - ), - ModelResponse( - parts=[ToolCallPart(tool_name='ret_a', args={'x': 'a'}, tool_call_id=IsStr())], - usage=RequestUsage(input_tokens=51, output_tokens=5), - model_name='test', - timestamp=IsDatetime(), - run_id=IsStr(), - ), - ModelRequest( - parts=[ - ToolReturnPart( - tool_name='ret_a', - content='Tool call limit reached for tool "ret_a".', - tool_call_id=IsStr(), - timestamp=IsDatetime(), - ) - ], - run_id=IsStr(), - ), - ModelResponse( - parts=[TextPart(content='{"ret_a":"Tool call limit reached for tool \\"ret_a\\"."}')], - usage=RequestUsage(input_tokens=59, output_tokens=16), - model_name='test', - timestamp=IsDatetime(), - run_id=IsStr(), - ), - ] - ) - - async def test_output_tool_not_counted() -> None: """Test that output tools are not counted in tool_calls usage metric.""" test_agent = Agent(TestModel()) From 39143229abd4d7e58191abdccc768f96502980b5 Mon Sep 17 00:00:00 2001 From: adtyavrdhn Date: Tue, 16 Dec 2025 22:47:57 +0530 Subject: [PATCH 19/20] docs update 
From 39143229abd4d7e58191abdccc768f96502980b5 Mon Sep 17 00:00:00 2001
From: adtyavrdhn
Date: Tue, 16 Dec 2025 22:47:57 +0530
Subject: [PATCH 19/20] docs update for max_tool_calls

---
 docs/agents.md         | 45 ++++++++++++++++++++++++++++++++++++++++++
 docs/tools-advanced.md |  2 ++
 2 files changed, 47 insertions(+)

diff --git a/docs/agents.md b/docs/agents.md
index 28a069a0be..e13f46dca3 100644
--- a/docs/agents.md
+++ b/docs/agents.md
@@ -669,6 +669,51 @@ except UsageLimitExceeded as e:
 - Usage limits are especially relevant if you've registered many tools. Use `request_limit` to bound the number of model turns, and `tool_calls_limit` to cap the number of successful tool executions within a run.
 - The `tool_calls_limit` is checked before executing tool calls. If the model returns parallel tool calls that would exceed the limit, no tools will be executed.
 
+##### Soft Tool Call Limits with `max_tool_calls`
+
+If you want to limit tool calls but let the model decide how to proceed instead of raising an error, use the `max_tool_calls` parameter. This is a "soft" limit that returns a message to the model when exceeded, rather than raising a [`UsageLimitExceeded`][pydantic_ai.exceptions.UsageLimitExceeded] exception.
+
+```py
+from pydantic_ai import Agent
+
+agent = Agent('anthropic:claude-sonnet-4-5', max_tool_calls=2)  # (1)!
+
+@agent.tool_plain
+def do_work() -> str:
+    return 'ok'
+
+# The model can make up to 2 tool calls
+result = agent.run_sync('Please call the tool three times')
+print(result.output)
+#> I was able to call the tool twice, but the third call reached the limit.
+```
+
+1. Set the maximum number of tool calls allowed during runs. This can also be set per-run.
+
+When `max_tool_calls` is exceeded, instead of executing the tool, the agent returns a message to the model: `'Tool call limit reached for tool "{tool_name}".'`. The model then decides how to respond based on this information.
+
+You can also override `max_tool_calls` at run time:
+
+```py
+from pydantic_ai import Agent
+
+agent = Agent('anthropic:claude-sonnet-4-5', max_tool_calls=5)  # Default limit
+
+@agent.tool_plain
+def calculate(x: int) -> int:
+    return x * 2
+
+# Override the limit for this specific run
+result = agent.run_sync('Calculate something', max_tool_calls=1)
+```
+
+**When to use `max_tool_calls` vs `tool_calls_limit`:**
+
+| Parameter | Behavior | Use Case |
+| --------- | -------- | -------- |
+| `tool_calls_limit` | Raises [`UsageLimitExceeded`][pydantic_ai.exceptions.UsageLimitExceeded] | Hard stop when you need to prevent runaway costs |
+| `max_tool_calls` | Returns message to model | Soft limit where you want the model to adapt gracefully |
+
 #### Model (Run) Settings
 
 Pydantic AI offers a [`settings.ModelSettings`][pydantic_ai.settings.ModelSettings] structure to help you fine tune your requests.
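
The comparison table above is easiest to see side by side. A minimal sketch of the two behaviours (illustrative only, not part of the patch; `TestModel` stands in for a real provider so the snippet can run offline):

```py
from pydantic_ai import Agent
from pydantic_ai.exceptions import UsageLimitExceeded
from pydantic_ai.models.test import TestModel
from pydantic_ai.usage import UsageLimits

# Soft limit: extra tool calls are answered with
# 'Tool call limit reached for tool "double".' and the run still completes.
soft_agent = Agent(TestModel(), max_tool_calls=0)

@soft_agent.tool_plain
def double(x: int) -> int:
    return x * 2

print(soft_agent.run_sync('Double 21').output)

# Hard limit: the same cap expressed as a usage limit raises before any tool runs.
hard_agent = Agent(TestModel())

@hard_agent.tool_plain
def triple(x: int) -> int:
    return x * 3

try:
    hard_agent.run_sync('Triple 21', usage_limits=UsageLimits(tool_calls_limit=0))
except UsageLimitExceeded as e:
    print(e)
```

The soft run completes and the model sees the limit message; the hard run never executes the tool and raises instead.
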
diff --git a/docs/tools-advanced.md b/docs/tools-advanced.md
index bd7bfcd0d1..fc68fccb6e 100644
--- a/docs/tools-advanced.md
+++ b/docs/tools-advanced.md
@@ -413,6 +413,8 @@ Async functions are run on the event loop, while sync functions are offloaded to
 !!! note "Limiting tool executions"
     You can cap the total number of tool executions within a run using [`UsageLimits(tool_calls_limit=...)`](agents.md#usage-limits). For finer control, you can limit how many times a *specific* tool can be called by setting the `max_uses` parameter when registering the tool (e.g., `@agent.tool(max_uses=3)` or `Tool(func, max_uses=3)`). Once a tool reaches its `max_uses` limit, it is automatically removed from the available tools for subsequent steps in the run. The `tool_calls` counter increments only after a successful tool invocation. Output tools (used for [structured output](output.md)) are not counted in the `tool_calls` metric.
 
+    For a "soft" limit that lets the model decide how to proceed instead of raising an error, use the [`max_tool_calls`](agents.md#soft-tool-call-limits-with-max_tool_calls) parameter on the agent or run method.
+
 #### Raising Hard Errors on Tool Usage Limits
 
 By default, when a tool reaches its `max_uses` limit, it is silently removed from the available tools. If you want to raise an error instead, you can use a [`prepare`](#tool-prepare) function to check the tool usage and raise a [`UsageLimitExceeded`][pydantic_ai.exceptions.UsageLimitExceeded] exception:

From 0f6356154e2e09278c45d478fa9c70297fb047e2 Mon Sep 17 00:00:00 2001
From: adtyavrdhn
Date: Tue, 16 Dec 2025 23:38:09 +0530
Subject: [PATCH 20/20] test skip doc

---
 docs/agents.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/agents.md b/docs/agents.md
index e13f46dca3..960280fb46 100644
--- a/docs/agents.md
+++ b/docs/agents.md
@@ -673,7 +673,7 @@ except UsageLimitExceeded as e:
 
 If you want to limit tool calls but let the model decide how to proceed instead of raising an error, use the `max_tool_calls` parameter. This is a "soft" limit that returns a message to the model when exceeded, rather than raising a [`UsageLimitExceeded`][pydantic_ai.exceptions.UsageLimitExceeded] exception.
 
-```py
+```py {test="skip"}
 from pydantic_ai import Agent
 
 agent = Agent('anthropic:claude-sonnet-4-5', max_tool_calls=2)  # (1)!
@@ -694,7 +694,7 @@ When `max_tool_calls` is exceeded, instead of executing the tool, the agent retu
 
 You can also override `max_tool_calls` at run time:
 
-```py
+```py {test="skip"}
 from pydantic_ai import Agent
 
 agent = Agent('anthropic:claude-sonnet-4-5', max_tool_calls=5)  # Default limit