AN
anthropic-api
Expert guidance for building applications with Anthropic's Claude API.
Install
mkdir -p .claude/skills/anthropic-api && curl -L -o skill.zip "https://agentskills.codes/api/skills/download/14358" && unzip -o skill.zip -d .claude/skills/anthropic-api && rm skill.zip
Installs to .claude/skills/anthropic-api
Activation
This is the description your AI agent reads to decide when to run this skill — the better it matches your request, the more reliably it fires.
Expert guidance for building applications with Anthropic's Claude API. (70 chars; no explicit “when” trigger)
About this skill
Anthropic Claude API Expert Guide
Build production-grade applications with Anthropic's Claude API using best practices, cost optimization strategies, and proven patterns.
Model Selection Guide
| Model | Model ID | Best For | Input/Output Cost |
|---|---|---|---|
| Claude Opus 4.5 | claude-opus-4-5-20251101 | Most capable, complex reasoning | $5 / $25 per MTok |
| Claude Sonnet 4.5 | claude-sonnet-4-5-20250929 | Balanced performance/cost | $3 / $15 per MTok |
| Claude Haiku 4.5 | claude-haiku-4-5-20251001 | Fast, high-volume, cost-efficient | $1 / $5 per MTok |
Decision Framework:
- Use Haiku for: Classification, extraction, simple Q&A, high-volume workloads
- Use Sonnet for: Code generation, analysis, most production use cases (near-Opus quality at 60% of the cost, per the prices in the table above)
- Use Opus for: Complex reasoning, research, when quality is paramount
Setup
Python SDK
pip install anthropic
import os

from anthropic import Anthropic

# The API key is taken from the ANTHROPIC_API_KEY environment variable.
# Passing it explicitly is optional; it is shown here for clarity.
client = Anthropic(
    api_key=os.environ["ANTHROPIC_API_KEY"],  # Default, can be omitted
)

# Minimal single-turn request.
message = client.messages.create(
    model="claude-sonnet-4-5-20250929",
    max_tokens=1024,
    messages=[
        {"role": "user", "content": "Hello, Claude"},
    ],
)

# `content` is a list of content blocks; print the first block's text.
print(message.content[0].text)
TypeScript SDK
npm install @anthropic-ai/sdk
import Anthropic from "@anthropic-ai/sdk";

// The API key is read from the ANTHROPIC_API_KEY environment variable.
const client = new Anthropic({
  apiKey: process.env.ANTHROPIC_API_KEY,
});

// Minimal single-turn request.
const message = await client.messages.create({
  model: "claude-sonnet-4-5-20250929",
  max_tokens: 1024,
  messages: [{ role: "user", content: "Hello, Claude" }],
});

// `content` is a list of content blocks of different types; narrow to a
// text block before reading `.text` so the access type-checks.
const firstBlock = message.content[0];
if (firstBlock?.type === "text") {
  console.log(firstBlock.text);
}
Cost Optimization Strategies
1. Prompt Caching (Up to 90% Savings)
Cache stable content like system prompts, documents, or tool definitions. Cache hits cost only 10% of base input price.
from anthropic import Anthropic

client = Anthropic()

# Define the cached prefix once and reuse the same object for every request,
# so the second request sends exactly the content the first one cached.
system_blocks = [
    {
        "type": "text",
        "text": "You are an expert legal assistant...",  # Large system prompt
        "cache_control": {"type": "ephemeral"},  # 5-minute cache
    }
]

# First request - writes to cache (1.25x input cost)
response = client.messages.create(
    model="claude-sonnet-4-5-20250929",
    max_tokens=1024,
    system=system_blocks,
    messages=[{"role": "user", "content": "Analyze this contract..."}],
)

# Subsequent requests within 5 minutes - cache hit (0.1x input cost)
response2 = client.messages.create(
    model="claude-sonnet-4-5-20250929",
    max_tokens=1024,
    system=system_blocks,  # Same content
    messages=[{"role": "user", "content": "Different question..."}],
)

# Check cache usage
print(f"Cache read: {response2.usage.cache_read_input_tokens}")
print(f"Cache write: {response2.usage.cache_creation_input_tokens}")
Cache Duration Options:
- `ephemeral` - 5-minute cache (1.25x write, 0.1x read)
- For 1-hour cache, use extended caching (2x write, 0.1x read)
Minimum Token Requirements:
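- The minimum cacheable length depends on the model: 1,024 tokens per cache checkpoint for Claude Sonnet 4.5; some models (e.g. the Haiku family) require more — check the prompt caching documentation for the model you use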
- Content below minimum won't be cached
2. Batch API (50% Discount)
Process large volumes asynchronously. Most batches finish within an hour; any requests not processed within 24 hours expire.
import asyncio

from anthropic import AsyncAnthropic

client = AsyncAnthropic()


async def process_batch(poll_interval: float = 60):
    """Submit two summarization requests as a batch and print the outcomes.

    Args:
        poll_interval: Seconds to wait between status checks while the
            batch is still processing.

    Run with ``asyncio.run(process_batch())``.
    """
    # Create batch
    batch = await client.messages.batches.create(
        requests=[
            {
                "custom_id": "request-1",
                "params": {
                    "model": "claude-sonnet-4-5-20250929",
                    "max_tokens": 1024,
                    "messages": [
                        {"role": "user", "content": "Summarize document 1"}
                    ],
                },
            },
            {
                "custom_id": "request-2",
                "params": {
                    "model": "claude-sonnet-4-5-20250929",
                    "max_tokens": 1024,
                    "messages": [
                        {"role": "user", "content": "Summarize document 2"}
                    ],
                },
            },
        ]
    )
    print(f"Batch ID: {batch.id}")

    # Poll until the batch reports that processing has ended.
    while True:
        batch = await client.messages.batches.retrieve(batch.id)
        if batch.processing_status == "ended":
            break
        await asyncio.sleep(poll_interval)

    # Get results. Entries are matched to requests by custom_id.
    async for entry in await client.messages.batches.results(batch.id):
        if entry.result.type == "succeeded":
            print(f"{entry.custom_id}: {entry.result.message.content[0].text}")
        else:
            # Report requests that did not succeed instead of silently
            # dropping them.
            print(f"{entry.custom_id}: {entry.result.type}")
When to Use Batch:
- Background processing (reports, analysis)
- Bulk content generation
- Data enrichment pipelines
- Any non-real-time workload
3. Token Optimization
# Count tokens before sending (estimate costs)
token_count = client.messages.count_tokens(
    model="claude-sonnet-4-5-20250929",
    messages=[{"role": "user", "content": "Your message here"}],
)
print(f"Input tokens: {token_count.input_tokens}")

# Set appropriate max_tokens (don't over-reserve)
response = client.messages.create(
    model="claude-sonnet-4-5-20250929",
    max_tokens=500,  # Set to expected output, not maximum
    messages=[{"role": "user", "content": "Brief summary of..."}],
)

# Check actual usage
print(f"Used: {response.usage.output_tokens} tokens")
Streaming
Basic Streaming
from anthropic import Anthropic

client = Anthropic()

# Simple streaming
with client.messages.stream(
    model="claude-sonnet-4-5-20250929",
    max_tokens=1024,
    messages=[{"role": "user", "content": "Tell me a story"}],
) as stream:
    # Print each text chunk as soon as it arrives.
    for text in stream.text_stream:
        print(text, end="", flush=True)
    print()

    # Get final message after stream completes. This is done inside the
    # `with` block, while the stream is still open.
    message = stream.get_final_message()

print(f"Total tokens: {message.usage.output_tokens}")
Async Streaming
import asyncio

from anthropic import AsyncAnthropic

client = AsyncAnthropic()


async def stream_response():
    """Stream a reply and print each text chunk as it arrives."""
    async with client.messages.stream(
        model="claude-sonnet-4-5-20250929",
        max_tokens=1024,
        messages=[{"role": "user", "content": "Explain quantum computing"}],
    ) as stream:
        async for text in stream.text_stream:
            print(text, end="", flush=True)
        print()


asyncio.run(stream_response())
Handling All Event Types
import asyncio

from anthropic import AsyncAnthropic

client = AsyncAnthropic()


async def stream_events():
    """Iterate over the stream's events and print each kind as it arrives.

    `async with` is only valid inside a coroutine, so the loop lives in this
    function and is driven by `asyncio.run` below.
    """
    async with client.messages.stream(
        model="claude-sonnet-4-5-20250929",
        max_tokens=1024,
        messages=[{"role": "user", "content": "Use a tool"}],
    ) as stream:
        async for event in stream:
            if event.type == "text":
                print(event.text, end="")
            elif event.type == "input_json":
                # Tool input being streamed
                print(f"Tool input delta: {event.partial_json}")
            elif event.type == "content_block_stop":
                print(f"\nBlock complete: {event.content_block}")
            elif event.type == "message_stop":
                print(f"\nFinal message: {event.message}")


asyncio.run(stream_events())
Tool Use
Defining and Using Tools
from anthropic import Anthropic

client = Anthropic()

# Define tools
tools = [
    {
        "name": "get_weather",
        "description": "Get current weather for a location",
        "input_schema": {
            "type": "object",
            "properties": {
                "location": {
                    "type": "string",
                    "description": "City and state, e.g. 'San Francisco, CA'",
                },
                "unit": {
                    "type": "string",
                    "enum": ["celsius", "fahrenheit"],
                    "description": "Temperature unit",
                },
            },
            "required": ["location"],
        },
    }
]

# Initial request
response = client.messages.create(
    model="claude-sonnet-4-5-20250929",
    max_tokens=1024,
    tools=tools,
    messages=[{"role": "user", "content": "What's the weather in Paris?"}],
)

# Check if tool use requested
if response.stop_reason == "tool_use":
    # A single response may contain several tool_use blocks; build one
    # tool_result per block, each tied to its request by tool_use_id.
    tool_results = [
        {
            "type": "tool_result",
            "tool_use_id": block.id,
            # Execute tool (your implementation)
            "content": execute_weather_lookup(block.input),
        }
        for block in response.content
        if block.type == "tool_use"
    ]

    # Continue conversation with tool result
    final_response = client.messages.create(
        model="claude-sonnet-4-5-20250929",
        max_tokens=1024,
        tools=tools,
        messages=[
            {"role": "user", "content": "What's the weather in Paris?"},
            # Echo the assistant turn back unchanged so the tool_use blocks
            # precede their results in the conversation.
            {"role": "assistant", "content": response.content},
            {"role": "user", "content": tool_results},
        ],
    )
Tool Choice Options
# The lines below are alternative values for the `tool_choice` keyword
# argument of client.messages.create(); pass exactly one of them.
# Let Claude decide whether to call a tool (default)
tool_choice={"type": "auto"}
# Force tool use: Claude must call one of the provided tools
tool_choice={"type": "any"}
# Force specific tool, selected by its name
tool_choice={"type": "tool", "name": "get_weather"}
# Disable tools for this request
tool_choice={"type": "none"}
Error Handling
import anthropic
from anthropic import Anthropic
client = Anthropic()
try:
response = client.messages.create(
model="claude-sonnet-4-5-20250514",
max_tokens=1024,
messages=[{"role": "user", "content": "Hello"}]
)
except anthropic.APIConnectionError as e:
# Network issues
print(f"Connection failed: {e.__cause__}")
except anthropic.RateLimitError as e:
# 429 - implement exponential backoff
print(f"Rate limited. Retry after backoff.")
except anthropic.BadRequestError as e:
# 400 - check your request
print(f"Bad request: {e.message}")
except anthropic.AuthenticationError as e:
# 401 - check
---
*Content truncated.*