Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 19 additions & 4 deletions src/google/adk/models/lite_llm.py
Original file line number Diff line number Diff line change
Expand Up @@ -1151,14 +1151,20 @@ def _is_ollama_chat_provider(
return False


# Content-block ``type`` values that mark media parts. A block list containing
# any of these is returned unflattened by ``_flatten_ollama_content``.
# Kept as a frozenset so this shared module-level constant cannot be mutated
# by accident; membership tests behave exactly as with a plain set.
_MEDIA_BLOCK_TYPES = frozenset({"image_url", "video_url", "audio_url"})


def _flatten_ollama_content(
content: OpenAIMessageContent | str | None,
) -> str | None:
) -> OpenAIMessageContent | str | None:
"""Flattens multipart content to text for ollama_chat compatibility.

Ollama's chat endpoint rejects arrays for `content`. We keep textual parts,
join them with newlines, and fall back to a JSON string for non-text content.
If both text and non-text parts are present, only the text parts are kept.
Ollama's chat endpoint rejects arrays for `content` when only text is
present. However, LiteLLM's Ollama handler can convert multipart arrays
that contain media blocks (image_url, video_url, audio_url) into Ollama's
native format (e.g. the ``images`` field). So we only flatten to a plain
string when the content is text-only; mixed content with media blocks is
returned as-is so LiteLLM can handle the conversion.
"""
if content is None or isinstance(content, str):
return content
Expand All @@ -1176,6 +1182,15 @@ def _flatten_ollama_content(
except TypeError:
return str(content)

# If any block carries media data, keep the full multipart list so
# LiteLLM can convert it to Ollama's native format.
has_media = any(
isinstance(b, dict) and b.get("type") in _MEDIA_BLOCK_TYPES
for b in blocks
)
if has_media:
return blocks

text_parts = []
for block in blocks:
if isinstance(block, dict) and block.get("type") == "text":
Expand Down
95 changes: 74 additions & 21 deletions tests/unittests/models/test_litellm.py
Original file line number Diff line number Diff line change
Expand Up @@ -1689,7 +1689,7 @@ async def test_generate_content_async_with_usage_metadata(


@pytest.mark.asyncio
async def test_generate_content_async_ollama_chat_flattens_content(
async def test_generate_content_async_ollama_chat_preserves_multimodal_content(
mock_acompletion, mock_completion
):
llm_client = MockLLMClient(mock_acompletion, mock_completion)
Expand Down Expand Up @@ -1721,12 +1721,26 @@ async def test_generate_content_async_ollama_chat_flattens_content(
)
_, kwargs = mock_acompletion.call_args
message_content = kwargs["messages"][0]["content"]
assert isinstance(message_content, str)
assert "Describe this image." in message_content
# Multimodal content (text + image) should be kept as a list so LiteLLM
# can convert it to Ollama's native images field.
assert isinstance(message_content, list)
text_blocks = [
b
for b in message_content
if isinstance(b, dict) and b.get("type") == "text"
]
image_blocks = [
b
for b in message_content
if isinstance(b, dict) and b.get("type") == "image_url"
]
assert len(text_blocks) >= 1
assert "Describe this image." in text_blocks[0].get("text", "")
assert len(image_blocks) >= 1


@pytest.mark.asyncio
async def test_generate_content_async_custom_provider_flattens_content(
async def test_generate_content_async_custom_provider_preserves_multimodal(
mock_acompletion, mock_completion
):
llm_client = MockLLMClient(mock_acompletion, mock_completion)
Expand Down Expand Up @@ -1757,8 +1771,14 @@ async def test_generate_content_async_custom_provider_flattens_content(
assert kwargs["custom_llm_provider"] == "ollama_chat"
assert kwargs["model"] == "qwen2.5:7b"
message_content = kwargs["messages"][0]["content"]
assert isinstance(message_content, str)
assert "Describe this image." in message_content
# Multimodal content should be preserved as a list.
assert isinstance(message_content, list)
text_blocks = [
b
for b in message_content
if isinstance(b, dict) and b.get("type") == "text"
]
assert any("Describe this image." in b.get("text", "") for b in text_blocks)


def test_flatten_ollama_content_accepts_tuple_blocks():
Expand All @@ -1784,16 +1804,6 @@ def test_flatten_ollama_content_accepts_tuple_blocks():
],
"first\nsecond",
),
(
[
{"type": "text", "text": "Describe this image."},
{
"type": "image_url",
"image_url": {"url": "http://example.com"},
},
],
"Describe this image.",
),
],
)
def test_flatten_ollama_content_returns_str_or_none(content, expected):
Expand All @@ -1804,15 +1814,58 @@ def test_flatten_ollama_content_returns_str_or_none(content, expected):
assert flattened is None or isinstance(flattened, str)


def test_flatten_ollama_content_serializes_non_text_blocks_to_json():
def test_flatten_ollama_content_preserves_image_url_blocks():
"""Media blocks should be kept as a list so LiteLLM can convert them."""
from google.adk.models.lite_llm import _flatten_ollama_content

blocks = [
{"type": "image_url", "image_url": {"url": "http://example.com"}},
{"type": "image_url", "image_url": {"url": "http://example.com/img.png"}},
]
flattened = _flatten_ollama_content(blocks)
assert isinstance(flattened, str)
assert json.loads(flattened) == blocks
result = _flatten_ollama_content(blocks)
assert isinstance(result, list)
assert result == blocks


def test_flatten_ollama_content_preserves_mixed_text_and_image():
    """A text block followed by an image_url block should stay a full list."""
    from google.adk.models.lite_llm import _flatten_ollama_content

    text_block = {"type": "text", "text": "Describe this image."}
    image_block = {
        "type": "image_url",
        "image_url": {"url": "data:image/png;base64,iVBORw0KGgo="},
    }

    result = _flatten_ollama_content([text_block, image_block])

    # Both parts must survive, in their original order.
    assert isinstance(result, list)
    assert len(result) == 2
    first, second = result
    assert first["type"] == "text"
    assert second["type"] == "image_url"


def test_flatten_ollama_content_preserves_video_url_blocks():
    """Text plus a video_url block should come back as a two-item list."""
    from google.adk.models.lite_llm import _flatten_ollama_content

    payload = [
        {"type": "text", "text": "What happens in this clip?"},
        {
            "type": "video_url",
            "video_url": {"url": "http://example.com/v.mp4"},
        },
    ]

    result = _flatten_ollama_content(payload)

    assert isinstance(result, list)
    assert len(result) == 2


def test_flatten_ollama_content_serializes_non_media_non_text_blocks_to_json():
    """A lone block of an unknown, non-media type should serialize to JSON."""
    from google.adk.models.lite_llm import _flatten_ollama_content

    unknown_blocks = [{"type": "custom_block", "data": "something"}]

    flattened = _flatten_ollama_content(unknown_blocks)

    assert isinstance(flattened, str)
    # The string must round-trip back to the original block list.
    decoded = json.loads(flattened)
    assert decoded == unknown_blocks


def test_flatten_ollama_content_serializes_dict_to_json():
Expand Down
Loading