diff --git a/CHANGELOG.md b/CHANGELOG.md
index 040184a..82a87a1 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -5,6 +5,23 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+## [Unreleased]
+
+### Added
+- Batch API: `client.batch` namespace for deferred-execution batch operations that pack multiple Dataverse Web API calls into a single `POST $batch` HTTP request (#129)
+- Batch DataFrame integration: `client.batch.dataframe` namespace with pandas DataFrame wrappers for batch operations (#129)
+- `client.records.upsert()` and `client.batch.records.upsert()` backed by the `UpsertMultiple` bound action with alternate-key support (#129)
+- QueryBuilder: `client.query.builder("table")` with a fluent API, 20+ chainable methods (`select`, `filter_eq`, `filter_contains`, `order_by`, `expand`, etc.), and composable filter expressions using Python operators (`&`, `|`, `~`) (#118)
+- Memo/multiline column type support: `"memo"` (or `"multiline"`) can now be passed as a column type in `client.tables.create()` and `client.tables.add_columns()` (#155)
+
+### Changed
+- Picklist label-to-integer resolution now uses a single bulk `PicklistAttributeMetadata` API call for the entire table instead of per-attribute requests, with a 1-hour TTL cache (#154)
+
+### Fixed
+- `client.query.sql()` silently truncated results at 5,000 rows. The method now follows `@odata.nextLink` pagination and returns all matching rows (#157)
+- Alternate key fields were incorrectly merged into the `UpsertMultiple` request body, causing `400 Bad Request` on the create path (#129)
+- Docstring type annotations corrected for Microsoft Learn API reference compatibility (#153)
+
## [0.1.0b7] - 2026-03-17
### Added
@@ -91,6 +108,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
- Comprehensive error handling with specific exception types (`DataverseError`, `AuthenticationError`, etc.) (#22, #24)
- HTTP retry logic with exponential backoff for resilient operations (#72)
+[Unreleased]: https://github.com/microsoft/PowerPlatform-DataverseClient-Python/compare/v0.1.0b7...HEAD
[0.1.0b7]: https://github.com/microsoft/PowerPlatform-DataverseClient-Python/compare/v0.1.0b6...v0.1.0b7
[0.1.0b6]: https://github.com/microsoft/PowerPlatform-DataverseClient-Python/compare/v0.1.0b5...v0.1.0b6
[0.1.0b5]: https://github.com/microsoft/PowerPlatform-DataverseClient-Python/compare/v0.1.0b4...v0.1.0b5
diff --git a/src/PowerPlatform/Dataverse/data/_odata.py b/src/PowerPlatform/Dataverse/data/_odata.py
index a0bf270..3ec490d 100644
--- a/src/PowerPlatform/Dataverse/data/_odata.py
+++ b/src/PowerPlatform/Dataverse/data/_odata.py
@@ -13,12 +13,13 @@
import re
import json
import uuid
+import warnings
from datetime import datetime, timezone
import importlib.resources as ir
from contextlib import contextmanager
from contextvars import ContextVar
-from urllib.parse import quote as _url_quote
+from urllib.parse import quote as _url_quote, parse_qs, urlparse
from ..core._http import _HttpClient
from ._upload import _FileUploadMixin
@@ -54,6 +55,34 @@
_DEFAULT_EXPECTED_STATUSES: tuple[int, ...] = (200, 201, 202, 204)
+def _extract_pagingcookie(next_link: str) -> Optional[str]:
+ """Extract the raw pagingcookie value from a SQL ``@odata.nextLink`` URL.
+
+ The Dataverse SQL endpoint has a server-side bug where the pagingcookie
+ (containing first/last record GUIDs) does not advance between pages even
+ though ``pagenumber`` increments. Detecting a repeated cookie lets the
+ pagination loop break instead of looping indefinitely.
+
+ Returns the pagingcookie string if present, or ``None`` if not found.
+ """
+ try:
+ qs = parse_qs(urlparse(next_link).query)
+ skiptoken = qs.get("$skiptoken", [None])[0]
+ if not skiptoken:
+ return None
+ # parse_qs already URL-decodes the value once, giving the outer XML with
+ # pagingcookie still percent-encoded (e.g. pagingcookie="%3ccookie...").
+ # A second decode is intentionally omitted: decoding again would turn %22
+ # into " inside the cookie XML, breaking the regex and causing every page
+ # to extract the same truncated prefix regardless of the actual GUIDs.
+ m = re.search(r'pagingcookie="([^"]+)"', skiptoken)
+ if m:
+ return m.group(1)
+ except Exception:
+ pass
+ return None
+
+
@dataclass
class _RequestContext:
"""Structured request context used by ``_request`` to clarify payload and metadata."""
@@ -776,15 +805,86 @@ def _query_sql(self, sql: str) -> list[dict[str, Any]]:
body = r.json()
except ValueError:
return []
- if isinstance(body, dict):
- value = body.get("value")
- if isinstance(value, list):
- # Ensure dict rows only
- return [row for row in value if isinstance(row, dict)]
- # Fallbacks: if body itself is a list
+
+ # Collect first page
+ results: list[dict[str, Any]] = []
if isinstance(body, list):
return [row for row in body if isinstance(row, dict)]
- return []
+ if not isinstance(body, dict):
+ return results
+
+ value = body.get("value")
+ if isinstance(value, list):
+ results = [row for row in value if isinstance(row, dict)]
+
+ # Follow pagination links until exhausted
+ raw_link = body.get("@odata.nextLink") or body.get("odata.nextLink")
+ next_link: str | None = raw_link if isinstance(raw_link, str) else None
+ visited: set[str] = set()
+ seen_cookies: set[str] = set()
+ while next_link:
+ # Guard 1: exact URL cycle (same next_link returned twice)
+ if next_link in visited:
+ warnings.warn(
+ f"SQL pagination stopped after {len(results)} rows — "
+ "the Dataverse server returned the same nextLink URL twice, "
+ "indicating an infinite pagination cycle. "
+ "Returning the rows collected so far. "
+ "To avoid pagination entirely, add a TOP clause to your query.",
+ RuntimeWarning,
+ stacklevel=4,
+ )
+ break
+ visited.add(next_link)
+ # Guard 2: server-side bug where pagingcookie does not advance between
+ # pages (pagenumber increments but cookie GUIDs stay the same), which
+ # causes an infinite loop even though URLs differ.
+ cookie = _extract_pagingcookie(next_link)
+ if cookie is not None:
+ if cookie in seen_cookies:
+ warnings.warn(
+ f"SQL pagination stopped after {len(results)} rows — "
+ "the Dataverse server returned the same pagingcookie twice "
+ "(pagenumber incremented but the paging position did not advance). "
+ "This is a server-side bug. Returning the rows collected so far. "
+ "To avoid pagination entirely, add a TOP clause to your query.",
+ RuntimeWarning,
+ stacklevel=4,
+ )
+ break
+ seen_cookies.add(cookie)
+ try:
+ page_resp = self._request("get", next_link)
+ except Exception as exc:
+ warnings.warn(
+ f"SQL pagination stopped after {len(results)} rows — "
+ f"the next-page request failed: {exc}. "
+ "Add a TOP clause to your query to limit results to a single page.",
+ RuntimeWarning,
+ stacklevel=5,
+ )
+ break
+ try:
+ page_body = page_resp.json()
+ except ValueError as exc:
+ warnings.warn(
+ f"SQL pagination stopped after {len(results)} rows — "
+ f"the next-page response was not valid JSON: {exc}. "
+ "Add a TOP clause to your query to limit results to a single page.",
+ RuntimeWarning,
+ stacklevel=5,
+ )
+ break
+ if not isinstance(page_body, dict):
+ break
+ page_value = page_body.get("value")
+ if not isinstance(page_value, list) or not page_value:
+ break
+ results.extend(row for row in page_value if isinstance(row, dict))
+ raw_link = page_body.get("@odata.nextLink") or page_body.get("odata.nextLink")
+ next_link = raw_link if isinstance(raw_link, str) else None
+
+ return results
@staticmethod
def _extract_logical_table(sql: str) -> str:
diff --git a/tests/unit/data/test_sql_parse.py b/tests/unit/data/test_sql_parse.py
index 12c25a9..29eb183 100644
--- a/tests/unit/data/test_sql_parse.py
+++ b/tests/unit/data/test_sql_parse.py
@@ -1,11 +1,11 @@
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT license.
-from unittest.mock import patch
+from unittest.mock import MagicMock, patch
from urllib.parse import parse_qs, urlparse
import pytest
-from PowerPlatform.Dataverse.data._odata import _ODataClient
+from PowerPlatform.Dataverse.data._odata import _ODataClient, _extract_pagingcookie
class DummyAuth:
@@ -109,3 +109,338 @@ def test_build_sql_equals_in_value_is_percent_encoded():
def test_build_sql_decoded_param_matches_input():
sql = "SELECT accountid, name FROM account WHERE statecode = 0"
assert _sql_param(_build(sql)) == sql
+
+
+# ---------------------------------------------------------------------------
+# _query_sql pagination
+# ---------------------------------------------------------------------------
+
+
+def _make_response(rows, next_link=None):
+ """Build a mock HTTP response whose .json() returns an OData page."""
+ body = {"value": rows}
+ if next_link:
+ body["@odata.nextLink"] = next_link
+ resp = MagicMock()
+ resp.json.return_value = body
+ return resp
+
+
+def _query_sql_client():
+ """Return a bare _ODataClient suitable for _query_sql patching."""
+ client = object.__new__(_ODataClient)
+ client.api = "https://org.crm.dynamics.com/api/data/v9.2"
+ return client
+
+
+def test_query_sql_single_page_returns_all_rows():
+ client = _query_sql_client()
+ page = _make_response([{"id": 1}, {"id": 2}])
+ with (
+ patch.object(client, "_execute_raw", return_value=page),
+ patch.object(client, "_build_sql", return_value=MagicMock()),
+ ):
+ result = client._query_sql("SELECT id FROM account")
+ assert result == [{"id": 1}, {"id": 2}]
+
+
+def test_query_sql_follows_next_link():
+ client = _query_sql_client()
+ page1 = _make_response([{"id": i} for i in range(5000)], next_link="https://org.example/page2")
+ page2 = _make_response([{"id": i} for i in range(5000, 6000)])
+
+ mock_request_resp = MagicMock()
+ mock_request_resp.json.return_value = page2.json.return_value
+
+ with (
+ patch.object(client, "_execute_raw", return_value=page1),
+ patch.object(client, "_build_sql", return_value=MagicMock()),
+ patch.object(client, "_request", return_value=mock_request_resp) as mock_req,
+ ):
+ result = client._query_sql("SELECT id FROM account")
+
+ assert len(result) == 6000
+ mock_req.assert_called_once_with("get", "https://org.example/page2")
+
+
+def test_query_sql_follows_odata_next_link_variant():
+ """Older OData format uses 'odata.nextLink' without the @ prefix."""
+ client = _query_sql_client()
+ page1_body = {"value": [{"id": 1}], "odata.nextLink": "https://org.example/page2"}
+ page2_body = {"value": [{"id": 2}]}
+
+ resp1 = MagicMock()
+ resp1.json.return_value = page1_body
+ resp2 = MagicMock()
+ resp2.json.return_value = page2_body
+
+ with (
+ patch.object(client, "_execute_raw", return_value=resp1),
+ patch.object(client, "_build_sql", return_value=MagicMock()),
+ patch.object(client, "_request", return_value=resp2),
+ ):
+ result = client._query_sql("SELECT id FROM account")
+
+ assert result == [{"id": 1}, {"id": 2}]
+
+
+def test_query_sql_multipage_collects_all():
+ """Three pages: verifies the loop continues past the second page."""
+ client = _query_sql_client()
+ page1 = _make_response([{"id": 1}], next_link="https://org.example/p2")
+ page2_body = {"value": [{"id": 2}], "@odata.nextLink": "https://org.example/p3"}
+ page3_body = {"value": [{"id": 3}]}
+
+ resp2 = MagicMock()
+ resp2.json.return_value = page2_body
+ resp3 = MagicMock()
+ resp3.json.return_value = page3_body
+
+ with (
+ patch.object(client, "_execute_raw", return_value=page1),
+ patch.object(client, "_build_sql", return_value=MagicMock()),
+ patch.object(client, "_request", side_effect=[resp2, resp3]),
+ ):
+ result = client._query_sql("SELECT id FROM account")
+
+ assert result == [{"id": 1}, {"id": 2}, {"id": 3}]
+
+
+def test_query_sql_mid_pagination_error_warns_and_returns_partial():
+ """A failing page mid-pagination emits a RuntimeWarning and returns rows collected so far."""
+ client = _query_sql_client()
+ page1 = _make_response([{"id": 1}], next_link="https://org.example/p2")
+
+ bad_resp = MagicMock()
+ bad_resp.json.side_effect = ValueError("not JSON")
+
+ with (
+ patch.object(client, "_execute_raw", return_value=page1),
+ patch.object(client, "_build_sql", return_value=MagicMock()),
+ patch.object(client, "_request", return_value=bad_resp),
+ ):
+ with pytest.warns(RuntimeWarning, match="pagination stopped"):
+ result = client._query_sql("SELECT id FROM account")
+
+ assert result == [{"id": 1}]
+
+
+def test_query_sql_repeated_next_link_warns_and_stops():
+ """If the server keeps returning the same @odata.nextLink a RuntimeWarning is emitted and
+ the loop stops without running forever."""
+ client = _query_sql_client()
+ # Both pages return the same next_link — simulates a server that re-executes the SQL
+ repeating_body = {"value": [{"id": 1}], "@odata.nextLink": "https://org.example/page2"}
+
+ resp1 = MagicMock()
+ resp1.json.return_value = repeating_body
+ resp2 = MagicMock()
+ resp2.json.return_value = repeating_body # same link again
+
+ with (
+ patch.object(client, "_execute_raw", return_value=resp1),
+ patch.object(client, "_build_sql", return_value=MagicMock()),
+ patch.object(client, "_request", return_value=resp2) as mock_req,
+ ):
+ with pytest.warns(RuntimeWarning, match="pagination stopped"):
+ result = client._query_sql("SELECT id FROM account")
+
+ # fetched page2 once, then detected the cycle and stopped
+ mock_req.assert_called_once_with("get", "https://org.example/page2")
+ assert result == [{"id": 1}, {"id": 1}]
+
+
+def test_query_sql_empty_page_stops_pagination():
+ """If a page returns an empty value array (but includes @odata.nextLink), stop — no infinite loop."""
+ client = _query_sql_client()
+ page1 = _make_response([{"id": 1}], next_link="https://org.example/p2")
+ empty_page_body = {"value": [], "@odata.nextLink": "https://org.example/p3"}
+
+ resp2 = MagicMock()
+ resp2.json.return_value = empty_page_body
+
+ with (
+ patch.object(client, "_execute_raw", return_value=page1),
+ patch.object(client, "_build_sql", return_value=MagicMock()),
+ patch.object(client, "_request", return_value=resp2) as mock_req,
+ ):
+ result = client._query_sql("SELECT id FROM account")
+
+ assert result == [{"id": 1}]
+ mock_req.assert_called_once() # fetched p2, did not follow p3
+
+
+def test_query_sql_non_string_next_link_stops_pagination():
+ """A non-string @odata.nextLink value (e.g. a boolean) does not trigger a request."""
+ client = _query_sql_client()
+ page1_body = {"value": [{"id": 1}], "@odata.nextLink": True}
+
+ resp1 = MagicMock()
+ resp1.json.return_value = page1_body
+
+ with (
+ patch.object(client, "_execute_raw", return_value=resp1),
+ patch.object(client, "_build_sql", return_value=MagicMock()),
+ patch.object(client, "_request") as mock_req,
+ ):
+ result = client._query_sql("SELECT id FROM account")
+
+ assert result == [{"id": 1}]
+ mock_req.assert_not_called()
+
+
+def test_query_sql_stuck_pagingcookie_warns_and_stops():
+ """When the server returns the same pagingcookie on successive pages (server-side bug),
+ pagination must stop and a RuntimeWarning must be emitted."""
+ import warnings
+ from urllib.parse import quote as _url_quote
+
+ client = _query_sql_client()
+
+ # Build a next_link that carries a recognisable pagingcookie.
+ # The pagingcookie attribute value is itself URL-encoded inside the skiptoken
+ # (matching the double-encoding the real Dataverse server produces).
+ inner_cookie = "%3ccookie%20page%3d%221%22%3e%3caccountid%20last%3d%22%7bAAA%7d%22%20first%3d%22%7bBBB%7d%22%20%2f%3e%3c%2fcookie%3e"
+    skiptoken_xml = f'&lt;cookie pagenumber="2" pagingcookie="{inner_cookie}" istracking="False" /&gt;'
+ encoded_skiptoken = _url_quote(skiptoken_xml)
+ next_link_p2 = f"https://org.example/api/data/v9.2?$skiptoken={encoded_skiptoken}"
+ next_link_p3 = f"https://org.example/api/data/v9.2?$skiptoken={encoded_skiptoken}&extra=1"
+
+ page1_body = {"value": [{"id": 1}], "@odata.nextLink": next_link_p2}
+ # Page 2 carries a *different* URL but the same pagingcookie content → server bug
+ page2_body = {"value": [{"id": 2}], "@odata.nextLink": next_link_p3}
+
+ resp1 = MagicMock()
+ resp1.json.return_value = page1_body
+ resp2 = MagicMock()
+ resp2.json.return_value = page2_body
+
+ with (
+ patch.object(client, "_execute_raw", return_value=resp1),
+ patch.object(client, "_build_sql", return_value=MagicMock()),
+ patch.object(client, "_request", return_value=resp2) as mock_req,
+ ):
+ with warnings.catch_warnings(record=True) as caught:
+ warnings.simplefilter("always")
+ result = client._query_sql("SELECT id FROM account")
+
+ # Page 2 was fetched; page 3 was not (cookie repeat detected after page 2)
+ mock_req.assert_called_once_with("get", next_link_p2)
+ assert result == [{"id": 1}, {"id": 2}]
+
+ assert len(caught) == 1
+ w = caught[0]
+ assert issubclass(w.category, RuntimeWarning)
+ assert "pagingcookie" in str(w.message).lower()
+ assert "server" in str(w.message).lower()
+
+
+# ---------------------------------------------------------------------------
+# _extract_pagingcookie unit tests
+# ---------------------------------------------------------------------------
+
+
+def _make_next_link(pagingcookie_inner: str, pagenumber: int = 2) -> str:
+ """Build a double-encoded nextLink URL matching the real Dataverse format."""
+ from urllib.parse import quote as _url_quote
+
+    skiptoken_xml = (
+        f'&lt;cookie pagenumber="{pagenumber}" pagingcookie="{pagingcookie_inner}" istracking="False" /&gt;'
+    )
+ return (
+ f"https://org.example/api/data/v9.2?$sql=SELECT%20name%20FROM%20account&$skiptoken={_url_quote(skiptoken_xml)}"
+ )
+
+
+def test_extract_pagingcookie_returns_cookie_value():
+ """Returns the pagingcookie attribute value from a well-formed nextLink."""
+ inner = "%3ccookie%20page%3d%221%22%3e%3caccountid%20last%3d%22%7bAAA%7d%22%20first%3d%22%7bBBB%7d%22%20%2f%3e%3c%2fcookie%3e"
+ url = _make_next_link(inner)
+ result = _extract_pagingcookie(url)
+ assert result == inner
+
+
+def test_extract_pagingcookie_no_skiptoken_returns_none():
+ """Returns None when the URL has no $skiptoken parameter."""
+ url = "https://org.example/api/data/v9.2?$sql=SELECT%20name%20FROM%20account"
+ assert _extract_pagingcookie(url) is None
+
+
+def test_extract_pagingcookie_empty_skiptoken_returns_none():
+ """Returns None when $skiptoken is present but empty."""
+ url = "https://org.example/api/data/v9.2?$sql=SELECT%20name%20FROM%20account&$skiptoken="
+ assert _extract_pagingcookie(url) is None
+
+
+def test_extract_pagingcookie_no_pagingcookie_attr_returns_none():
+ """Returns None when $skiptoken exists but contains no pagingcookie attribute."""
+ from urllib.parse import quote as _url_quote
+
+    skiptoken_xml = '&lt;cookie pagenumber="2" istracking="False" /&gt;'
+ url = f"https://org.example/api/data/v9.2?$skiptoken={_url_quote(skiptoken_xml)}"
+ assert _extract_pagingcookie(url) is None
+
+
+def test_extract_pagingcookie_different_pagenumbers_same_cookie():
+ """Two URLs with different pagenumbers but the same pagingcookie produce equal return values."""
+ inner = "%3ccookie%20page%3d%221%22%3e%3caccountid%20last%3d%22%7bAAA%7d%22%20first%3d%22%7bBBB%7d%22%20%2f%3e%3c%2fcookie%3e"
+ url_p2 = _make_next_link(inner, pagenumber=2)
+ url_p3 = _make_next_link(inner, pagenumber=3)
+ assert _extract_pagingcookie(url_p2) == _extract_pagingcookie(url_p3)
+
+
+def test_extract_pagingcookie_different_cookies_not_equal():
+ """Two URLs with different pagingcookie GUIDs produce different return values."""
+ inner_1 = "%3ccookie%20page%3d%221%22%3e%3caccountid%20last%3d%22%7bAAA%7d%22%20first%3d%22%7bBBB%7d%22%20%2f%3e%3c%2fcookie%3e"
+ inner_2 = "%3ccookie%20page%3d%222%22%3e%3caccountid%20last%3d%22%7bCCC%7d%22%20first%3d%22%7bDDD%7d%22%20%2f%3e%3c%2fcookie%3e"
+ url_p2 = _make_next_link(inner_1, pagenumber=2)
+ url_p3 = _make_next_link(inner_2, pagenumber=3)
+ assert _extract_pagingcookie(url_p2) != _extract_pagingcookie(url_p3)
+
+
+def test_extract_pagingcookie_malformed_url_returns_none():
+ """Returns None gracefully when given a non-URL string."""
+ assert _extract_pagingcookie("not a url at all !!!") is None
+
+
+def test_extract_pagingcookie_exception_returns_none():
+ """Returns None when an unexpected exception is raised during URL parsing (except branch)."""
+ with patch("PowerPlatform.Dataverse.data._odata.urlparse", side_effect=RuntimeError("boom")):
+ assert _extract_pagingcookie("https://org.example/?$skiptoken=x") is None
+
+
+def test_query_sql_request_exception_warns_and_returns_partial():
+ """When _request raises an exception mid-pagination a RuntimeWarning is emitted and
+ the rows collected so far are returned."""
+ client = _query_sql_client()
+ page1 = _make_response([{"id": 1}], next_link="https://org.example/p2")
+
+ with (
+ patch.object(client, "_execute_raw", return_value=page1),
+ patch.object(client, "_build_sql", return_value=MagicMock()),
+ patch.object(client, "_request", side_effect=ConnectionError("network timeout")),
+ ):
+ with pytest.warns(RuntimeWarning, match="pagination stopped"):
+ result = client._query_sql("SELECT id FROM account")
+
+ assert result == [{"id": 1}]
+
+
+def test_query_sql_non_dict_page_body_stops_pagination():
+ """When a pagination response contains valid JSON that is not a dict (e.g. a list),
+ pagination stops silently and the rows collected so far are returned."""
+ client = _query_sql_client()
+ page1 = _make_response([{"id": 1}], next_link="https://org.example/p2")
+
+ bad_resp = MagicMock()
+ bad_resp.json.return_value = [{"id": 2}] # a list, not a dict
+
+ with (
+ patch.object(client, "_execute_raw", return_value=page1),
+ patch.object(client, "_build_sql", return_value=MagicMock()),
+ patch.object(client, "_request", return_value=bad_resp) as mock_req,
+ ):
+ result = client._query_sql("SELECT id FROM account")
+
+ mock_req.assert_called_once_with("get", "https://org.example/p2")
+ assert result == [{"id": 1}]