diff --git a/CHANGELOG.md b/CHANGELOG.md index 040184a..82a87a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -5,6 +5,23 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [Unreleased] + +### Added +- Batch API: `client.batch` namespace for deferred-execution batch operations that pack multiple Dataverse Web API calls into a single `POST $batch` HTTP request (#129) +- Batch DataFrame integration: `client.batch.dataframe` namespace with pandas DataFrame wrappers for batch operations (#129) +- `client.records.upsert()` and `client.batch.records.upsert()` backed by the `UpsertMultiple` bound action with alternate-key support (#129) +- QueryBuilder: `client.query.builder("table")` with a fluent API, 20+ chainable methods (`select`, `filter_eq`, `filter_contains`, `order_by`, `expand`, etc.), and composable filter expressions using Python operators (`&`, `|`, `~`) (#118) +- Memo/multiline column type support: `"memo"` (or `"multiline"`) can now be passed as a column type in `client.tables.create()` and `client.tables.add_columns()` (#155) + +### Changed +- Picklist label-to-integer resolution now uses a single bulk `PicklistAttributeMetadata` API call for the entire table instead of per-attribute requests, with a 1-hour TTL cache (#154) + +### Fixed +- `client.query.sql()` silently truncated results at 5,000 rows. The method now follows `@odata.nextLink` pagination and returns all matching rows (#157). 
+- Alternate key fields were incorrectly merged into the `UpsertMultiple` request body, causing `400 Bad Request` on the create path (#129) +- Docstring type annotations corrected for Microsoft Learn API reference compatibility (#153) + ## [0.1.0b7] - 2026-03-17 ### Added @@ -91,6 +108,7 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 - Comprehensive error handling with specific exception types (`DataverseError`, `AuthenticationError`, etc.) (#22, #24) - HTTP retry logic with exponential backoff for resilient operations (#72) +[Unreleased]: https://github.com/microsoft/PowerPlatform-DataverseClient-Python/compare/v0.1.0b7...HEAD [0.1.0b7]: https://github.com/microsoft/PowerPlatform-DataverseClient-Python/compare/v0.1.0b6...v0.1.0b7 [0.1.0b6]: https://github.com/microsoft/PowerPlatform-DataverseClient-Python/compare/v0.1.0b5...v0.1.0b6 [0.1.0b5]: https://github.com/microsoft/PowerPlatform-DataverseClient-Python/compare/v0.1.0b4...v0.1.0b5 diff --git a/src/PowerPlatform/Dataverse/data/_odata.py b/src/PowerPlatform/Dataverse/data/_odata.py index a0bf270..3ec490d 100644 --- a/src/PowerPlatform/Dataverse/data/_odata.py +++ b/src/PowerPlatform/Dataverse/data/_odata.py @@ -13,12 +13,13 @@ import re import json import uuid +import warnings from datetime import datetime, timezone import importlib.resources as ir from contextlib import contextmanager from contextvars import ContextVar -from urllib.parse import quote as _url_quote +from urllib.parse import quote as _url_quote, parse_qs, urlparse from ..core._http import _HttpClient from ._upload import _FileUploadMixin @@ -54,6 +55,34 @@ _DEFAULT_EXPECTED_STATUSES: tuple[int, ...] = (200, 201, 202, 204) +def _extract_pagingcookie(next_link: str) -> Optional[str]: + """Extract the raw pagingcookie value from a SQL ``@odata.nextLink`` URL. 
+ + The Dataverse SQL endpoint has a server-side bug where the pagingcookie + (containing first/last record GUIDs) does not advance between pages even + though ``pagenumber`` increments. Detecting a repeated cookie lets the + pagination loop break instead of looping indefinitely. + + Returns the pagingcookie string if present, or ``None`` if not found. + """ + try: + qs = parse_qs(urlparse(next_link).query) + skiptoken = qs.get("$skiptoken", [None])[0] + if not skiptoken: + return None + # parse_qs already URL-decodes the value once, giving the outer XML with + # pagingcookie still percent-encoded (e.g. pagingcookie="%3ccookie..."). + # A second decode is intentionally omitted: decoding again would turn %22 + # into " inside the cookie XML, breaking the regex and causing every page + # to extract the same truncated prefix regardless of the actual GUIDs. + m = re.search(r'pagingcookie="([^"]+)"', skiptoken) + if m: + return m.group(1) + except Exception: + pass + return None + + @dataclass class _RequestContext: """Structured request context used by ``_request`` to clarify payload and metadata.""" @@ -776,15 +805,86 @@ def _query_sql(self, sql: str) -> list[dict[str, Any]]: body = r.json() except ValueError: return [] - if isinstance(body, dict): - value = body.get("value") - if isinstance(value, list): - # Ensure dict rows only - return [row for row in value if isinstance(row, dict)] - # Fallbacks: if body itself is a list + + # Collect first page + results: list[dict[str, Any]] = [] if isinstance(body, list): return [row for row in body if isinstance(row, dict)] - return [] + if not isinstance(body, dict): + return results + + value = body.get("value") + if isinstance(value, list): + results = [row for row in value if isinstance(row, dict)] + + # Follow pagination links until exhausted + raw_link = body.get("@odata.nextLink") or body.get("odata.nextLink") + next_link: str | None = raw_link if isinstance(raw_link, str) else None + visited: set[str] = set() + 
seen_cookies: set[str] = set() + while next_link: + # Guard 1: exact URL cycle (same next_link returned twice) + if next_link in visited: + warnings.warn( + f"SQL pagination stopped after {len(results)} rows — " + "the Dataverse server returned the same nextLink URL twice, " + "indicating an infinite pagination cycle. " + "Returning the rows collected so far. " + "To avoid pagination entirely, add a TOP clause to your query.", + RuntimeWarning, + stacklevel=4, + ) + break + visited.add(next_link) + # Guard 2: server-side bug where pagingcookie does not advance between + # pages (pagenumber increments but cookie GUIDs stay the same), which + # causes an infinite loop even though URLs differ. + cookie = _extract_pagingcookie(next_link) + if cookie is not None: + if cookie in seen_cookies: + warnings.warn( + f"SQL pagination stopped after {len(results)} rows — " + "the Dataverse server returned the same pagingcookie twice " + "(pagenumber incremented but the paging position did not advance). " + "This is a server-side bug. Returning the rows collected so far. " + "To avoid pagination entirely, add a TOP clause to your query.", + RuntimeWarning, + stacklevel=4, + ) + break + seen_cookies.add(cookie) + try: + page_resp = self._request("get", next_link) + except Exception as exc: + warnings.warn( + f"SQL pagination stopped after {len(results)} rows — " + f"the next-page request failed: {exc}. " + "Add a TOP clause to your query to limit results to a single page.", + RuntimeWarning, + stacklevel=5, + ) + break + try: + page_body = page_resp.json() + except ValueError as exc: + warnings.warn( + f"SQL pagination stopped after {len(results)} rows — " + f"the next-page response was not valid JSON: {exc}. 
" + "Add a TOP clause to your query to limit results to a single page.", + RuntimeWarning, + stacklevel=5, + ) + break + if not isinstance(page_body, dict): + break + page_value = page_body.get("value") + if not isinstance(page_value, list) or not page_value: + break + results.extend(row for row in page_value if isinstance(row, dict)) + raw_link = page_body.get("@odata.nextLink") or page_body.get("odata.nextLink") + next_link = raw_link if isinstance(raw_link, str) else None + + return results @staticmethod def _extract_logical_table(sql: str) -> str: diff --git a/tests/unit/data/test_sql_parse.py b/tests/unit/data/test_sql_parse.py index 12c25a9..29eb183 100644 --- a/tests/unit/data/test_sql_parse.py +++ b/tests/unit/data/test_sql_parse.py @@ -1,11 +1,11 @@ # Copyright (c) Microsoft Corporation. # Licensed under the MIT license. -from unittest.mock import patch +from unittest.mock import MagicMock, patch from urllib.parse import parse_qs, urlparse import pytest -from PowerPlatform.Dataverse.data._odata import _ODataClient +from PowerPlatform.Dataverse.data._odata import _ODataClient, _extract_pagingcookie class DummyAuth: @@ -109,3 +109,338 @@ def test_build_sql_equals_in_value_is_percent_encoded(): def test_build_sql_decoded_param_matches_input(): sql = "SELECT accountid, name FROM account WHERE statecode = 0" assert _sql_param(_build(sql)) == sql + + +# --------------------------------------------------------------------------- +# _query_sql pagination +# --------------------------------------------------------------------------- + + +def _make_response(rows, next_link=None): + """Build a mock HTTP response whose .json() returns an OData page.""" + body = {"value": rows} + if next_link: + body["@odata.nextLink"] = next_link + resp = MagicMock() + resp.json.return_value = body + return resp + + +def _query_sql_client(): + """Return a bare _ODataClient suitable for _query_sql patching.""" + client = object.__new__(_ODataClient) + client.api = 
"https://org.crm.dynamics.com/api/data/v9.2" + return client + + +def test_query_sql_single_page_returns_all_rows(): + client = _query_sql_client() + page = _make_response([{"id": 1}, {"id": 2}]) + with ( + patch.object(client, "_execute_raw", return_value=page), + patch.object(client, "_build_sql", return_value=MagicMock()), + ): + result = client._query_sql("SELECT id FROM account") + assert result == [{"id": 1}, {"id": 2}] + + +def test_query_sql_follows_next_link(): + client = _query_sql_client() + page1 = _make_response([{"id": i} for i in range(5000)], next_link="https://org.example/page2") + page2 = _make_response([{"id": i} for i in range(5000, 6000)]) + + mock_request_resp = MagicMock() + mock_request_resp.json.return_value = page2.json.return_value + + with ( + patch.object(client, "_execute_raw", return_value=page1), + patch.object(client, "_build_sql", return_value=MagicMock()), + patch.object(client, "_request", return_value=mock_request_resp) as mock_req, + ): + result = client._query_sql("SELECT id FROM account") + + assert len(result) == 6000 + mock_req.assert_called_once_with("get", "https://org.example/page2") + + +def test_query_sql_follows_odata_next_link_variant(): + """Older OData format uses 'odata.nextLink' without the @ prefix.""" + client = _query_sql_client() + page1_body = {"value": [{"id": 1}], "odata.nextLink": "https://org.example/page2"} + page2_body = {"value": [{"id": 2}]} + + resp1 = MagicMock() + resp1.json.return_value = page1_body + resp2 = MagicMock() + resp2.json.return_value = page2_body + + with ( + patch.object(client, "_execute_raw", return_value=resp1), + patch.object(client, "_build_sql", return_value=MagicMock()), + patch.object(client, "_request", return_value=resp2), + ): + result = client._query_sql("SELECT id FROM account") + + assert result == [{"id": 1}, {"id": 2}] + + +def test_query_sql_multipage_collects_all(): + """Three pages: verifies the loop continues past the second page.""" + client = 
_query_sql_client() + page1 = _make_response([{"id": 1}], next_link="https://org.example/p2") + page2_body = {"value": [{"id": 2}], "@odata.nextLink": "https://org.example/p3"} + page3_body = {"value": [{"id": 3}]} + + resp2 = MagicMock() + resp2.json.return_value = page2_body + resp3 = MagicMock() + resp3.json.return_value = page3_body + + with ( + patch.object(client, "_execute_raw", return_value=page1), + patch.object(client, "_build_sql", return_value=MagicMock()), + patch.object(client, "_request", side_effect=[resp2, resp3]), + ): + result = client._query_sql("SELECT id FROM account") + + assert result == [{"id": 1}, {"id": 2}, {"id": 3}] + + +def test_query_sql_mid_pagination_error_warns_and_returns_partial(): + """A failing page mid-pagination emits a RuntimeWarning and returns rows collected so far.""" + client = _query_sql_client() + page1 = _make_response([{"id": 1}], next_link="https://org.example/p2") + + bad_resp = MagicMock() + bad_resp.json.side_effect = ValueError("not JSON") + + with ( + patch.object(client, "_execute_raw", return_value=page1), + patch.object(client, "_build_sql", return_value=MagicMock()), + patch.object(client, "_request", return_value=bad_resp), + ): + with pytest.warns(RuntimeWarning, match="pagination stopped"): + result = client._query_sql("SELECT id FROM account") + + assert result == [{"id": 1}] + + +def test_query_sql_repeated_next_link_warns_and_stops(): + """If the server keeps returning the same @odata.nextLink a RuntimeWarning is emitted and + the loop stops without running forever.""" + client = _query_sql_client() + # Both pages return the same next_link — simulates a server that re-executes the SQL + repeating_body = {"value": [{"id": 1}], "@odata.nextLink": "https://org.example/page2"} + + resp1 = MagicMock() + resp1.json.return_value = repeating_body + resp2 = MagicMock() + resp2.json.return_value = repeating_body # same link again + + with ( + patch.object(client, "_execute_raw", return_value=resp1), + 
patch.object(client, "_build_sql", return_value=MagicMock()), + patch.object(client, "_request", return_value=resp2) as mock_req, + ): + with pytest.warns(RuntimeWarning, match="pagination stopped"): + result = client._query_sql("SELECT id FROM account") + + # fetched page2 once, then detected the cycle and stopped + mock_req.assert_called_once_with("get", "https://org.example/page2") + assert result == [{"id": 1}, {"id": 1}] + + +def test_query_sql_empty_page_stops_pagination(): + """If a page returns an empty value array (but includes @odata.nextLink), stop — no infinite loop.""" + client = _query_sql_client() + page1 = _make_response([{"id": 1}], next_link="https://org.example/p2") + empty_page_body = {"value": [], "@odata.nextLink": "https://org.example/p3"} + + resp2 = MagicMock() + resp2.json.return_value = empty_page_body + + with ( + patch.object(client, "_execute_raw", return_value=page1), + patch.object(client, "_build_sql", return_value=MagicMock()), + patch.object(client, "_request", return_value=resp2) as mock_req, + ): + result = client._query_sql("SELECT id FROM account") + + assert result == [{"id": 1}] + mock_req.assert_called_once() # fetched p2, did not follow p3 + + +def test_query_sql_non_string_next_link_stops_pagination(): + """A non-string @odata.nextLink value (e.g. 
a boolean) does not trigger a request.""" + client = _query_sql_client() + page1_body = {"value": [{"id": 1}], "@odata.nextLink": True} + + resp1 = MagicMock() + resp1.json.return_value = page1_body + + with ( + patch.object(client, "_execute_raw", return_value=resp1), + patch.object(client, "_build_sql", return_value=MagicMock()), + patch.object(client, "_request") as mock_req, + ): + result = client._query_sql("SELECT id FROM account") + + assert result == [{"id": 1}] + mock_req.assert_not_called() + + +def test_query_sql_stuck_pagingcookie_warns_and_stops(): + """When the server returns the same pagingcookie on successive pages (server-side bug), + pagination must stop and a RuntimeWarning must be emitted.""" + import warnings + from urllib.parse import quote as _url_quote + + client = _query_sql_client() + + # Build a next_link that carries a recognisable pagingcookie. + # The pagingcookie attribute value is itself URL-encoded inside the skiptoken + # (matching the double-encoding the real Dataverse server produces). 
+    inner_cookie = "%3ccookie%20page%3d%221%22%3e%3caccountid%20last%3d%22%7bAAA%7d%22%20first%3d%22%7bBBB%7d%22%20%2f%3e%3c%2fcookie%3e"
+    skiptoken_xml = f'<cookie pagenumber="2" pagingcookie="{inner_cookie}" istracking="False" />'
+    encoded_skiptoken = _url_quote(skiptoken_xml)
+    next_link_p2 = f"https://org.example/api/data/v9.2?$skiptoken={encoded_skiptoken}"
+    next_link_p3 = f"https://org.example/api/data/v9.2?$skiptoken={encoded_skiptoken}&extra=1"
+
+    page1_body = {"value": [{"id": 1}], "@odata.nextLink": next_link_p2}
+    # Page 2 carries a *different* URL but the same pagingcookie content → server bug
+    page2_body = {"value": [{"id": 2}], "@odata.nextLink": next_link_p3}
+
+    resp1 = MagicMock()
+    resp1.json.return_value = page1_body
+    resp2 = MagicMock()
+    resp2.json.return_value = page2_body
+
+    with (
+        patch.object(client, "_execute_raw", return_value=resp1),
+        patch.object(client, "_build_sql", return_value=MagicMock()),
+        patch.object(client, "_request", return_value=resp2) as mock_req,
+    ):
+        with warnings.catch_warnings(record=True) as caught:
+            warnings.simplefilter("always")
+            result = client._query_sql("SELECT id FROM account")
+
+    # Page 2 was fetched; page 3 was not (cookie repeat detected after page 2)
+    mock_req.assert_called_once_with("get", next_link_p2)
+    assert result == [{"id": 1}, {"id": 2}]
+
+    assert len(caught) == 1
+    w = caught[0]
+    assert issubclass(w.category, RuntimeWarning)
+    assert "pagingcookie" in str(w.message).lower()
+    assert "server" in str(w.message).lower()
+
+
+# ---------------------------------------------------------------------------
+# _extract_pagingcookie unit tests
+# ---------------------------------------------------------------------------
+
+
+def _make_next_link(pagingcookie_inner: str, pagenumber: int = 2) -> str:
+    """Build a double-encoded nextLink URL matching the real Dataverse format."""
+    from urllib.parse import quote as _url_quote
+
+    skiptoken_xml = (
+        f'<cookie pagenumber="{pagenumber}" pagingcookie="{pagingcookie_inner}" istracking="False" />'
+    )
+    return (
+        
f"https://org.example/api/data/v9.2?$sql=SELECT%20name%20FROM%20account&$skiptoken={_url_quote(skiptoken_xml)}"
+    )
+
+
+def test_extract_pagingcookie_returns_cookie_value():
+    """Returns the pagingcookie attribute value from a well-formed nextLink."""
+    inner = "%3ccookie%20page%3d%221%22%3e%3caccountid%20last%3d%22%7bAAA%7d%22%20first%3d%22%7bBBB%7d%22%20%2f%3e%3c%2fcookie%3e"
+    url = _make_next_link(inner)
+    result = _extract_pagingcookie(url)
+    assert result == inner
+
+
+def test_extract_pagingcookie_no_skiptoken_returns_none():
+    """Returns None when the URL has no $skiptoken parameter."""
+    url = "https://org.example/api/data/v9.2?$sql=SELECT%20name%20FROM%20account"
+    assert _extract_pagingcookie(url) is None
+
+
+def test_extract_pagingcookie_empty_skiptoken_returns_none():
+    """Returns None when $skiptoken is present but empty."""
+    url = "https://org.example/api/data/v9.2?$sql=SELECT%20name%20FROM%20account&$skiptoken="
+    assert _extract_pagingcookie(url) is None
+
+
+def test_extract_pagingcookie_no_pagingcookie_attr_returns_none():
+    """Returns None when $skiptoken exists but contains no pagingcookie attribute."""
+    from urllib.parse import quote as _url_quote
+
+    skiptoken_xml = '<cookie pagenumber="2" istracking="False" />'
+    url = f"https://org.example/api/data/v9.2?$skiptoken={_url_quote(skiptoken_xml)}"
+    assert _extract_pagingcookie(url) is None
+
+
+def test_extract_pagingcookie_different_pagenumbers_same_cookie():
+    """Two URLs with different pagenumbers but the same pagingcookie produce equal return values."""
+    inner = "%3ccookie%20page%3d%221%22%3e%3caccountid%20last%3d%22%7bAAA%7d%22%20first%3d%22%7bBBB%7d%22%20%2f%3e%3c%2fcookie%3e"
+    url_p2 = _make_next_link(inner, pagenumber=2)
+    url_p3 = _make_next_link(inner, pagenumber=3)
+    assert _extract_pagingcookie(url_p2) == _extract_pagingcookie(url_p3)
+
+
+def test_extract_pagingcookie_different_cookies_not_equal():
+    """Two URLs with different pagingcookie GUIDs produce different return values."""
+    inner_1 = 
"%3ccookie%20page%3d%221%22%3e%3caccountid%20last%3d%22%7bAAA%7d%22%20first%3d%22%7bBBB%7d%22%20%2f%3e%3c%2fcookie%3e" + inner_2 = "%3ccookie%20page%3d%222%22%3e%3caccountid%20last%3d%22%7bCCC%7d%22%20first%3d%22%7bDDD%7d%22%20%2f%3e%3c%2fcookie%3e" + url_p2 = _make_next_link(inner_1, pagenumber=2) + url_p3 = _make_next_link(inner_2, pagenumber=3) + assert _extract_pagingcookie(url_p2) != _extract_pagingcookie(url_p3) + + +def test_extract_pagingcookie_malformed_url_returns_none(): + """Returns None gracefully when given a non-URL string.""" + assert _extract_pagingcookie("not a url at all !!!") is None + + +def test_extract_pagingcookie_exception_returns_none(): + """Returns None when an unexpected exception is raised during URL parsing (except branch).""" + with patch("PowerPlatform.Dataverse.data._odata.urlparse", side_effect=RuntimeError("boom")): + assert _extract_pagingcookie("https://org.example/?$skiptoken=x") is None + + +def test_query_sql_request_exception_warns_and_returns_partial(): + """When _request raises an exception mid-pagination a RuntimeWarning is emitted and + the rows collected so far are returned.""" + client = _query_sql_client() + page1 = _make_response([{"id": 1}], next_link="https://org.example/p2") + + with ( + patch.object(client, "_execute_raw", return_value=page1), + patch.object(client, "_build_sql", return_value=MagicMock()), + patch.object(client, "_request", side_effect=ConnectionError("network timeout")), + ): + with pytest.warns(RuntimeWarning, match="pagination stopped"): + result = client._query_sql("SELECT id FROM account") + + assert result == [{"id": 1}] + + +def test_query_sql_non_dict_page_body_stops_pagination(): + """When a pagination response contains valid JSON that is not a dict (e.g. 
a list), + pagination stops silently and the rows collected so far are returned.""" + client = _query_sql_client() + page1 = _make_response([{"id": 1}], next_link="https://org.example/p2") + + bad_resp = MagicMock() + bad_resp.json.return_value = [{"id": 2}] # a list, not a dict + + with ( + patch.object(client, "_execute_raw", return_value=page1), + patch.object(client, "_build_sql", return_value=MagicMock()), + patch.object(client, "_request", return_value=bad_resp) as mock_req, + ): + result = client._query_sql("SELECT id FROM account") + + mock_req.assert_called_once_with("get", "https://org.example/p2") + assert result == [{"id": 1}]