Source code for sdmx.testing

import logging
import os
from collections import ChainMap
from contextlib import contextmanager
from pathlib import Path
from typing import Union

import numpy as np
import pandas as pd
import pytest

from sdmx.exceptions import HTTPError
from sdmx.rest import Resource
from sdmx.source import DataContentType, add_source, sources
from sdmx.testing.report import ServiceReporter

# Module-level logger, named after this module
log = logging.getLogger(__name__)


# Expected to_pandas() results for specimen data files; see expected_data().
# - Each key is a specimen file name with "." replaced by "-":
#   "foo.xml" -> "foo-xml".
# - The expected data for a key is stored in expected/{KEY}.txt.
# - Each value is either a mapping of keyword arguments for pd.read_csv(), or
#   a mapping {"use": "other-key"}, in which case the entry (and data file)
#   for other-key is used instead.
EXPECTED = {
    "ng-flat-xml": {"index_col": [0, 1, 2, 3, 4, 5]},
    "ng-ts-gf-xml": {"use": "ng-flat-xml"},
    "ng-ts-xml": {"use": "ng-flat-xml"},
    "ng-xs-xml": {"index_col": [0, 1, 2, 3, 4, 5]},
    # Excluded: this file contains two DataSets, and expected_data() currently
    # only supports specimens with one DataSet
    # 'action-delete-json': dict(header=[0, 1, 2, 3, 4]),
    "xs-json": {"index_col": [0, 1, 2, 3, 4, 5]},
    "flat-json": {"index_col": [0, 1, 2, 3, 4, 5]},
    "ts-json": {"use": "flat-json"},
}


def assert_pd_equal(left, right, **kwargs):
    """Assert equality of two pandas (or numpy) objects.

    The comparison function is chosen from the type of `left`:

    - :class:`pandas.Series` → :func:`pandas.testing.assert_series_equal`
    - :class:`pandas.DataFrame` → :func:`pandas.testing.assert_frame_equal`
    - :class:`numpy.ndarray` → :func:`numpy.testing.assert_array_equal`

    Subclasses of these types are dispatched in the same way as the base class.

    Parameters
    ----------
    left :
        Reference object. If :obj:`None`, no comparison is made and the function
        returns immediately, whatever `right` is.
    right :
        Object to compare against `left`.
    **kwargs :
        Passed through to the selected comparison function.

    Raises
    ------
    AssertionError
        If the selected comparison function finds a difference.
    KeyError
        If `left` is not an instance of any supported type.
    """
    if left is None:
        # Nothing to compare against
        return

    # isinstance() rather than an exact-class lookup, so that subclasses of the
    # supported types are handled instead of raising KeyError
    dispatch = (
        (pd.Series, pd.testing.assert_series_equal),
        (pd.DataFrame, pd.testing.assert_frame_equal),
        (np.ndarray, np.testing.assert_array_equal),
    )
    for supported_type, assert_equal in dispatch:
        if isinstance(left, supported_type):
            assert_equal(left, right, **kwargs)
            return

    # Same exception type as the former dictionary lookup on the class
    raise KeyError(left.__class__)
def pytest_addoption(parser):
    """Register the ``--sdmx-test-data`` command-line option with pytest.

    The option's default is the value of the ``SDMX_TEST_DATA`` environment
    variable, or :obj:`None` if that variable is not set.
    """
    # Fall back to the environment when the option is not given explicitly
    env_default = os.environ.get("SDMX_TEST_DATA", None)

    parser.addoption(
        "--sdmx-test-data",
        default=env_default,
        help="path to SDMX test specimens",
    )
def pytest_configure(config):
    """Handle the ``--sdmx-test-data`` command-line option.

    This hook:

    1. registers the ``parametrize_specimens`` mark,
    2. creates a :class:`ServiceReporter`, stores it as ``config._sdmx_reporter``
       and registers it as a plugin, and
    3. stores the test data path as ``config.sdmx_test_data`` and a
       :class:`SpecimenCollection` for it as ``config.sdmx_specimens``.

    Raises
    ------
    RuntimeError
        If the option value cannot be converted to a path (:class:`TypeError`
        from :class:`~pathlib.Path`).
    FileNotFoundError
        If the given path does not exist.
    """
    # Declare the mark so pytest does not warn about an unknown mark
    config.addinivalue_line(
        "markers", "parametrize_specimens: (for internal use by sdmx.testing)"
    )

    # Plugin that reports service outputs
    config._sdmx_reporter = ServiceReporter(config)
    config.pluginmanager.register(config._sdmx_reporter)

    hint = "Give --sdmx-test-data=… or set the SDMX_TEST_DATA environment variable"

    # The value must be convertible to a path…
    try:
        data_dir = Path(config.option.sdmx_test_data)
    except TypeError:  # pragma: no cover
        raise RuntimeError(hint) from None

    # …and the path must exist; otherwise this exception kills the test session
    if not data_dir.exists():  # pragma: no cover
        raise FileNotFoundError(f"SDMX test data in {data_dir}\n{hint}")

    config.sdmx_test_data = data_dir
    config.sdmx_specimens = SpecimenCollection(data_dir)
def pytest_generate_tests(metafunc):
    """Generate tests.

    Applies :func:`parametrize_specimens` and then
    :func:`generate_endpoint_tests` to `metafunc`, in that order.
    """
    for generate in (parametrize_specimens, generate_endpoint_tests):
        generate(metafunc)
def parametrize_specimens(metafunc):
    """Handle ``@pytest.mark.parametrize_specimens(…)``.

    If the test carries the mark, the test is parametrized: the mark's first
    positional argument gives the argument name(s), and its keyword arguments
    are passed to ``as_params()`` of ``metafunc.config.sdmx_specimens`` to
    produce the values. Tests without the mark are left untouched.
    """
    # Only the first such mark is considered
    mark = next(metafunc.definition.iter_markers("parametrize_specimens"), None)
    if mark is None:
        return

    argnames = mark.args[0]
    argvalues = metafunc.config.sdmx_specimens.as_params(**mark.kwargs)
    metafunc.parametrize(argnames, argvalues)
#: Mark for endpoints that a data source is known not to support. The
#: :class:`NotImplementedError` is raised by client.Client._request_from_args.
#:
#: .. todo:: parametrize force=True to query these endpoints anyway; then XPASS will
#:    reveal when data sources change their support for endpoints
unsupported = pytest.mark.xfail(
    raises=NotImplementedError,
    reason="Known non-supported endpoint.",
    strict=True,
)

#: Non-strict mark that tolerates an :class:`HTTPError` (service unavailable).
_503 = pytest.mark.xfail(
    reason="503 Server Error: Service Unavailable",
    raises=HTTPError,
)
def generate_endpoint_tests(metafunc):
    """pytest hook for parametrizing tests that need an "endpoint" fixture.

    The test class (``metafunc.cls``) is read for these attributes:

    - ``source_id``: key into :data:`sources`.
    - ``xfail`` and ``xfail_common``: mappings from endpoint name to an expected
      exception class; entries in ``xfail`` take precedence.
    - ``tolerate_503``: if true, every test is also marked with ``_503``.
    - ``endpoint_args``: mapping from endpoint name to keyword arguments.

    One parameter set ``(endpoint, args)`` is generated per member of
    :class:`Resource`, except that non-data endpoints are omitted for SDMX-JSON
    sources, and the data endpoint is omitted when no args are given for it.
    If nothing remains, :func:`pytest.skip` is called.
    """
    if "endpoint" not in metafunc.fixturenames:
        # This metafunc does not use the fixture; nothing to parametrize
        return

    # The Source is found through the test class' source_id attribute
    source = sources[metafunc.cls.source_id]

    # Subclass-specific xfails shadow the "common" ones
    expected_failures = ChainMap(metafunc.cls.xfail, metafunc.cls.xfail_common)

    # Arguments to parametrize()
    cases = []

    for ep in Resource:
        # Whether the source supports this endpoint
        supported = source.supports[ep]

        if source.data_content_type == DataContentType.JSON and ep is not Resource.data:
            # SDMX-JSON sources only support data queries
            continue

        # Multiple marks can accumulate; the first takes precedence
        marks = [] if supported else [unsupported]

        # Expected failure declared by the test class for this endpoint, if any
        exc_class = expected_failures.get(ep.name, None)
        if exc_class:
            marks.append(pytest.mark.xfail(strict=True, raises=exc_class))

            if not supported:  # pragma: no cover
                log.info(
                    f"tests for {repr(metafunc.cls.source_id)} mention unsupported "
                    f"endpoint {repr(ep.name)}"
                )

        # Tolerate 503 errors
        if metafunc.cls.tolerate_503:
            marks.append(_503)

        # Keyword arguments for this endpoint, if any
        args = metafunc.cls.endpoint_args.get(ep.name, dict())
        if ep is Resource.data and len(args) == 0:
            # args must be specified for a data query; no args → no test
            continue

        cases.append(pytest.param(ep, args, id=ep.name, marks=marks))

    if len(cases) == 0:
        pytest.skip("No endpoints to be tested")

    # Run the test function once for each endpoint
    metafunc.parametrize("endpoint, args", cases)
class MessageTest:
    """Base class for tests of specific specimen files.

    The class-scoped fixtures :meth:`path` and :meth:`msg` locate and read the
    file named by the :attr:`directory` and :attr:`filename` attributes.
    """

    # Directory of the specimen, joined onto the test data path by path()
    directory: Union[str, Path] = Path(".")
    # Name of the specimen file within `directory`; no default is given here
    filename: str

    @pytest.fixture(scope="class")
    def path(self, test_data_path):
        """Fixture: :attr:`directory` joined onto the test data path."""
        specimen_dir = test_data_path / self.directory
        yield specimen_dir

    @pytest.fixture(scope="class")
    def msg(self, path):
        """Fixture: result of :func:`sdmx.read_sdmx` for :attr:`filename`."""
        # Imported at call time rather than at module import time
        from sdmx import read_sdmx

        return read_sdmx(path / self.filename)
class SpecimenCollection:
    """Collection of test specimens.

    Each specimen is recorded in :attr:`specimens` as a 3-tuple
    ``(path, format, kind)``, where `format` is ``"xml"`` or ``"json"`` and
    `kind` is ``"data"`` or ``"structure"``.

    Parameters
    ----------
    base_path : str or pathlib.Path
        Directory containing the specimen files.
    """

    def __init__(self, base_path):
        # Accept a plain string as well as a Path
        base_path = Path(base_path)
        self.base_path = base_path

        specimens = [
            (base_path / "INSEE" / "CNA-2010-CONSO-SI-A17.xml", "xml", "data"),
            (base_path / "INSEE" / "IPI-2010-A21.xml", "xml", "data"),
        ]

        # XML data files for the ECB exchange rate data flow
        for path in (base_path / "ECB_EXR").rglob("*.xml"):
            # Files named like "…structure…" or "…common…" are structure messages
            is_structure = "structure" in path.name or "common" in path.name
            specimens.append((path, "xml", "structure" if is_structure else "data"))

        # JSON data files for the ECB exchange rate data flow
        specimens.extend(
            (fp, "json", "data") for fp in (base_path / "ECB_EXR").rglob("*.json")
        )
        specimens.extend(
            (fp, "json", "data") for fp in (base_path / "OECD").rglob("*.json")
        )

        # Miscellaneous XML data files
        specimens.append((base_path / "ESTAT" / "footer.xml", "xml", "data"))

        # Miscellaneous XML structure files
        specimens.extend(
            (base_path.joinpath(*parts), "xml", "structure")
            for parts in [
                ("ECB", "orgscheme.xml"),
                ("ESTAT", "apro_mk_cola-structure.xml"),
                # Manually reduced subset of the response for this DSD. Test for
                # <str:CubeRegion> containing both <com:KeyValue> and <com:Attribute>
                ("IMF", "ECOFIN_DSD-structure.xml"),
                ("INSEE", "CNA-2010-CONSO-SI-A17-structure.xml"),
                ("INSEE", "dataflow.xml"),
                ("INSEE", "IPI-2010-A21-structure.xml"),
                ("ISTAT", "47_850-structure.xml"),
                ("UNSD", "codelist_partial.xml"),
                ("SGR", "common-structure.xml"),
            ]
        )

        self.specimens = specimens

    @contextmanager
    def __call__(self, pattern="", opened=True):
        """Open the test specimen file with `pattern` in the name.

        Only the first specimen whose path matches ``*{pattern}*`` is used.

        Parameters
        ----------
        pattern : str
            Text to look for in the specimen path.
        opened : bool
            If true (the default), yield the file opened in binary read mode;
            the file is closed when the context exits. Otherwise, yield the
            :class:`~pathlib.Path`.

        Raises
        ------
        ValueError
            If no specimen matches `pattern`.
        """
        for path, _, _ in self.specimens:
            if not path.match("*" + pattern + "*"):
                continue

            if opened:
                # Close the handle on exit from the context, instead of leaking it
                with open(path, "rb") as stream:
                    yield stream
            else:
                yield path
            return

        raise ValueError(pattern)  # pragma: no cover

    def as_params(self, format=None, kind=None, marks=None):
        """Generate :func:`pytest.param` from specimens.

        One :func:`~.pytest.param` is generated for each specimen that matches the
        `format` and `kind` arguments (if any). Marks are attached to each param
        from `marks`.

        Parameters
        ----------
        format : str, optional
            If given, only specimens with this format are included.
        kind : str, optional
            If given, only specimens of this kind are included.
        marks : dict, optional
            Mapping from specimen path to the mark(s) for that specimen. A key may
            be the full specimen path, or the path relative to the base path,
            either as a :class:`~pathlib.Path` or as a string.
        """
        # None rather than a shared mutable default
        marks = {} if marks is None else marks

        for path, f, k in self.specimens:
            if (format and format != f) or (kind and kind != k):
                continue

            relative = path.relative_to(self.base_path)

            # Full path first (the former behaviour), then the partial-path forms
            for key in (path, relative, str(relative), relative.as_posix()):
                if key in marks:
                    specimen_marks = marks[key]
                    break
            else:
                specimen_marks = tuple()

            yield pytest.param(path, id=str(relative), marks=specimen_marks)

    def expected_data(self, path):
        """Return the expected :func:`.to_pandas()` result for the specimen `path`.

        The entry for `path` in :data:`EXPECTED` gives keyword arguments for
        :func:`pandas.read_csv`, which is used to read ``expected/{key}.txt``
        below the base path.

        Returns
        -------
        pandas.Series or pandas.DataFrame or None
            :obj:`None` if :data:`EXPECTED` has no entry for `path`. A Series if
            the only column read is named "value"; otherwise a DataFrame.
        """
        key = path.name.replace(".", "-")
        try:
            info = EXPECTED[key]
            if "use" in info:
                # Use the same expected data as another file
                key = info["use"]
                info = EXPECTED[key]
        except KeyError:
            return None

        # Defaults, overridden by the entry from EXPECTED
        args = dict(sep=r"\s+", index_col=[0], header=[0])
        args.update(info)

        result = pd.read_csv(
            self.base_path.joinpath("expected", key).with_suffix(".txt"), **args
        )

        # A series; unwrap
        if set(result.columns) == {"value"}:
            result = result["value"]

        return result
@pytest.fixture(scope="session")
def test_data_path(pytestconfig):
    """Fixture: the :py:class:`.Path` given as --sdmx-test-data."""
    data_path = pytestconfig.sdmx_test_data
    yield data_path
@pytest.fixture(scope="session")
def specimen(pytestconfig):
    """Fixture: the :class:`SpecimenCollection`."""
    collection = pytestconfig.sdmx_specimens
    yield collection
@pytest.fixture(scope="class")
def testsource():
    """Fixture: the :attr:`.Source.id` of a non-existent data source.

    The source is added with :func:`add_source` before the ID is yielded, and
    removed from :data:`sources` afterwards, even if an exception is raised.
    """
    source_id = "TEST"
    info = dict(id=source_id, name="Test source", url="https://example.com/sdmx-rest")
    add_source(info)

    try:
        yield source_id
    finally:
        sources.pop(source_id)