"""Source code for sdmx.testing."""

import logging
import os
from collections import ChainMap
from contextlib import contextmanager
from pathlib import Path
from typing import Union

import numpy as np
import pandas as pd
import pytest

from sdmx.exceptions import HTTPError
from sdmx.rest import Resource
from sdmx.source import DataContentType, add_source, sources
from sdmx.testing.report import ServiceReporter

log = logging.getLogger(__name__)


# Expected to_pandas() results for data files; see expected_data()
# - Keys are the file name (above) with '.' -> '-': 'foo.xml' -> 'foo-xml'
# - Data is stored in expected/{KEY}.txt
# - Values are either argument to pd.read_csv(); or a dict(use='other-key'),
#   in which case the info for other-key is used instead.
EXPECTED: dict = {
    "ng-flat-xml": dict(index_col=[0, 1, 2, 3, 4, 5]),
    "ng-ts-gf-xml": dict(use="ng-flat-xml"),
    "ng-ts-xml": dict(use="ng-flat-xml"),
    "ng-xs-xml": dict(index_col=[0, 1, 2, 3, 4, 5]),
    # Excluded: this file contains two DataSets, and expected_data() currently
    # only supports specimens with one DataSet
    # 'action-delete-json': dict(header=[0, 1, 2, 3, 4]),
    "xs-json": dict(index_col=[0, 1, 2, 3, 4, 5]),
    "flat-json": dict(index_col=[0, 1, 2, 3, 4, 5]),
    "ts-json": dict(use="flat-json"),
}


def assert_pd_equal(left, right, **kwargs):
    """Assert equality of two pandas (or NumPy) objects.

    A no-op when `left` is :obj:`None`. Otherwise, the comparison function is
    chosen by the concrete type of `left` (:class:`pandas.Series`,
    :class:`pandas.DataFrame`, or :class:`numpy.ndarray`), and `kwargs` are
    passed through to it.
    """
    if left is None:
        return

    # Dispatch on the concrete type of `left`; KeyError for unsupported types
    dispatch = {
        pd.Series: pd.testing.assert_series_equal,
        pd.DataFrame: pd.testing.assert_frame_equal,
        np.ndarray: np.testing.assert_array_equal,
    }
    compare = dispatch[type(left)]
    compare(left, right, **kwargs)
def pytest_addoption(parser):
    """Add the ``--sdmx-test-data`` command-line option to pytest."""
    # The SDMX_TEST_DATA environment variable supplies the default value
    default_path = os.environ.get("SDMX_TEST_DATA", None)

    parser.addoption(
        "--sdmx-test-data",
        default=default_path,
        help="path to SDMX test specimens",
    )
def pytest_configure(config):
    """Handle the ``--sdmx-test-data`` command-line option."""
    # Declare the "parametrize_specimens" mark so pytest does not warn about it
    config.addinivalue_line(
        "markers", "parametrize_specimens: (for internal use by sdmx.testing)"
    )

    # Create and register the plugin for reporting service outputs
    reporter = ServiceReporter(config)
    config._sdmx_reporter = reporter
    config.pluginmanager.register(reporter)

    # Validate the option value: convertible to a path, and the path exists
    message = "Give --sdmx-test-data=… or set the SDMX_TEST_DATA environment variable"
    try:
        sdmx_test_data = Path(config.option.sdmx_test_data)
    except TypeError:  # pragma: no cover
        # Option was None (not given, and no environment variable set)
        raise RuntimeError(message) from None

    if not sdmx_test_data.exists():  # pragma: no cover
        # Cannot proceed further; this exception kills the test session
        raise FileNotFoundError(f"SDMX test data in {sdmx_test_data}\n{message}")

    # Store the path and the specimen collection on the pytest config object
    config.sdmx_test_data = sdmx_test_data
    config.sdmx_specimens = SpecimenCollection(sdmx_test_data)
def pytest_generate_tests(metafunc):
    """Generate tests.

    Calls both :func:`parametrize_specimens` and :func:`generate_endpoint_tests`.
    """
    # Invoke both parametrizing hooks, in order
    for hook in (parametrize_specimens, generate_endpoint_tests):
        hook(metafunc)
def parametrize_specimens(metafunc):
    """Handle ``@pytest.mark.parametrize_specimens(…)``."""
    markers = metafunc.definition.iter_markers("parametrize_specimens")
    try:
        mark = next(markers)
    except StopIteration:
        # Test function does not carry the mark; nothing to do
        return

    # Parametrize the named fixture with matching specimens
    params = metafunc.config.sdmx_specimens.as_params(**mark.kwargs)
    metafunc.parametrize(mark.args[0], params)
#: Marks for use below. XFAIL = { # Exceptions resulting from querying an endpoint not supported by a service "unsupported": pytest.mark.xfail( strict=True, reason="Not implemented by service", raises=( HTTPError, # 401, 404, 405, etc. NotImplementedError, # 501, converted automatically ValueError, # e.g. WB_WDI, returns invalid content type ), ), # Returned by servers that may be temporarily unavailable at the time of test 503: pytest.mark.xfail( raises=HTTPError, reason="503 Server Error: Service Unavailable" ), }
def generate_endpoint_tests(metafunc):
    """pytest hook for parametrizing tests that need an "endpoint" fixture.

    This function relies on the :class:`.DataSourceTest` base class defined in
    :mod:`.test_sources`. It:

    - Generates one parametrization for every :class:`.Resource` (= REST API
      endpoint).
    - Applies pytest "xfail" (expected failure) marks according to:

      1. :attr:`.Source.supports`, i.e. if the particular source is marked as not
         supporting certain endpoints, the test is expected to fail.
      2. :attr:`.DataSourceTest.xfail`, any other failures defined on the source
         test class (e.g. :class:`.DataSourceTest` subclass).
      3. :attr:`.DataSourceTest.xfail_common`, common failures.
    """
    if "endpoint" not in metafunc.fixturenames:
        return  # Don't need to parametrize this metafunc

    # Arguments to parametrize()
    params = []

    # Use the test class' source_id attr to look up the Source class
    cls = metafunc.cls
    source = sources[cls.source_id]

    # Merge subclass-specific and "common" xfail marks, preferring the former
    xfails = ChainMap(cls.xfail, cls.xfail_common)

    # Iterate over all known endpoints
    for ep in Resource:
        # Accumulate multiple marks; first takes precedence
        marks = []

        # Get any keyword arguments for this endpoint
        args = cls.endpoint_args.get(ep.name, dict())
        if ep is Resource.data and not len(args):
            # args must be specified for a data query; no args → no test
            continue

        # Check if the associated source supports the endpoint
        supported = source.supports[ep]
        if source.data_content_type == DataContentType.JSON and ep is not Resource.data:
            # SDMX-JSON sources only support data queries
            continue
        elif not supported:
            # Force the query to be sent anyway, and expect it to fail
            args["force"] = True
            marks.append(XFAIL["unsupported"])

        # Check if the test function's class contains an expected failure for `endpoint`
        xfail = xfails.get(ep.name, None)
        if not marks and xfail:
            # Mark the test as expected to fail
            try:
                # Unpack a (exception class, reason) tuple
                mark = pytest.mark.xfail(raises=xfail[0], reason=xfail[1])
            except TypeError:
                # `xfail` is a bare exception class
                mark = pytest.mark.xfail(raises=xfail)
            marks.append(mark)

            # NOTE(review): given the `elif not supported:` branch above adds a
            # mark, `not marks` implies `supported`, so this log call looks
            # unreachable — confirm the intent of the "extraneous entries" check.
            if not supported:  # pragma: no cover; for identifying extraneous entries
                log.info(
                    f"tests for {source.id!r} mention unsupported endpoint {ep.name!r}"
                )

        # Tolerate 503 errors
        if cls.tolerate_503:
            marks.append(XFAIL[503])

        params.append(pytest.param(ep, args, id=ep.name, marks=marks))

    if len(params):
        # Run the test function once for each endpoint
        metafunc.parametrize("endpoint, args", params)
    # commented: for debugging
    # else:
    #     pytest.skip("No endpoints to be tested")
class MessageTest:
    """Base class for tests of specific specimen files."""

    #: Directory, relative to the test data path, containing :attr:`filename`.
    directory: Union[str, Path] = Path(".")
    #: Name of the specimen file; must be set by subclasses.
    filename: str

    @pytest.fixture(scope="class")
    def path(self, test_data_path):
        """Fixture: path to the directory containing the specimen file."""
        yield test_data_path / self.directory

    @pytest.fixture(scope="class")
    def msg(self, path):
        """Fixture: the specimen file, parsed by :func:`sdmx.read_sdmx`."""
        import sdmx

        return sdmx.read_sdmx(path / self.filename)
class SpecimenCollection:
    """Collection of test specimens.

    Specimens are found by scanning `base_path` and by an explicit list of
    known files; each is stored as a ``(path, format, kind)`` 3-tuple, where
    ``format`` is "xml" or "json" and ``kind`` is "data" or "structure".
    """

    def __init__(self, base_path):
        # Root directory containing the specimen files
        self.base_path = base_path

        specimens = []

        # XML data files for the ECB exchange rate data flow
        for source_id in ("ECB_EXR",):
            for path in base_path.joinpath(source_id).rglob("*.xml"):
                kind = "data"
                # File name indicates a structure (or common structure) message
                if "structure" in path.name or "common" in path.name:
                    kind = "structure"
                specimens.append((path, "xml", kind))

        # JSON data files for ECB and OECD data flows
        for source_id in ("ECB_EXR", "OECD"):
            specimens.extend(
                (fp, "json", "data")
                for fp in base_path.joinpath(source_id).rglob("*.json")
            )

        # Miscellaneous XML data files
        specimens.extend(
            (base_path.joinpath(*parts), "xml", "data")
            for parts in [
                ("INSEE", "CNA-2010-CONSO-SI-A17.xml"),
                ("INSEE", "IPI-2010-A21.xml"),
                ("ESTAT", "footer.xml"),
                ("ESTAT", "NAMA_10_GDP-ss.xml"),
            ]
        )

        # Miscellaneous XML structure files
        specimens.extend(
            (base_path.joinpath(*parts), "xml", "structure")
            for parts in [
                ("ECB", "orgscheme.xml"),
                ("ESTAT", "apro_mk_cola-structure.xml"),
                ("IMF", "1PI-structure.xml"),
                # Manually reduced subset of the response for this DSD. Test for
                # <str:CubeRegion> containing both <com:KeyValue> and <com:Attribute>
                ("IMF", "ECOFIN_DSD-structure.xml"),
                ("INSEE", "CNA-2010-CONSO-SI-A17-structure.xml"),
                ("INSEE", "dataflow.xml"),
                ("INSEE", "IPI-2010-A21-structure.xml"),
                ("ISTAT", "22_289-structure.xml"),
                ("ISTAT", "47_850-structure.xml"),
                ("UNSD", "codelist_partial.xml"),
                ("SGR", "common-structure.xml"),
            ]
        )

        self.specimens = specimens

    @contextmanager
    def __call__(self, pattern="", opened=True):
        """Open the test specimen file with `pattern` in the name.

        Yields the first specimen whose path matches ``*pattern*``: an open
        binary file object if `opened` is :obj:`True`, else the path itself.
        Raises :class:`ValueError` if no specimen matches.
        """
        for path, f, k in self.specimens:
            if path.match("*" + pattern + "*"):
                yield open(path, "br") if opened else path
                return
        raise ValueError(pattern)  # pragma: no cover

    def as_params(self, format=None, kind=None, marks=dict()):
        """Generate :func:`pytest.param` from specimens.

        One :func:`~.pytest.param` is generated for each specimen that matches the
        `format` and `kind` arguments (if any). Marks are attached to each param
        from `marks`, wherein the keys are partial paths.
        """
        for path, f, k in self.specimens:
            # Skip specimens not matching the filter arguments
            if (format and format != f) or (kind and kind != k):
                continue
            yield pytest.param(
                path,
                # Test ID: path relative to the collection root
                id=str(path.relative_to(self.base_path)),
                marks=marks.get(path, tuple()),
            )

    def expected_data(self, path):
        """Return the expected :func:`.to_pandas()` result for the specimen `path`."""
        try:
            # Map e.g. "foo.xml" to the key "foo-xml"
            key = path.name.replace(".", "-")
            info = EXPECTED[key]
            if "use" in info:
                # Use the same expected data as another file
                key = info["use"]
                info = EXPECTED[key]
        except KeyError:
            # No expected data recorded for this specimen
            return None

        # Base read_csv() arguments, overridden by the EXPECTED entry
        args = dict(sep=r"\s+", index_col=[0], header=[0])
        args.update(info)

        result = pd.read_csv(
            self.base_path.joinpath("expected", key).with_suffix(".txt"), **args
        )

        # A series; unwrap
        if set(result.columns) == {"value"}:
            result = result["value"]

        return result
@pytest.fixture(scope="session")
def test_data_path(pytestconfig):
    """Fixture: the :py:class:`.Path` given as --sdmx-test-data."""
    # Value is stored on the config object by pytest_configure()
    yield pytestconfig.sdmx_test_data
@pytest.fixture(scope="session")
def specimen(pytestconfig):
    """Fixture: the :class:`SpecimenCollection`."""
    # Collection is created and stored by pytest_configure()
    yield pytestconfig.sdmx_specimens
@pytest.fixture(scope="class")
def testsource():
    """Fixture: the :attr:`.Source.id` of a non-existent data source.

    The source is registered via :func:`.add_source` for the duration of the
    test class, then removed from :data:`.sources`, even if the tests fail.
    """
    # Renamed from `id` to avoid shadowing the id() builtin
    source_id = "TEST"

    add_source(
        dict(id=source_id, name="Test source", url="https://example.com/sdmx-rest")
    )

    try:
        yield source_id
    finally:
        # Always clean up the registry
        sources.pop(source_id)