
Disclaimer: Yes I am well aware this is a mad attempt.

Use case:

I am reading from a config file to run test collections, where each collection comprises a set of test cases with corresponding expected results and a fixed setup.

Flow (for each test case):

  1. Setup: wipe and setup database with specific test case dataset (glorified SQL file)
  2. load expected test case results from csv
  3. execute collections query/report
  4. compare results.

Sounds good, except the people writing the test cases come from more of a tech-admin background, so the goal is to enable this without them writing any Python code.

Code

Assume these functions exist.

# test_queries.py
import unittest

def gather_collections(): ...               # returns (collection, query, config) tuples
def gather_cases(collection): ...           # returns test_case names
def load_collection_stubs(collection): ...  # returns None
def load_case_dataset(test_case): ...       # returns None
def read_case_result_csv(test_case): ...    # returns [csv_result]
def execute(query, test_case): ...          # returns [query_result]


class TestQueries(unittest.TestCase):
    def setup_method(self, method):
        collection = self._item.name.replace('test_', '')
        load_collection_stubs(collection)
# conftest.py
import pytest

@pytest.hookimpl(hookwrapper=True)
def pytest_runtest_protocol(item, nextitem):
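    # stash the collected item on the test class so setup_method can read the
    # current test's name (assumes every collected item belongs to a test class)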
    item.cls._item = item
    yield

Example Data

Collection stubs / data (environment setup)

-- stubs/test_setup_log.sql
DROP DATABASE IF EXISTS `test`;
CREATE DATABASE `test`;
USE test;
CREATE TABLE log (
  `id` int(9) NOT NULL AUTO_INCREMENT,
  `timestamp` datetime NOT NULL DEFAULT NOW(),
  `username` varchar(100) NOT NULL,
  `message` varchar(500),
  PRIMARY KEY (`id`)
);

Query to test

-- queries/count.sql
SELECT count(*) as `log_count` from test.log where username = 'unicorn';

Test case 1 input data

-- test_case_1.sql
INSERT INTO log (`id`, `timestamp`, `username`, `message`)
VALUES
    (1, '2020-12-18 11:23:01', 'unicorn', 'user logged in'),
    (2, '2020-12-18 11:23:02', 'halsey', 'user logged off'),
    (3, '2020-12-18 11:23:04', 'unicorn', 'user navigated to home');

Test case 1 expected result (test_case_1.csv)

log_count
2
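
For context, a minimal read_case_result_csv could be as simple as the sketch below (a sketch under assumptions; the question only stubs this function, and the path handling here is hypothetical):

# sketch only: assumes each test case has a sibling <test_case>.csv file
import csv

def read_case_result_csv(test_case):
    with open('{}.csv'.format(test_case), newline='') as f:
        # one dict per row, keyed by the CSV header, e.g. {'log_count': '2'}
        return list(csv.DictReader(f))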

Attempt 1

for collection, query, config in gather_collections():
    test_method_name = 'test_{}'.format(collection)

    LOGGER.debug("collections.{}.test - {}".format(collection, config))
    cases = gather_cases(collection)
    LOGGER.debug("collections.{}.cases - {}".format(collection, cases))
    setattr(
        TestQueries,
        test_method_name,
        pytest.mark.parametrize(
            'case_name',
            cases,
            ids=cases
        )(
            lambda self, case_name: (
                load_case_dataset(case_name),
                self.assertEqual(execute(query, case_name), read_case_result_csv(case_name))
            )
        )
    )
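
One caveat worth noting here (an observation, not part of the original attempt): the lambda closes over the loop variable query, so by the time the tests actually run, every generated method sees the last collection's query. Binding it as a default argument keeps the value captured per iteration:

# hypothetical fix for the late-binding closure over `query`
lambda self, case_name, query=query: (
    load_case_dataset(case_name),
    self.assertEqual(execute(query, case_name), read_case_result_csv(case_name))
)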

Attempt 2

for collection, query, config in gather_collections():
    test_method_name = 'test_{}'.format(collection)

    LOGGER.debug("collections.{}.test - {}".format(collection, config))
    setattr(
        TestQueries,
        test_method_name,
        lambda self, case_name: (
            load_case_dataset(case_name),
            self.assertEqual(execute(query, case_name), read_case_result_csv(case_name))
        )
    )

def pytest_generate_tests(metafunc):
    collection = metafunc.function.__name__.replace('test_', '')
    # FIXME logs and id setting not working
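    # (likely cause: setattr does not rename the function, so metafunc.function.__name__
    #  is still '<lambda>' here and gather_cases gets a bogus collection name)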
    cases = gather_cases(collection)
    LOGGER.info("collections.{}.pytest.cases - {}".format(collection, cases))

    metafunc.parametrize(
        'case_name',
        cases,
        ids=cases
    )
  • `pytest` supports data-driven testing via multiple options, for example test parametrization or implementing a custom test protocol. Rather than showing function stubs for loading data, please provide an example of the data itself (with fake values, of course). You mentioned a test case dataset, test case results and some collection; it's not yet clear to me how they correspond and what the real test case data looks like. – hoefling Dec 16 '20 at 15:19
  • @hoefling added a very rudimentary case but it should illustrate the structure – WiR3D Dec 18 '20 at 09:27

1 Answer


So I figured it out, but it's not the most elegant solution. Essentially you use a single test function and then use some of pytest's hooks to change the test names for reporting.

There are numerous issues, e.g. if you don't use pytest.param to pass the parameters to parametrize, then you do not have the required information available. Also, the method passed to setup_method is not aware of the actual iteration being run when it's called, so I had to hack that in with the iter counter.
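
To illustrate the pytest.param point (a minimal sketch, separate from the actual solution below): parametrize marks are stored on the test function as pytestmark, and pytest.param wraps each case in a ParameterSet that keeps the raw values and the id, which is exactly what setup_method digs into:

import pytest

@pytest.mark.parametrize('collection_name, case_name', [
    pytest.param('report1', 'case1', id='report1:case1'),
    pytest.param('report1', 'case2', id='report1:case2'),
])
def test_demo(collection_name, case_name):
    pass

mark = test_demo.pytestmark[0]   # the parametrize Mark object
print(mark.args[0])              # 'collection_name, case_name'
print(mark.args[1][0].values)    # ('report1', 'case1')
print(mark.args[1][0].id)        # 'report1:case1'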

# test_queries.py
import glob
import logging
from os import path

import pytest

LOGGER = logging.getLogger(__name__)

# TESTS (the per-collection config), SRC_DIR and TEST_DIR are loaded from the config file (not shown)
def gather_tests():
    global TESTS

    for test_collection_name in TESTS.keys():
        LOGGER.debug("collections.{}.gather - {}".format(test_collection_name, TESTS[test_collection_name]))
        query = path.join(SRC_DIR, TESTS[test_collection_name]['query'])
        cases_dir = TESTS[test_collection_name]['cases']
        result_sets = path.join(TEST_DIR, cases_dir, '*.csv')

        for case_result_csv in glob.glob(result_sets):
            test_case_name = path.splitext(path.basename(case_result_csv))[0]
            yield test_case_name, query, test_collection_name, TESTS[test_collection_name]



class TestQueries():
    iter = 0

    def setup_method(self, method):
        method_name = method.__name__  # or self._item.originalname
        global TESTS

        if method_name == 'test_scripts_reports':
            _mark = next((m for m in method.pytestmark if m.name == 'parametrize' and 'collection_name' in m.args[0]), None)
            if not _mark:
                raise Exception('test {} missing collection_name parametrization'.format(method_name))  # nothing to do here

            _args = _mark.args[0]
            _params = _mark.args[1]
            LOGGER.debug('setup_method: _params - {}'.format(_params))
            if not _params:
                raise Exception('test {} missing pytest.params'.format(method_name))  # nothing to do here

            _currparams = _params[self.iter]
            # advance the counter on the class: pytest creates a fresh instance per
            # test, so an instance-level `self.iter += 1` would never persist
            TestQueries.iter += 1

            _argpos = [arg.strip() for arg in _args.split(',')].index('collection_name')
            collection = _currparams.values[_argpos]
            LOGGER.debug('collections.{}.setup_method[{}] - {}'.format(collection, self.iter, _currparams))
            load_collection_stubs(collection)


    @pytest.mark.parametrize(
        'case_name, collection_query, collection_name, collection_config',
        [pytest.param(*c, id='{}:{}'.format(c[2], c[0])) for c in gather_tests()]
    )
    def test_scripts_reports(self, case_name, collection_query, collection_name, collection_config):
        if not path.isfile(collection_query):
            pytest.skip("report query does not exist: {}".format(collection_query))

        LOGGER.debug("test_scripts_reports.{}.{} - ".format(collection_name, case_name))
        load_case_dataset(case_name)
        assert execute(collection_query, case_name) == read_case_result_csv(case_name)
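
For reference, the TESTS mapping that gather_tests() iterates is assumed to come from the loaded config file; with the directory layout shown further down it could look roughly like this (hypothetical values):

# hypothetical shape of the loaded config, matching how gather_tests() indexes it
TESTS = {
    'report1': {
        'query': 'queries/report1.sql',   # joined onto SRC_DIR
        'cases': 'collection/report1',    # joined onto TEST_DIR; *.csv files are globbed inside
    },
}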

Then, to make the test ids more human-readable, you can do this:

# conftest.py
import re

def pytest_collection_modifyitems(items):
    # https://stackoverflow.com/questions/61317809/pytest-dynamically-generating-test-name-during-runtime
    for item in items:
        if item.originalname == 'test_scripts_reports':
            item._nodeid = re.sub(r'::\w+::\w+\[', '[', item.nodeid)

The result, with the following files:

stubs/
  00-wipe-db.sql
  setup-db.sql
queries/
  report1.sql
collection/
  report1/
    case1.sql
    case1.csv
    case2.sql
    case2.csv

# results (with setup_method firing before each test and loading the appropriate stubs as per configuration)
FAILED test_queries.py[report1:case1]
FAILED test_queries.py[report1:case2]
  • the fact that it failed is because the actual query is not correct, so the tests are working. – WiR3D Dec 18 '20 at 14:55