0

I am struggling to understand how to handle unknown fields when the Schema is passed a list of objects for validation. This is what I have so far:

class MySchema(Schema):
    """Schema that tries to reject input keys which are not declared as fields."""
    # fields ...

    # pass_original=True makes the validator receive the raw input as
    # `original_data` in addition to `data`.
    @marshmallow_decorators.validates_schema(pass_original=True)
    def check_unknown_fields(self, data, original_data):
        """Raise a validation error if the original input has undeclared keys.

        NOTE: when the input is a list, every item of `original_data` is
        checked on each call, not just the item corresponding to `data`, so
        the first offending item's error ends up reported for all items.
        """
        if isinstance(original_data, list):
            for dct in original_data:
                self._assert_no_unknown_field(dct)
        else:
            self._assert_no_unknown_field(original_data)

    def _assert_no_unknown_field(self, dct):
        """Raise if `dct` has keys that are not in `self.fields`."""
        # Keys present in the input but not declared on the schema.
        unknown = set(dct.keys()) - set(self.fields)
        if unknown:
            # The unknown names are passed as the second argument; presumably
            # they are used as the field names the error is attached to.
            raise MarshmallowValidationError('Unknown field', unknown)

But that obviously doesn't work, because the validator is run over all the items in the list every time. Therefore the first error is caught and returned for every item:

# Two items, each carrying a different undeclared key.
items = [
    {'a': 1, 'b': 2, 'unknown1': 3},
    {'a': 4, 'b': 5, 'unknown2': 6},
]
errors = MySchema(many=True).validate(items)
# Observed result: 'unknown1' is reported for both items, 'unknown2' never is.
# {0: {'unknown1': ['Unknown field']}, 1: {'unknown1': ['Unknown field']}}

I was trying to think of a way to get only the single item from original_data corresponding to the data argument and validate only that one, but I can't really do that, as items have no id, or field that would make them searchable ...

Am I missing something? Is there a solution to this?

martineau
  • 119,623
  • 25
  • 170
  • 301
sebpiq
  • 7,540
  • 9
  • 52
  • 69

2 Answers

1

This is a workaround I came up with ... I wish it was simpler, but here it is :

from marshmallow import Schema, ValidationError as MarshmallowValidationError, fields

# Error message attached to every input key that is not declared on the schema.
UNKNOWN_MESSAGE = 'unknown field'


class _RejectUnknownMixin(object):
    """
    Mixin that detects input keys which are not declared on a schema.
    """

    def _collect_unknown_fields_errors(self, schema, data):
        """
        Checks `data` against `schema` and returns a dictionary `{<field>: [<error>]}`
        if unknown fields are detected, or `{0: {<field>: [<error>]}, ... N: {<field>: [<error>]}}`
        if `data` is a list (only items that have unknown fields get an entry).

        Input that is neither a list nor a mapping (e.g. `None` or a scalar) has no
        keys to compare, so it yields `{}` instead of crashing; reporting the wrong
        input type is left to the schema's regular validation.
        """
        if isinstance(data, list):
            validation_errors = {}
            for i, datum in enumerate(data):
                datum_validation_errors = self._collect_unknown_fields_errors(schema, datum)
                if datum_validation_errors:
                    validation_errors[i] = datum_validation_errors
            return validation_errors

        # Duck-typed mapping check: anything without `.keys()` cannot carry unknown fields.
        try:
            keys = data.keys()
        except AttributeError:
            return {}

        unknown = set(keys) - set(schema.fields)
        return {name: [UNKNOWN_MESSAGE] for name in unknown}


class NestedRejectUnknown(fields.Nested, _RejectUnknownMixin):
    """
    Nested field that returns validation errors if unknown fields are detected.
    """

    def _deserialize(self, value, attr, data, **kwargs):
        """
        Deserializes `value` with the nested schema and also rejects unknown fields.

        Extra keyword arguments (newer marshmallow releases pass some, such as
        `partial`) are forwarded unchanged to `fields.Nested._deserialize`.

        Raises a single `MarshmallowValidationError` containing the nested schema's
        own errors merged with one error per unknown field; returns the deserialized
        value when there are no errors at all.
        """
        # Bound up front so the final `return` is well defined even if the parent
        # raised an error whose normalized messages turn out to be empty.
        result = None
        validation_errors = {}
        try:
            result = super(NestedRejectUnknown, self)._deserialize(value, attr, data, **kwargs)
        except MarshmallowValidationError as err:
            # Keep the regular errors so they can be reported together with the
            # unknown-field errors instead of hiding them.
            validation_errors = err.normalized_messages()

        # Merge with unknown field errors
        validation_errors = _merge_dicts(
            self._collect_unknown_fields_errors(self.schema, value), validation_errors)
        if validation_errors:
            raise MarshmallowValidationError(validation_errors)

        return result


class SchemaRejectUnknown(Schema, _RejectUnknownMixin):
    """
    Schema whose `validate` also reports fields that are not declared on it.
    """

    def validate(self, data, **kwargs):
        """
        Runs the regular validation first, then merges its errors into the
        unknown-field errors collected for `data` and returns the combined dict.
        """
        regular_errors = super(SchemaRejectUnknown, self).validate(data, **kwargs)
        unknown_errors = self._collect_unknown_fields_errors(self, data)
        return _merge_dicts(unknown_errors, regular_errors)


def _merge_dicts(a, b, path=None):
    """
    Recursively merges `b` into `a` (in place) and returns `a`.

    Nested dictionaries are merged key by key. Equal leaf values are kept as they
    are; differing values under the same key raise an `Exception` naming the
    dotted path of the conflict.

    Ref : https://stackoverflow.com/questions/7204805/dictionaries-of-dictionaries-merge
    """
    trail = [] if path is None else path
    for key in b:
        incoming = b[key]
        if key not in a:
            # New key: simply adopt the value from `b`
            a[key] = incoming
            continue

        existing = a[key]
        if isinstance(existing, dict) and isinstance(incoming, dict):
            _merge_dicts(existing, incoming, trail + [str(key)])
            continue
        if existing == incoming:
            # same leaf value, nothing to do
            continue
        raise Exception('Conflict at %s' % '.'.join(trail + [str(key)]))
    return a
sebpiq
  • 7,540
  • 9
  • 52
  • 69
1

In marshmallow 3.0+ there is an `unknown` option in the schema's `Meta` class, e.g.:

def test_validate(self):
        # Schema configured to raise on keys that are not declared as fields.
        class ModelSchema(Schema):
            class Meta:
                unknown = RAISE
            name = fields.String()

        schema = ModelSchema()
        # `xyz` is not declared on ModelSchema.
        data = dict(name='jfaleiro', xyz=2)
        schema.validate(data) # passes (no exception; validate() reports errors through its return value, which is ignored here)
        schema.load(data) # fails (as intended): load() raises a ValidationError

It seems a bit contradictory, though, that the same data passes `validate` but fails `load`.

jfaleiro
  • 126
  • 1
  • 4
  • `Schema.validate` returns a dictionary of errors, whereas `Schema.load` raises a ValidationError if validation fails. – Steve L Aug 13 '19 at 04:15