2

Trying to learn JSON schemas and I have a complex schema that is all based on relative URL's. When nesting properties call a definitions property defined in another file, the validations sometimes get ignored.

My goal is to basically make reusable definitions and frequently used enums/consts/definitions globally

Assume project tree is:

-------------------------------------------
| C:/project                              |
| `----main.json (file to be validated)   |
| `----/schemas                           |
|   `----base.json                        |
|   `----datatypes.json                   |
|   `----defs.json                        |
|   `----frequent.json                    |
|   `----/pii                             |
|     `----jobs.json                       |
-------------------------------------------

Based on my understanding of how $id works at root level ref, is that it sets the base uri for the schema, where $ref is relative to. However, in some cases, my schema is not validated against false values, while others are. Nested, or otherwise.

datatypes.json

{
  "$id": "http://project.com/schemas/datatypes.json",
  "$schema" : "http://json-schema.org/draft-07/schema",

  "definitions" : {
    "type_number": {
      "type": "number",
      "default": 0.0
    },

    "type_integer": {
      "type": "integer",
      "default": 0
    },

    "type_nonempty_string" : {
      "type": "string",
      "minLength": 1,
      "pattern": "^([^\\s].*[^\\s]*)$"
    }
  }
}

defs.json

{
  "$id": "http://project.com/schemas/defs.json",
  "$schema": "http://json-schema.org/draft-07/schema",
  "definitions": {
    "number": {
      "$ref": "./datatypes.json#/definitions/type_number"
    },
    "null_number": {
      "oneOf": [
        {
          "$ref": "#/definitions/number"
        },
        {
          "type": "null"
        }
      ]
    },
    "positive_number": {
      "$ref": "#/definitions/number",
      "minimum": 0.0
    },
    "positive_nonzero_number": {
      "$ref": "#/definitions/number",
      "exclusiveMinimum": 0.0
    },
    "integer": {"$ref": "./datatypes.json#/definitions/type_integer"},
    "positive_integer" : {
      "$ref": "#/definitions/integer",
      "minimum" : 0
    },
    "positive_nonzero_integer" : {
      "$ref": "#/definitions/integer",
      "exclusiveMinimum" : 0
    },

    "strict_object": {
      "type": "object",
      "additionalProperties": false
    },
    "nonempty_string": {"$ref": "./datatypes.json#/definitions/type_nonempty_string"},
    "nonempty_string_null" : {
      "oneOf": [
        {
          "$ref": "#/definitions/nonempty_string"
        },
        {
          "type": "null"
        }
      ]
    }
  }
}

frequent.json

{
  "$id": "http://project.com/schemas/frequent.json",
  "$schema" : "http://json-schema.org/draft-07/schema",
  "definitions" : {
    "SEX_TYPES" : {
      "enum" : ["Male", "M","Female","F"]
    },
    "STATES" : {
      "$comment" : "assume this is an emum of states in US",
      "enum" : ["AK","AL","CT"]
    },

    "address" : {
      "$ref" : "./defs.json#/definitions/strict_object",
      "required" : ["street1", "city", "state", "zipcode"],
      "properties" : {
        "street1" : { "$ref" : "./defs.json#/definitions/nonempty_string" },
        "street2" : { "$ref" : "./defs.json#/definitions/nonempty_string" },
        "city" : { "$ref" : "./defs.json#/definitions/nonempty_string" },
        "state" : { "$ref" : "#/definitions/STATES" },
        "zipcode" : { "$ref" : "#/definitions/zipcode" }
      }
    },

    "zipcode": {
      "allOf": [
        {
          "$ref": "./defs.json#/definitions/nonempty_string"
        },
        {
          "pattern": "^(\\d{5})$"
        },
        {
          "not": {
            "pattern": "^(00000)$"
          }
        }
      ]
    }
  }
}

base.json

{
  "$id": "http://project.com/schemas/base.json",
  "$schema" : "http://json-schema.org/draft-07/schema",
  "type": "object",
  "$ref": "./defs.json#/definitions/strict_object",
  "properties": {
    "main": {
      "$ref": "./defs.json#/definitions/strict_object",
      "required": ["age", "sex", "empl_num", "description", "address", "jobs"],
      "properties": {
        "age": {"$ref": "./defs.json#/definitions/positive_nonzero_integer"},
        "sex": {"$ref": "./frequent.json#/definitions/SEX_TYPES" },
        "empl_num" : {"$ref": "./defs.json#/definitions/positive_nonzero_integer"},
        "description": {"$ref": "./defs.json#/definitions/nonempty_string"},
        "jobs" : {"$ref": "pii/jobs.json"},
        "address": {"$ref": "./frequent.json#/definitions/address"}
      }
    }
  }
}

pii/jobs.json

{
  "$id" : "http://project.com/schemas/pii/jobs.json",
  "$schema": "http://json-schema.org/draft-07/schema",
  "type" : "array",
  "minItems": 1,
  "items": {
    "$ref" : "../defs.json#/definitions/strict_object",
    "required": ["description","company","pay_details"],
    "properties" : {
      "description" : { "$ref" : "../defs.json#/definitions/nonempty_string" },
      "company" : {
        "$ref": "../defs.json#/definitions/strict_object",
        "required": ["name","address"],
        "properties": {
          "name" : { "$ref" :  "../defs.json#/definitions/nonempty_string" },
          "address" : {"$ref":  "../frequent.json#/definitions/address"}
        }
      },
      "pay_details" : {
        "$ref" : "../defs.json#/definitions/strict_object",
        "required": ["pay_grade", "salary", "performance_review"],
        "properties": {
          "pay_grade" : { "$ref" : "../defs.json#/definitions/positive_nonzero_integer" },
          "salary" : { "$ref" : "../defs.json#/definitions/positive_nonzero_number" },
          "performance_review" : {
            "$ref" : "../defs.json#/definitions/strict_object",
            "required" : ["previous", "nextDate", "numberOfWriteUps"],
            "properties" : {
              "notes" : { "$ref" : "../defs.json#/definitions/nonempty_string" },
              "numberOfWriteUps" : {"$ref" : "../defs.json#/definitions/positive_integer" },
              "previous" : {
                "$ref" : "../defs.json#/definitions/strict_object",
                "required": ["date", "rating"],
                "properties": {
                  "date" : {
                    "$ref" : "../defs.json#/definitions/nonempty_string",
                    "format" : "date-time"
                  },
                  "rating" : {"$ref" : "../defs.json#/definitions/positive_integer" }
                }
              },
              "nextDate" : {
                "$ref" : "../defs.json#/definitions/nonempty_string",
                "format" : "date-time"
              }
            }
          }
        }
      }
    }
  }
}

In intellij, the following is the result: (every single property shown should not be valid, however some are simply ignored and allowed (like negative numbers and nulls etc). I get no warnings/errors about $ref not being able to be resolved. Autocomplete even finishes the relative file name and definition property for me.

pic1


edit

After playing around with it some more I found that it is related to the relative URLs. For example, in my pii (personally identifiable information) folder, where jobs schema is located, if I move that schema to the root directory with the other schemas everything is validated properly. To illustrate the new project tree:

-------------------------------------------
| C:/project                              |
| `----main.json (file to be validated)   |
| `----/schemas                           |
|   `----base.json                        |
|   `----datatypes.json                   |
|   `----defs.json                        |
|   `----frequent.json                    |
|   `----jobs.json                        |
-------------------------------------------

pic2

For brevity, my schema isn't actually stored at http://project.com. Based on my understanding of $id this doesn't matter, it's just a way for the schemas to communicate with one another. I would like to modularize as much as possible and segregate schemas into their own sub-folders (like pii) but, in my example it does not validate that way.

SOFe
  • 7,867
  • 4
  • 33
  • 61
soulshined
  • 9,612
  • 5
  • 44
  • 79
  • 1
    Looks like you’re making good use of JSON Schema! The examples you’ve included are complex and not super easy to test. Is there a more minimal example you can create which demonstrates the same behaviour? – Relequestual May 26 '19 at 16:19
  • thanks @Relequestual I wasn't sure. This is actually a really dumbed down version from what I was using that demonstrates intention. I can remove the jobs.json schema and even though it's a required property for base.json schema, it doesn't produce any errors stating it's required. Based on that, I think the issue revolves around something being cached maybe or recursion, using the same data type from different files? But again, not sure how uri's work under the hood. I will look into seeing how to dumb this down more that is reproducible. Give me a half hour or so. – soulshined May 26 '19 at 16:31
  • btw, i'm using intellij? are there better methods to test? per your comment – soulshined May 26 '19 at 16:33
  • @Relequestual I have edited the question to provide some details of what I think it is. I wasn't able to dumb it down anymore because it does appear to be base on relative urls. (for example `base.json` imports `pii/jobs.json` which imports commonly used `defs.json` definitions) but if they share the same root directory there is no longer an issue. So I guess basically, the real question is how to properly compartmentalize/modularize schemas into sub folders? – soulshined May 26 '19 at 18:38
  • OK, well thanks for trying, appreciated. My immediate comment would be that reference resolution based on file location or "paths" as you've done here isn't something supported (defined) in JSON Schema. $ref resolves using URI resolution against the base URI of the file (assuming $id is only at the root of the file... because otherwise that adds much complexity). – Relequestual May 27 '19 at 05:39
  • 1
    Additionally, looking at "performance_review" in your schema, you're using `$ref` alongside other keywords. When you use $ref in an object, the rest of that object is ignored. To get around this, create two objects, and wrap them in an `allOf`. This is "fixed" / allowed in upcoming draft-8. – Relequestual May 27 '19 at 05:41
  • What I was trying to say in my response here but failed, is that reference resolution based on file location (and relative file location paths like you've done here) is non standard, and will depend on the library if such is supported or not (you'd have to read the docs for the lib). I tend to always full URL for URI references for external files, which means the schemas can be used without needing to be network addressable or connected to the internet. – Relequestual May 27 '19 at 05:44
  • You've actually used $ref and other keywords in the same object A LOT, so this is likely to be a huge issue in your expected validation. – Relequestual May 27 '19 at 05:45
  • Indeed I have @Relequestual that is a critical oversight on my part. After reading the docs just now I must have skimmed that part. Unfortunate. I took the word "extending" too literally and didn't read further. So basically, what I was doing was trying to make reusable defs to limit vertical space and promote shorthand, but would be undone by using `allOf` wouldn't it? if I have to wrap each property? What I'm interpreting that as, is it's better to just explicitly type parameters for each property if I want to extend or individualize them? – soulshined May 27 '19 at 05:58
  • add to clarify, when you say you use full URL for URI references, you mean their respective `$id` correct? – soulshined May 27 '19 at 05:59
  • I'd be interested to hear what docs you've read which mention "extending". – Relequestual May 27 '19 at 06:32
  • Yes, I mean their respective `$id`s. I think it's easier to work this way when referencing another file and avoids URI resolution confusion which many seem to have. – Relequestual May 27 '19 at 06:33

1 Answers1

2

It looks like you were on the right track, however you cannot use $ref with other keywords in the same object, as they are always ignored when using $ref with draft-7 JSON Schema.

8.3. Schema References With "$ref"

The "$ref" keyword is used to reference a schema, and provides the ability to validate recursive structures through self-reference.

An object schema with a "$ref" property MUST be interpreted as a
"$ref" reference. The value of the "$ref" property MUST be a URI
Reference. Resolved against the current URI base, it identifies the
URI of a schema to use. All other properties in a "$ref" object MUST be ignored.

https://datatracker.ietf.org/doc/html/draft-handrews-json-schema-01#section-8.3

If you want to use $ref with other keywords in the same schema object, you need to in stead create two schema objects and wrap them in an allOf.

Community
  • 1
  • 1
Relequestual
  • 11,631
  • 6
  • 47
  • 83
  • ah thanks for the cited resource. That clears a lot up. It's good to see the note about recursion as well. I think ultimately what I'll do is in each file's `definitions` property, tailor the needs to that specific schema instead of referencing it directly in each individual property as I'm doing. One last thing, per your comments, can you provide an example of how you reference them? My schemas are not network addressable, nor intended to be, so when testing locally, my json mapping does not recognize `http://project.com/schemas/defs.json` for say, the age property in `base.json` – soulshined May 27 '19 at 06:45
  • I'm not sure if it's IntelliJ or not, but every time I google it, SEO provides some sort of java solution, which this is not a java project, I'm just happen to be mapping it in IntelliJ – soulshined May 27 '19 at 06:46
  • All I know about IntelliJ is it's popular for use with Java. Here's an example repo where I do a lot of referencing: https://github.com/ga4gh-discovery/ga4gh-discovery-search/tree/master/json_schema/schemas_source - When using ajv (js implementation) I can load in schemas which may be referenced. The $id is picked up, which then allows it to be come resolveable. – Relequestual May 27 '19 at 06:52
  • Ok, I will thoroughly review that thanks. It's possible I actually have to pragmatically run it instead of relying on the IDE to visually represent it. Thank you for your patience and I sincerely thank you for your explanations. – soulshined May 27 '19 at 06:58
  • More than welcome. If you have more questions which don't belong on SO, feel free to join the JSON Schema slack. If you'd like to tip, you'll find a link on my profile. No expectations =] – Relequestual May 27 '19 at 07:00