diff --git a/README.rst b/README.rst index 9e3b351..0d2d467 100644 --- a/README.rst +++ b/README.rst @@ -206,24 +206,54 @@ OpenAPI 3.0 keeps historical ``format: binary`` / ``format: byte`` usage on **OAS30Validator (default - compatibility behavior)** - ``type: string`` accepts ``str`` - ``type: string, format: binary`` accepts Python ``bytes`` and strings + - ``maxLength`` / ``minLength`` constrain raw ``bytes`` by octet count - useful when validating Python-native runtime data **OAS30StrictValidator** - ``type: string`` accepts ``str`` only - - ``type: string, format: binary`` uses strict format validation + - ``type: string, format: binary`` uses strict format validation and rejects + ``bytes`` - use when you want strict, spec-oriented behavior for 3.0 schemas OpenAPI 3.1+ ------------ -OpenAPI 3.1+ follows JSON Schema semantics for string typing in this library. - -- ``type: string`` accepts ``str`` only (not ``bytes``) -- ``format: binary`` and ``format: byte`` are not treated as built-in formats -- for base64-in-JSON, model with ``contentEncoding: base64`` (optionally - ``contentMediaType``) -- for raw binary payloads, model via media type (for example - ``application/octet-stream``) rather than schema string formats +Under JSON Schema 2020-12, OpenAPI 3.1 and 3.2 model raw binary with a +**typeless** schema (the 3.0 ``format: binary`` / ``format: byte`` pair was +dropped). This library accepts Python ``bytes`` for such raw-binary schemas. + +**OAS31Validator / OAS32Validator (default - runtime-friendly behavior)** + - canonical raw binary is a **typeless** schema, optionally annotated with a + non-text ``contentMediaType`` and no ``contentEncoding`` (for example + ``{}`` or ``{"contentMediaType": "application/octet-stream"}``); a ``bytes`` + instance validates + - as a **pragmatic compatibility extension**, ``type: string`` together with a + non-text ``contentMediaType`` (and no ``contentEncoding``) also accepts + ``bytes``. 
This is runtime tolerance for specs migrated from 3.0, not a + claim of spec conformance + - plain ``type: string`` accepts ``str`` only (not ``bytes``) + - encoded text stays on the string path: model base64-in-JSON and similar with + ``contentEncoding``. *Any* real ``contentEncoding`` (``base64``, + ``base64url``, ``base16``, ``base32``, ``quoted-printable`` ...) keeps the + schema textual; only the no-op identity encodings (``identity`` / ``binary`` + / ``7bit`` / ``8bit``) leave it raw. Rejecting ``bytes`` still requires + ``type: string``: a typeless schema asserts no type, so it accepts ``bytes`` + even when ``contentEncoding`` is present + - ``maxLength`` / ``minLength`` constrain raw ``bytes`` by octet count + +**OAS31StrictValidator / OAS32StrictValidator** + - explicit opt-ins that preserve JSON Schema string typing + - canonical **typeless** raw binary still accepts ``bytes`` + - a schema asserting ``type: string`` rejects ``bytes`` even with a non-text + ``contentMediaType`` (no pragmatic tolerance) + - ``validator_for`` keeps resolving the 3.1 / 3.2 dialect ids to the default + validators; the strict classes are never the dialect default + +.. note:: + + **Migration from 3.0:** in 3.1 / 3.2 ``format: binary`` is no longer a binary + marker (under 2020-12 ``format`` is an annotation). A ``bytes`` value + validated against a 3.1 / 3.2 ``{"type": "string", "format": "binary"}`` + schema is now **rejected** -- model raw binary with ``contentMediaType`` + (ideally a typeless schema) instead. 
Regex Behavior ============== diff --git a/docs/validation.rst b/docs/validation.rst index c95164e..dea5a70 100644 --- a/docs/validation.rst +++ b/docs/validation.rst @@ -265,25 +265,57 @@ OpenAPI 3.0 keeps historical ``format: binary`` / ``format: byte`` usage on - ``type: string`` accepts ``str`` - ``type: string, format: binary`` accepts Python ``bytes`` and strings +- ``maxLength`` / ``minLength`` constrain raw ``bytes`` by octet count - useful when validating Python-native runtime data **OAS30StrictValidator** - ``type: string`` accepts ``str`` only -- ``type: string, format: binary`` uses strict format validation +- ``type: string, format: binary`` uses strict format validation and rejects + ``bytes`` - use when you want strict, spec-oriented behavior for 3.0 schemas OpenAPI 3.1+ ~~~~~~~~~~~~ -OpenAPI 3.1+ follows JSON Schema semantics for string typing in this library. - -- ``type: string`` accepts ``str`` only (not ``bytes``) -- ``format: binary`` and ``format: byte`` are not treated as built-in formats -- for base64-in-JSON, model with ``contentEncoding: base64`` (optionally - ``contentMediaType``) -- for raw binary payloads, model via media type (for example - ``application/octet-stream``) rather than schema string formats +Under JSON Schema 2020-12, OpenAPI 3.1 and 3.2 model raw binary with a +**typeless** schema; the 3.0 ``format: binary`` / ``format: byte`` pair was +dropped. This library accepts Python ``bytes`` for raw-binary schemas. 
+ +**OAS31Validator / OAS32Validator (default - runtime-friendly behavior)** + +- the canonical raw-binary form is a **typeless** schema, optionally annotated + with a non-text ``contentMediaType`` and no ``contentEncoding`` (for example + ``{}`` or ``{"contentMediaType": "application/octet-stream"}``); a ``bytes`` + instance validates +- as a **pragmatic compatibility extension**, ``type: string`` together with a + non-text ``contentMediaType`` (and no ``contentEncoding``) also accepts + ``bytes`` -- runtime tolerance for specs migrated from 3.0, not a claim of + spec conformance +- plain ``type: string`` accepts ``str`` only (not ``bytes``) +- encoded text stays on the string path: model base64-in-JSON and similar with + ``contentEncoding``. *Any* real ``contentEncoding`` (``base64``, + ``base64url``, ``base16``, ``base32``, ``quoted-printable`` ...) keeps the + schema textual; only the no-op identity encodings (``identity`` / ``binary`` + / ``7bit`` / ``8bit``) leave it raw. Rejecting ``bytes`` still requires + ``type: string``: a typeless schema asserts no type, so it accepts ``bytes`` + even when ``contentEncoding`` is present +- ``maxLength`` / ``minLength`` constrain raw ``bytes`` by octet count + +**OAS31StrictValidator / OAS32StrictValidator** + +- explicit opt-ins that preserve JSON Schema string typing +- canonical **typeless** raw binary still accepts ``bytes`` +- a schema asserting ``type: string`` rejects ``bytes`` even with a non-text + ``contentMediaType`` (no pragmatic tolerance) +- ``validator_for`` keeps resolving the 3.1 / 3.2 dialect ids to the default + validators; the strict classes are never the dialect default + +.. note:: + + **Migration from 3.0:** in 3.1 / 3.2 ``format: binary`` is no longer a binary + marker (under 2020-12 ``format`` is an annotation). A ``bytes`` value + validated against a 3.1 / 3.2 ``{"type": "string", "format": "binary"}`` + schema is now **rejected** -- model raw binary with ``contentMediaType`` + (ideally a typeless schema) instead. 
Quick Reference ~~~~~~~~~~~~~~~ @@ -306,12 +338,45 @@ Quick Reference - Strict 3.0 validation mode * - OAS 3.1 + ``OAS31Validator`` - Pass - - Fail - - Use ``contentEncoding``/``contentMediaType`` and media types + - Pass for raw binary + - Typeless, or ``type: string`` + non-text ``contentMediaType`` * - OAS 3.2 + ``OAS32Validator`` - Pass - - Fail + - Pass for raw binary - Same semantics as OAS 3.1 + * - OAS 3.1/3.2 strict validators + - Pass + - Pass for typeless raw binary + - Rejects ``bytes`` whenever ``type: string`` is asserted + +Example usage: + +.. code-block:: python + + from openapi_schema_validator import OAS31StrictValidator + from openapi_schema_validator import OAS31Validator + + # Canonical typeless raw binary - accepts bytes + validator = OAS31Validator({"contentMediaType": "application/octet-stream"}) + validator.validate(b"binary data") # passes + + # Pragmatic compatibility extension (default validators only) + validator = OAS31Validator( + {"type": "string", "contentMediaType": "application/octet-stream"} + ) + validator.validate(b"binary data") # passes + + # Octet-length bounds apply to raw bytes + validator = OAS31Validator( + {"contentMediaType": "application/pdf", "maxLength": 1} + ) + validator.validate(b"abc") # raises ValidationError (3 octets > 1) + + # Strict - preserves JSON Schema string typing + validator = OAS31StrictValidator( + {"type": "string", "contentMediaType": "application/octet-stream"} + ) + validator.validate(b"binary data") # raises ValidationError Regex Behavior -------------- diff --git a/openapi_schema_validator/__init__.py b/openapi_schema_validator/__init__.py index fda19fa..d80095c 100644 --- a/openapi_schema_validator/__init__.py +++ b/openapi_schema_validator/__init__.py @@ -9,7 +9,9 @@ from openapi_schema_validator.validators import OAS30StrictValidator from openapi_schema_validator.validators import OAS30Validator from openapi_schema_validator.validators import OAS30WriteValidator +from 
openapi_schema_validator.validators import OAS31StrictValidator from openapi_schema_validator.validators import OAS31Validator +from openapi_schema_validator.validators import OAS32StrictValidator from openapi_schema_validator.validators import OAS32Validator __author__ = "Artur Maciag" @@ -27,8 +29,10 @@ "oas30_format_checker", "oas30_strict_format_checker", "OAS31Validator", + "OAS31StrictValidator", "oas31_format_checker", "OAS32Validator", + "OAS32StrictValidator", "oas32_format_checker", "OAS31_BASE_DIALECT_ID", "OAS32_BASE_DIALECT_ID", diff --git a/openapi_schema_validator/_binary.py b/openapi_schema_validator/_binary.py new file mode 100644 index 0000000..87e98cf --- /dev/null +++ b/openapi_schema_validator/_binary.py @@ -0,0 +1,348 @@ +"""Per-version detection of opaque binary ``bytes`` payload schemas. + +This module is the single source of truth for deciding whether a schema +describes an *opaque binary* string payload for which a Python ``bytes`` +instance should be accepted. It deliberately works on plain schema mappings +(pure functions, no validation side effects) so the predicates can be unit +tested directly -- importantly for the OAS 3.0 array-``type`` case, which +crashes the 3.0 ``type`` keyword if evaluated through validation. + +Each OAS dialect declares "this is opaque binary" differently, so the +predicates are version scoped: + +* OAS 3.0 uses ``format: binary`` (it has no ``contentMediaType`` / + ``contentEncoding`` keywords). +* OAS 3.1 / 3.2 model raw binary with a *typeless* schema, optionally annotated + with a non-text ``contentMediaType`` and no ``contentEncoding``. ``format`` is + an annotation in 2020-12 and is therefore *not* a binary marker there. + +The default 3.1 / 3.2 predicates additionally tolerate a common non-canonical +form (``type: string`` + non-text ``contentMediaType``) as a documented runtime +compatibility extension. The strict predicates recognize canonical typeless raw +binary only. 
+ +The acceptance of ``bytes`` lives exclusively in the keyword wrappers built here +(``build_binary_type`` and friends); the global ``"string"`` type checker is +never broadened to include ``bytes`` (see the design's architectural invariant). +""" + +from collections.abc import Mapping +from typing import Any +from typing import Callable +from typing import Iterator + +from jsonschema.exceptions import ValidationError + +BinarySchemaPredicate = Callable[[Mapping[str, Any]], bool] +KeywordValidator = Callable[ + [Any, Any, Any, Mapping[str, Any]], Iterator[ValidationError] +] + +# Media types that are textual even though they fall outside the ``text/*`` +# tree. JSON Schema 2020-12 treats ``contentMediaType`` as describing the media +# type of the string's *contents*; for these the contents are text (a ``str``), +# not raw bytes, so they stay on the normal string path. Non-exhaustive on +# purpose: an unrecognized ``application/*`` subtype with no textual suffix +# defaults to opaque binary. Keep this easy to extend as new textual subtypes +# surface. +_TEXTUAL_MEDIA_TYPES = frozenset( + { + "application/json", + "application/xml", + "application/x-www-form-urlencoded", + "application/javascript", + "application/ecmascript", + "application/yaml", + "application/x-yaml", + "application/graphql", + "application/x-ndjson", + "application/csv", + } +) + +# Structured-syntax suffixes whose payloads are textual (e.g. ``image/svg+xml``, +# ``application/problem+json``, ``application/ld+json``). +_TEXTUAL_MEDIA_TYPE_SUFFIXES = ("+json", "+xml", "+yaml") + +# ``contentEncoding`` values that denote identity / no-op transfer: the string +# is *not* an encoded representation, so the payload can still be raw binary. +_NOOP_CONTENT_ENCODINGS = frozenset({"identity", "binary", "7bit", "8bit"}) + +# ``format`` values that mean the string is base64-style encoded text in any +# OAS version. 
+_ENCODED_TEXT_FORMATS = frozenset({"byte", "base64"}) + + +def _normalize_media_type(media_type: str) -> str: + """Return the lowercased ``type/subtype`` with parameters stripped. + + Media types are case-insensitive (RFC 6838) and MAY carry parameters + (``; charset=...``, ``; version=...``). Split on the first ``;``, keep the + type/subtype portion, trim whitespace, then lowercase -- so + ``application/problem+json; charset=utf-8`` stays textual and + ``application/pdf; version=1`` stays opaque. + """ + return media_type.split(";", 1)[0].strip().lower() + + +def _is_textual_media_type(media_type: str) -> bool: + normalized = _normalize_media_type(media_type) + if normalized.startswith("text/"): + return True + if normalized in _TEXTUAL_MEDIA_TYPES: + return True + return any( + normalized.endswith(suffix) for suffix in _TEXTUAL_MEDIA_TYPE_SUFFIXES + ) + + +def _content_media_type(schema: Mapping[str, Any]) -> str | None: + media_type = schema.get("contentMediaType") + if isinstance(media_type, str): + return media_type + return None + + +def _has_non_text_content_media_type(schema: Mapping[str, Any]) -> bool: + media_type = _content_media_type(schema) + return media_type is not None and not _is_textual_media_type(media_type) + + +def _has_textual_content_media_type(schema: Mapping[str, Any]) -> bool: + media_type = _content_media_type(schema) + return media_type is not None and _is_textual_media_type(media_type) + + +def _is_encoded_text(schema: Mapping[str, Any]) -> bool: + """Whether the schema's string is an *encoded* representation. + + Encoded strings are never opaque binary; they stay on the normal string + path in every version. This covers ``format: byte`` / ``format: base64`` and + -- for 3.1 / 3.2 -- any real ``contentEncoding`` (``base64``, ``base64url``, + ``base16``, ``base32``, ``quoted-printable``, ...). A no-op identity + encoding (``identity`` / ``binary`` / ``7bit`` / ``8bit``) does not count as + encoded. 
Checked across versions so that, e.g., ``format: binary`` alongside + ``contentEncoding: base64`` is correctly excluded. + """ + fmt = schema.get("format") + if isinstance(fmt, str) and fmt.strip().lower() in _ENCODED_TEXT_FORMATS: + return True + + encoding = schema.get("contentEncoding") + if encoding is None: + return False + if ( + isinstance(encoding, str) + and encoding.strip().lower() in _NOOP_CONTENT_ENCODINGS + ): + return False + # Any other present ``contentEncoding`` asserts the value is encoded text. + return True + + +def _type_includes_string(schema: Mapping[str, Any]) -> bool: + declared_type = schema.get("type") + if declared_type == "string": + return True + return isinstance(declared_type, list) and "string" in declared_type + + +def _is_oas30_binary_candidate(schema: Mapping[str, Any]) -> bool: + # OAS 3.0 does not permit array-valued ``type`` and the 3.0 ``type`` keyword + # cannot evaluate one, so detection is restricted to scalar ``type``: no + # ``type``, or exactly ``type: string``. + if "type" not in schema: + return True + return schema.get("type") == "string" + + +def _is_oas31_binary_candidate(schema: Mapping[str, Any]) -> bool: + # JSON Schema 2020-12 permits array-valued ``type`` and evaluates it + # natively, so multi-type binary schemas are supported: no ``type``, + # ``type: string``, or a ``type`` array that includes ``"string"``. + if "type" not in schema: + return True + return _type_includes_string(schema) + + +def is_oas30_binary_schema(schema: Mapping[str, Any]) -> bool: + """OAS 3.0 opaque binary: scalar-``type`` candidate plus ``format: binary``. + + ``format`` is the sole binary indicator in 3.0. Encoded co-presence (e.g. + ``contentEncoding: base64`` or ``format: byte``) disqualifies the schema. 
+ """ + if not isinstance(schema, Mapping): + return False + if _is_encoded_text(schema): + return False + if not _is_oas30_binary_candidate(schema): + return False + fmt = schema.get("format") + return isinstance(fmt, str) and fmt.strip().lower() == "binary" + + +def is_oas31_binary_schema(schema: Mapping[str, Any]) -> bool: + """Default (runtime-friendly) OAS 3.1 opaque binary predicate. + + A candidate schema (no ``type`` / ``type: string`` / ``type`` array with + ``"string"``) that is not encoded text, and is one of: + + * **no ``type`` and no textual ``contentMediaType``** -- the canonical + raw-binary form (covers ``{}`` and + ``{"contentMediaType": "application/octet-stream"}``); or + * **``type`` includes ``"string"`` and a non-text ``contentMediaType``** -- + the pragmatic compatibility extension. + + Phrasing it this way keeps a typeless *text* schema like + ``{"contentMediaType": "application/json"}`` from being mislabeled as binary. + ``format: binary`` is not a binary indicator in 3.1. + """ + if not isinstance(schema, Mapping): + return False + if _is_encoded_text(schema): + return False + if not _is_oas31_binary_candidate(schema): + return False + if "type" not in schema: + # Canonical typeless raw binary: no textual contentMediaType. + return not _has_textual_content_media_type(schema) + # type includes "string" (guaranteed by the candidate gate): pragmatic + # compatibility extension requires a non-text contentMediaType. + return _has_non_text_content_media_type(schema) + + +def is_oas32_binary_schema(schema: Mapping[str, Any]) -> bool: + """Default OAS 3.2 opaque binary predicate (identical to OAS 3.1).""" + return is_oas31_binary_schema(schema) + + +def is_oas31_strict_binary_schema(schema: Mapping[str, Any]) -> bool: + """Strict OAS 3.1 opaque binary predicate: canonical typeless raw binary. + + Recognizes only the typeless form (no ``type``, not encoded text, no textual + ``contentMediaType``). 
A schema asserting ``type: string`` -- even with a + non-text ``contentMediaType`` -- stays on the ordinary string path and + rejects ``bytes``, preserving JSON Schema string typing. + """ + if not isinstance(schema, Mapping): + return False + if _is_encoded_text(schema): + return False + if "type" in schema: + return False + return not _has_textual_content_media_type(schema) + + +def is_oas32_strict_binary_schema(schema: Mapping[str, Any]) -> bool: + """Strict OAS 3.2 opaque binary predicate (identical to strict OAS 3.1).""" + return is_oas31_strict_binary_schema(schema) + + +def build_binary_type( + original: KeywordValidator, + is_binary: BinarySchemaPredicate, +) -> KeywordValidator: + """Wrap a ``type`` keyword to accept ``bytes`` for opaque binary schemas. + + When the instance is ``bytes`` and the schema is opaque binary for the + active predicate, the instance is accepted as a string-compatible binary + payload (no type assertion to violate). Every other case delegates to the + original ``type`` validator unchanged. + """ + + def binary_type( + validator: Any, + data_type: Any, + instance: Any, + schema: Mapping[str, Any], + ) -> Iterator[ValidationError]: + if isinstance(instance, bytes) and is_binary(schema): + return + yield from original(validator, data_type, instance, schema) + + return binary_type + + +def build_binary_max_length( + original: KeywordValidator, + is_binary: BinarySchemaPredicate, +) -> KeywordValidator: + """Wrap ``maxLength`` to enforce octet length for opaque binary ``bytes``. + + jsonschema's ``maxLength`` is guarded by ``is_type(instance, "string")`` and + so no-ops on ``bytes``. For opaque binary ``bytes`` the bound is instead + checked against ``len(instance)`` (the number of octets), matching the OAS + 3.2 statement that for unencoded binary "the length is the number of + octets." Every other case delegates to the original validator. 
+ """ + + def binary_max_length( + validator: Any, + max_length: Any, + instance: Any, + schema: Mapping[str, Any], + ) -> Iterator[ValidationError]: + if isinstance(instance, bytes) and is_binary(schema): + if len(instance) > max_length: + message = ( + "is expected to be empty" + if max_length == 0 + else "is too long" + ) + yield ValidationError(f"{instance!r} {message}") + return + yield from original(validator, max_length, instance, schema) + + return binary_max_length + + +def build_binary_min_length( + original: KeywordValidator, + is_binary: BinarySchemaPredicate, +) -> KeywordValidator: + """Wrap ``minLength`` to enforce octet length for opaque binary ``bytes``. + + Mirrors :func:`build_binary_max_length` for the lower bound. + """ + + def binary_min_length( + validator: Any, + min_length: Any, + instance: Any, + schema: Mapping[str, Any], + ) -> Iterator[ValidationError]: + if isinstance(instance, bytes) and is_binary(schema): + if len(instance) < min_length: + message = ( + "should be non-empty" + if min_length == 1 + else "is too short" + ) + yield ValidationError(f"{instance!r} {message}") + return + yield from original(validator, min_length, instance, schema) + + return binary_min_length + + +def build_binary_format( + original: KeywordValidator, + is_binary: BinarySchemaPredicate, +) -> KeywordValidator: + """Wrap ``format`` to skip text-oriented format checks on opaque binary. + + Format checks are text oriented and should not run on opaque binary + ``bytes``. Every other case delegates to the original validator. 
+ """ + + def binary_format( + validator: Any, + format: Any, + instance: Any, + schema: Mapping[str, Any], + ) -> Iterator[ValidationError]: + if isinstance(instance, bytes) and is_binary(schema): + return + yield from original(validator, format, instance, schema) + + return binary_format diff --git a/openapi_schema_validator/_keywords.py b/openapi_schema_validator/_keywords.py index 7ebe467..dcc39e7 100644 --- a/openapi_schema_validator/_keywords.py +++ b/openapi_schema_validator/_keywords.py @@ -120,7 +120,12 @@ def type( instance: Any, schema: Mapping[str, Any], ) -> Iterator[ValidationError]: - """Default type validator - allows Python bytes for binary format for pragmatic reasons.""" + """Default OAS 3.0 type validator with ``nullable`` support. + + Acceptance of Python ``bytes`` for opaque binary schemas is handled by the + predicate-driven wrapper in ``_binary.build_binary_type`` (the single source + of truth), not by an inline ``format: binary`` branch here. + """ if instance is None: # nullable implementation based on OAS 3.0.3 # * nullable is only meaningful if its value is true @@ -131,14 +136,6 @@ def type( return yield ValidationError("None for not nullable") - # Pragmatic: allow bytes for binary format (common in Python use cases) - if ( - data_type == "string" - and schema.get("format") == "binary" - and isinstance(instance, bytes) - ): - return - if not validator.is_type(instance, data_type): data_repr = repr(data_type) yield ValidationError(f"{instance!r} is not of type {data_repr}") diff --git a/openapi_schema_validator/validators.py b/openapi_schema_validator/validators.py index 907b803..73e9972 100644 --- a/openapi_schema_validator/validators.py +++ b/openapi_schema_validator/validators.py @@ -17,6 +17,15 @@ from openapi_schema_validator import _format as oas_format from openapi_schema_validator import _keywords as oas_keywords from openapi_schema_validator import _types as oas_types +from openapi_schema_validator._binary import 
build_binary_format +from openapi_schema_validator._binary import build_binary_max_length +from openapi_schema_validator._binary import build_binary_min_length +from openapi_schema_validator._binary import build_binary_type +from openapi_schema_validator._binary import is_oas30_binary_schema +from openapi_schema_validator._binary import is_oas31_binary_schema +from openapi_schema_validator._binary import is_oas31_strict_binary_schema +from openapi_schema_validator._binary import is_oas32_binary_schema +from openapi_schema_validator._binary import is_oas32_strict_binary_schema from openapi_schema_validator._dialects import OAS31_BASE_DIALECT_ID from openapi_schema_validator._dialects import OAS31_BASE_DIALECT_METASCHEMA from openapi_schema_validator._dialects import OAS32_BASE_DIALECT_ID @@ -99,12 +108,52 @@ def _oas30_id_of(schema: Any) -> str: ) +def _binary_aware_draft202012_keywords(predicate: Any) -> dict[str, Any]: + """Binary-aware wrappers over the native draft-2020-12 keyword callables. + + Used for the OAS 3.1 / 3.2 validators (default and strict). ``type`` accepts + ``bytes`` for opaque binary schemas, ``maxLength`` / ``minLength`` enforce + octet length, and ``format`` is skipped on opaque binary ``bytes``. The + originals are the native draft-2020-12 callables so the strict variants do + not inherit the default predicate's acceptance. + """ + return { + "type": build_binary_type( + Draft202012Validator.VALIDATORS["type"], predicate + ), + "maxLength": build_binary_max_length( + Draft202012Validator.VALIDATORS["maxLength"], predicate + ), + "minLength": build_binary_min_length( + Draft202012Validator.VALIDATORS["minLength"], predicate + ), + "format": build_binary_format( + Draft202012Validator.VALIDATORS["format"], predicate + ), + } + + def _build_oas30_validator() -> Any: + # Fold binary-awareness into the base OAS 3.0 validator so that the read / + # write / strict subclasses inherit the octet-length and format behavior. 
+ validators = dict(OAS30_VALIDATORS) + validators["type"] = build_binary_type( + oas_keywords.type, is_oas30_binary_schema + ) + validators["maxLength"] = build_binary_max_length( + _keywords.maxLength, is_oas30_binary_schema + ) + validators["minLength"] = build_binary_min_length( + _keywords.minLength, is_oas30_binary_schema + ) + validators["format"] = build_binary_format( + oas_keywords.format, is_oas30_binary_schema + ) return create( meta_schema=OPENAPI_SPECIFICATIONS.contents( "http://json-schema.org/draft-04/schema#", ), - validators=OAS30_VALIDATORS, + validators=cast(Any, validators), type_checker=oas_types.oas30_type_checker, format_checker=oas_format.oas30_format_checker, # NOTE: version causes conflict with global jsonschema validator @@ -115,19 +164,25 @@ def _build_oas30_validator() -> Any: def _build_oas31_validator() -> Any: + # Binary-awareness is folded in here, before register_openapi_dialect, so + # that validator_for resolves the dialect id to this binary-aware class. 
+ validators = { + # adjusted to OAS + "pattern": oas_keywords.pattern, + "description": oas_keywords.not_implemented, + # fixed OAS fields + # discriminator is annotation-only in OAS 3.1+ + "discriminator": oas_keywords.not_implemented, + "xml": oas_keywords.not_implemented, + "externalDocs": oas_keywords.not_implemented, + "example": oas_keywords.not_implemented, + } + validators.update( + _binary_aware_draft202012_keywords(is_oas31_binary_schema) + ) validator = extend( Draft202012Validator, - { - # adjusted to OAS - "pattern": oas_keywords.pattern, - "description": oas_keywords.not_implemented, - # fixed OAS fields - # discriminator is annotation-only in OAS 3.1+ - "discriminator": oas_keywords.not_implemented, - "xml": oas_keywords.not_implemented, - "externalDocs": oas_keywords.not_implemented, - "example": oas_keywords.not_implemented, - }, + validators, type_checker=oas31_type_checker, format_checker=oas_format.oas31_format_checker, ) @@ -142,7 +197,7 @@ def _build_oas31_validator() -> Any: def _build_oas32_validator() -> Any: validator = extend( OAS31Validator, - {}, + _binary_aware_draft202012_keywords(is_oas32_binary_schema), format_checker=oas_format.oas32_format_checker, ) return register_openapi_dialect( @@ -192,6 +247,27 @@ def _build_oas32_validator() -> Any: OAS31Validator.check_schema = classmethod(check_openapi_schema) OAS32Validator.check_schema = classmethod(check_openapi_schema) +# Strict OAS 3.1 / 3.2 validators: explicit opt-ins that preserve JSON Schema +# string typing. They accept canonical typeless raw binary but reject ``bytes`` +# whenever a schema asserts ``type: string`` (even with a non-text +# ``contentMediaType``). Built with the strict predicates rather than inheriting +# the default binary wrappers, and intentionally NOT registered as the dialect +# default, so validator_for keeps resolving the OAS 3.1 / 3.2 dialect ids to the +# runtime-friendly OAS31Validator / OAS32Validator. 
+OAS31StrictValidator = extend( + OAS31Validator, + _binary_aware_draft202012_keywords(is_oas31_strict_binary_schema), +) +OAS32StrictValidator = extend( + OAS32Validator, + _binary_aware_draft202012_keywords(is_oas32_strict_binary_schema), + format_checker=oas_format.oas32_format_checker, +) +# extend() builds a fresh class via create() and drops the custom check_schema +# classmethod, so re-attach it (mirrors build_enforce_properties_required_validator). +OAS31StrictValidator.check_schema = classmethod(check_openapi_schema) +OAS32StrictValidator.check_schema = classmethod(check_openapi_schema) + @lru_cache(maxsize=None) def build_enforce_properties_required_validator( diff --git a/tests/integration/test_validators.py b/tests/integration/test_validators.py index 60a9c7f..01193d5 100644 --- a/tests/integration/test_validators.py +++ b/tests/integration/test_validators.py @@ -1,6 +1,7 @@ import re import warnings from base64 import b64encode +from copy import deepcopy from typing import Any from typing import cast from unittest.mock import patch @@ -25,7 +26,9 @@ from openapi_schema_validator import OAS30StrictValidator from openapi_schema_validator import OAS30Validator from openapi_schema_validator import OAS30WriteValidator +from openapi_schema_validator import OAS31StrictValidator from openapi_schema_validator import OAS31Validator +from openapi_schema_validator import OAS32StrictValidator from openapi_schema_validator import OAS32Validator from openapi_schema_validator import oas30_format_checker from openapi_schema_validator import oas30_strict_format_checker @@ -285,6 +288,39 @@ def test_string_binary_invalid( with pytest.raises(ValidationError): validator.validate(value) + def test_binary_octet_max_length_rejects_and_accepts( + self, validator_class + ): + # maxLength constrains raw bytes by octet count for OAS 3.0 too. 
+ schema = {"type": "string", "format": "binary", "maxLength": 1} + validator = validator_class(schema) + + with pytest.raises(ValidationError): + validator.validate(b"abc") + assert validator.validate(b"a") is None + + def test_binary_format_byte_rejects_bytes( + self, validator_class, format_checker + ): + # format: byte is base64-encoded text, so raw bytes are rejected. + schema = {"type": "string", "format": "byte"} + validator = validator_class(schema, format_checker=format_checker) + + with pytest.raises(ValidationError): + validator.validate(b"\x00\x01\x02") + + def test_binary_enum_bytes_stays_active(self, validator_class): + schema = { + "type": "string", + "format": "binary", + "enum": [b"a", b"b"], + } + validator = validator_class(schema) + + with pytest.raises(ValidationError): + validator.validate(b"zzz") + assert validator.validate(b"a") is None + @pytest.mark.parametrize( "schema_type", [ @@ -1180,6 +1216,255 @@ def test_discriminator_unresolvable_reference_ignored( {"discipline": "mountain_hiking", "length": 10}, ) + # -- binary bytes (runs for both OAS 3.1 and OAS 3.2 via inheritance) -- + + @pytest.mark.parametrize( + "schema", + [ + {}, + {"contentMediaType": "application/octet-stream"}, + {"contentMediaType": "application/pdf"}, + ], + ) + def test_binary_typeless_accepts_bytes(self, validator_class, schema): + # Canonical 3.1/3.2 raw-binary form: a typeless schema asserts no type. + validator = validator_class(schema) + + assert validator.validate(b"\x00\x01\x02") is None + + def test_binary_type_string_content_media_type_accepts_bytes( + self, validator_class + ): + # Pragmatic compatibility extension. 
+ schema = { + "type": "string", + "contentMediaType": "application/octet-stream", + } + validator = validator_class(schema) + + assert validator.validate(b"raw") is None + + @pytest.mark.parametrize( + "content_encoding", + ["base64", "base64url", "base16", "base32", "quoted-printable"], + ) + def test_binary_type_string_encoded_rejects_bytes( + self, validator_class, content_encoding + ): + # Encoded text stays on the string path; type: string asserts, so the + # type wrapper rejects raw bytes. + schema = { + "type": "string", + "contentMediaType": "application/octet-stream", + "contentEncoding": content_encoding, + } + validator = validator_class(schema) + + with pytest.raises(ValidationError): + validator.validate(b"raw") + + def test_binary_type_string_format_byte_rejects_bytes( + self, validator_class + ): + schema = {"type": "string", "format": "byte"} + validator = validator_class(schema) + + with pytest.raises(ValidationError): + validator.validate(b"\x00\x01\x02") + + def test_binary_typeless_encoded_still_accepts_bytes( + self, validator_class + ): + # A typeless schema asserts nothing, so it accepts bytes even though the + # encoded-text gate classifies it as not-binary (enforceability + # boundary). Rejection would require a type: string assertion. 
+ schema = { + "contentMediaType": "application/octet-stream", + "contentEncoding": "base16", + } + validator = validator_class(schema) + + assert validator.validate(b"raw") is None + + def test_binary_octet_max_length_rejects_and_accepts( + self, validator_class + ): + schema = {"contentMediaType": "application/pdf", "maxLength": 1} + validator = validator_class(schema) + + with pytest.raises(ValidationError): + validator.validate(b"abc") + assert validator.validate(b"a") is None + + def test_binary_octet_min_length_rejects_and_accepts( + self, validator_class + ): + schema = {"contentMediaType": "application/pdf", "minLength": 2} + validator = validator_class(schema) + + with pytest.raises(ValidationError): + validator.validate(b"a") + assert validator.validate(b"ab") is None + + def test_string_max_length_still_applies_to_str(self, validator_class): + # The octet-length wrapper delegates to the native string check for + # non-binary instances. + schema = {"type": "string", "maxLength": 2} + validator = validator_class(schema) + + with pytest.raises(ValidationError): + validator.validate("abc") + assert validator.validate("ab") is None + + def test_string_min_length_still_applies_to_str(self, validator_class): + schema = {"type": "string", "minLength": 2} + validator = validator_class(schema) + + with pytest.raises(ValidationError): + validator.validate("a") + assert validator.validate("ab") is None + + def test_binary_enum_bytes_stays_active(self, validator_class): + schema = { + "contentMediaType": "application/octet-stream", + "enum": [b"a", b"b"], + } + validator = validator_class(schema) + + with pytest.raises(ValidationError): + validator.validate(b"zzz") + assert validator.validate(b"a") is None + + def test_binary_const_bytes_stays_active(self, validator_class): + schema = { + "contentMediaType": "application/octet-stream", + "const": b"a", + } + validator = validator_class(schema) + + with pytest.raises(ValidationError): + validator.validate(b"b") + 
assert validator.validate(b"a") is None + + @pytest.mark.parametrize("ecma", [True, False]) + def test_binary_pattern_does_not_raise_or_reject( + self, validator_class, ecma + ): + # pattern is guarded by is_type(instance, "string") on both the ECMA and + # non-ECMA paths, so it short-circuits bytes -- no TypeError, no error. + schema = { + "type": "string", + "contentMediaType": "application/octet-stream", + "pattern": "^a", + } + validator = validator_class(schema) + + with patch( + "openapi_schema_validator._keywords.has_ecma_regex", + return_value=ecma, + ): + assert validator.validate(b"does-not-match-pattern") is None + + def test_binary_multi_type_accepts_none_and_bytes(self, validator_class): + schema = { + "type": ["string", "null"], + "contentMediaType": "application/octet-stream", + } + validator = validator_class(schema) + + assert validator.validate(None) is None + assert validator.validate(b"raw") is None + + @pytest.mark.parametrize( + "content_media_type", + [ + "application/json", + "application/ld+json", + "image/svg+xml", + "application/problem+json; charset=utf-8", + ], + ) + def test_binary_structured_text_type_string_rejects_bytes( + self, validator_class, content_media_type + ): + schema = {"type": "string", "contentMediaType": content_media_type} + validator = validator_class(schema) + + with pytest.raises(ValidationError): + validator.validate(b"raw") + + def test_binary_media_type_parameter_opaque_accepts_bytes( + self, validator_class + ): + schema = { + "type": "string", + "contentMediaType": "application/pdf; version=1", + } + validator = validator_class(schema) + + assert validator.validate(b"%PDF") is None + + def test_binary_oneof_selects_binary_branch_for_bytes( + self, validator_class + ): + schema = { + "oneOf": [ + {"type": "integer"}, + { + "type": "string", + "contentMediaType": "application/octet-stream", + }, + ] + } + validator = validator_class(schema) + + assert validator.validate(b"raw") is None + + def 
test_binary_anyof_selects_binary_branch_for_bytes( + self, validator_class + ): + schema = { + "anyOf": [ + {"type": "integer"}, + {"contentMediaType": "application/octet-stream"}, + ] + } + validator = validator_class(schema) + + assert validator.validate(b"raw") is None + + def test_binary_nested_object_property_accepts_bytes( + self, validator_class + ): + schema = { + "type": "object", + "properties": { + "file": { + "type": "string", + "contentMediaType": "application/octet-stream", + }, + }, + } + validator = validator_class(schema) + + assert validator.validate({"file": b"raw"}) is None + + def test_binary_does_not_mutate_schema_or_instance(self, validator_class): + schema = { + "type": "string", + "contentMediaType": "application/octet-stream", + "maxLength": 8, + } + schema_before = deepcopy(schema) + instance = bytearray(b"raw") + instance_before = bytearray(instance) + validator = validator_class(schema) + + validator.validate(bytes(instance)) + + assert schema == schema_before + assert instance == instance_before + class TestOAS32ValidatorValidate(TestOAS31ValidatorValidate): """OAS 3.2 uses the OAS 3.2 published dialect resources.""" @@ -1199,7 +1484,20 @@ def test_format_checker_is_distinct_from_oas31(self): assert oas32_format_checker is not oas31_format_checker def test_validator_shares_oas31_behavior(self): - assert OAS32Validator.VALIDATORS == OAS31Validator.VALIDATORS + # OAS 3.2 inherits the OAS 3.1 keyword surface. The binary-aware + # wrappers (type/maxLength/minLength/format) are re-bound to the OAS 3.2 + # predicate, so they are distinct callables with identical behavior; + # every other keyword handler is inherited unchanged. 
+ assert ( + OAS32Validator.VALIDATORS.keys() + == OAS31Validator.VALIDATORS.keys() + ) + binary_wrapped = {"type", "maxLength", "minLength", "format"} + assert all( + OAS32Validator.VALIDATORS[name] is handler + for name, handler in OAS31Validator.VALIDATORS.items() + if name not in binary_wrapped + ) def test_validator_has_oas32_dialect_metaschema(self): assert OAS32Validator.META_SCHEMA["$id"] == OAS32_BASE_DIALECT_ID @@ -1281,6 +1579,112 @@ def test_oas32_check_schema_does_not_fetch_remote_metaschemas(self): urlopen.assert_not_called() +class TestOAS31StrictValidatorValidate: + """OAS31StrictValidator preserves JSON Schema string typing. + + It accepts canonical typeless raw binary, but rejects ``bytes`` whenever a + schema asserts ``type: string`` -- even with a non-text ``contentMediaType``. + """ + + @pytest.fixture + def validator_class(self): + return OAS31StrictValidator + + @pytest.fixture + def format_checker(self): + return oas31_format_checker + + @pytest.mark.parametrize( + "schema", + [ + {}, + {"contentMediaType": "application/octet-stream"}, + {"contentMediaType": "application/pdf"}, + ], + ) + def test_typeless_raw_binary_accepts_bytes(self, validator_class, schema): + validator = validator_class(schema) + + assert validator.validate(b"\x00\x01\x02") is None + + def test_type_string_content_media_type_rejects_bytes( + self, validator_class + ): + # The defining strict behavior: a type: string assertion rejects bytes, + # even with a non-text contentMediaType (no pragmatic tolerance). 
+ schema = { + "type": "string", + "contentMediaType": "application/octet-stream", + } + validator = validator_class(schema) + + with pytest.raises(ValidationError): + validator.validate(b"raw") + + def test_type_string_format_binary_rejects_bytes(self, validator_class): + schema = {"type": "string", "format": "binary"} + validator = validator_class(schema) + + with pytest.raises(ValidationError): + validator.validate(b"raw") + + def test_plain_string_rejects_bytes(self, validator_class): + validator = validator_class({"type": "string"}) + + with pytest.raises(ValidationError): + validator.validate(b"raw") + + def test_plain_string_accepts_str(self, validator_class): + validator = validator_class({"type": "string"}) + + assert validator.validate("text") is None + + def test_typeless_octet_length_enforced(self, validator_class): + # Octet length still applies to canonical typeless raw binary. + schema = {"contentMediaType": "application/pdf", "maxLength": 1} + validator = validator_class(schema) + + with pytest.raises(ValidationError): + validator.validate(b"abc") + assert validator.validate(b"a") is None + + def test_multi_type_with_marker_rejects_bytes(self, validator_class): + # type: [string, null] asserts type, so strict rejects bytes but still + # accepts null. 
+ schema = { + "type": ["string", "null"], + "contentMediaType": "application/octet-stream", + } + validator = validator_class(schema) + + assert validator.validate(None) is None + with pytest.raises(ValidationError): + validator.validate(b"raw") + + def test_check_schema_accepts_typeless_binary(self, validator_class): + validator_class.check_schema( + {"contentMediaType": "application/octet-stream"} + ) + + +class TestOAS32StrictValidatorValidate(TestOAS31StrictValidatorValidate): + """OAS 3.2 strict mirrors OAS 3.1 strict on the OAS 3.2 dialect.""" + + @pytest.fixture + def validator_class(self): + return OAS32StrictValidator + + @pytest.fixture + def format_checker(self): + return oas32_format_checker + + def test_strict_validator_is_distinct_from_oas31_strict(self): + assert OAS32StrictValidator is not OAS31StrictValidator + + def test_strict_validator_has_oas32_dialect_metaschema(self): + assert OAS32StrictValidator.META_SCHEMA["$id"] == OAS32_BASE_DIALECT_ID + + class TestOAS30StrictValidator: """ Tests for OAS30StrictValidator which follows OAS spec strictly: @@ -1447,3 +1851,16 @@ def test_openapi_oas32_dialect_registration_is_idempotent(self): validator_class = validator_for({"$schema": OAS32_BASE_DIALECT_ID}) assert validator_class is OAS32Validator + + def test_oas31_dialect_does_not_resolve_to_strict_validator(self): + # Strict validators are explicit opt-ins, not the dialect default. 
+ validator_class = validator_for({"$schema": OAS31_BASE_DIALECT_ID}) + + assert validator_class is OAS31Validator + assert validator_class is not OAS31StrictValidator + + def test_oas32_dialect_does_not_resolve_to_strict_validator(self): + validator_class = validator_for({"$schema": OAS32_BASE_DIALECT_ID}) + + assert validator_class is OAS32Validator + assert validator_class is not OAS32StrictValidator diff --git a/tests/unit/test_binary.py b/tests/unit/test_binary.py new file mode 100644 index 0000000..68058ab --- /dev/null +++ b/tests/unit/test_binary.py @@ -0,0 +1,324 @@ +"""Unit coverage for the per-version opaque-binary schema predicates. + +These predicates are pure functions on the schema mapping, so they are asserted +directly without invoking validation. That is important for the OAS 3.0 array +``type`` case, which would otherwise crash the 3.0 ``type`` keyword. +""" + +import pytest + +from openapi_schema_validator._binary import is_oas30_binary_schema +from openapi_schema_validator._binary import is_oas31_binary_schema +from openapi_schema_validator._binary import is_oas31_strict_binary_schema +from openapi_schema_validator._binary import is_oas32_binary_schema +from openapi_schema_validator._binary import is_oas32_strict_binary_schema + +OCTET = "application/octet-stream" + +ALL_PREDICATES = [ + is_oas30_binary_schema, + is_oas31_binary_schema, + is_oas32_binary_schema, + is_oas31_strict_binary_schema, + is_oas32_strict_binary_schema, +] + + +@pytest.mark.parametrize("predicate", ALL_PREDICATES) +@pytest.mark.parametrize("schema", [True, False, None, "x", 1, ["string"]]) +def test_non_mapping_schema_is_false(predicate, schema): + # Boolean / non-mapping schemas are never classified as binary. 
+ assert predicate(schema) is False + + +class TestIsOAS30BinarySchema: + @pytest.mark.parametrize( + "schema", + [ + {"type": "string", "format": "binary"}, + {"format": "binary"}, + ], + ) + def test_binary_marker_is_true(self, schema): + assert is_oas30_binary_schema(schema) is True + + @pytest.mark.parametrize( + "schema", + [ + {"type": "string"}, + {"type": "string", "format": "byte"}, + {"type": "string", "format": "base64"}, + {"type": "integer", "format": "binary"}, + {"type": "string", "format": "date"}, + {}, + ], + ) + def test_non_binary_is_false(self, schema): + assert is_oas30_binary_schema(schema) is False + + def test_array_type_is_false(self): + # Array-valued type is out of scope for OAS 3.0. The predicate must not + # mask the (separately latent) crashing schema -- assert directly, + # never via validation. + schema = {"type": ["string", "null"], "format": "binary"} + + assert is_oas30_binary_schema(schema) is False + + def test_encoding_co_presence_excludes(self): + # format: binary alongside a real contentEncoding is encoded text -- the + # encoding wins, so this is not classified as binary. 
+ schema = { + "type": "string", + "format": "binary", + "contentEncoding": "base64", + } + + assert is_oas30_binary_schema(schema) is False + + def test_non_mapping_is_false(self): + assert is_oas30_binary_schema(True) is False + + +class TestIsOAS31BinarySchema: + @pytest.mark.parametrize( + "schema", + [ + {}, + {"contentMediaType": OCTET}, + {"type": "string", "contentMediaType": OCTET}, + {"type": ["string", "null"], "contentMediaType": OCTET}, + {"contentMediaType": "application/pdf"}, + {"contentMediaType": "image/png"}, + {"contentMediaType": "audio/mpeg"}, + {"contentMediaType": "video/mp4"}, + # no-op identity encodings do not count as encoded text + {"contentMediaType": OCTET, "contentEncoding": "identity"}, + {"contentMediaType": OCTET, "contentEncoding": "binary"}, + {"contentMediaType": OCTET, "contentEncoding": "7bit"}, + {"contentMediaType": OCTET, "contentEncoding": "8bit"}, + # case-insensitive media-type matching, parameter stripping + {"type": "string", "contentMediaType": "Image/PNG"}, + { + "type": "string", + "contentMediaType": "application/pdf; version=1", + }, + ], + ) + def test_binary_is_true(self, schema): + assert is_oas31_binary_schema(schema) is True + + @pytest.mark.parametrize( + "schema", + [ + # typeless text is NOT binary (tightened phrasing) + {"contentMediaType": "application/json"}, + {"contentMediaType": "application/ld+json"}, + {"contentMediaType": "image/svg+xml"}, + {"contentMediaType": "text/plain"}, + # plain string assertion, no binary marker + {"type": "string"}, + {"type": "string", "contentMediaType": "application/json"}, + { + "type": "string", + "contentMediaType": "application/problem+json; charset=utf-8", + }, + # format: binary is not a 3.1 binary marker + {"type": "string", "format": "binary"}, + # encoded text is never opaque binary + {"type": "string", "format": "byte"}, + {"type": "string", "format": "base64"}, + { + "type": "string", + "contentMediaType": OCTET, + "contentEncoding": "base64", + }, + { + 
"type": "string", + "contentMediaType": OCTET, + "contentEncoding": "base64url", + }, + { + "type": "string", + "contentMediaType": OCTET, + "contentEncoding": "base16", + }, + { + "type": "string", + "contentMediaType": OCTET, + "contentEncoding": "base32", + }, + { + "type": "string", + "contentMediaType": OCTET, + "contentEncoding": "quoted-printable", + }, + # typeless encoded schema: excluded by the encoded-text gate + {"contentMediaType": OCTET, "contentEncoding": "base16"}, + # type without "string" is not a candidate + {"type": "integer", "contentMediaType": OCTET}, + ], + ) + def test_non_binary_is_false(self, schema): + assert is_oas31_binary_schema(schema) is False + + def test_non_mapping_is_false(self): + assert is_oas31_binary_schema(True) is False + + +class TestIsOAS31StrictBinarySchema: + @pytest.mark.parametrize( + "schema", + [ + {}, + {"contentMediaType": OCTET}, + {"contentMediaType": "application/pdf"}, + {"contentMediaType": "application/pdf; version=1"}, + ], + ) + def test_typeless_raw_binary_is_true(self, schema): + assert is_oas31_strict_binary_schema(schema) is True + + @pytest.mark.parametrize( + "schema", + [ + # any type assertion stays on the string path under strict mode + {"type": "string", "contentMediaType": OCTET}, + {"type": ["string", "null"], "contentMediaType": OCTET}, + {"type": "string", "format": "binary"}, + {"type": "string"}, + # encoded / textual typeless schemas are not raw binary + {"contentMediaType": OCTET, "contentEncoding": "base16"}, + {"contentMediaType": "application/json"}, + {"contentMediaType": "image/svg+xml"}, + ], + ) + def test_non_raw_binary_is_false(self, schema): + assert is_oas31_strict_binary_schema(schema) is False + + +class TestMediaTypeClassification: + @pytest.mark.parametrize( + "media_type", + [ + "text/plain", + "text/csv", + "application/json", + "application/xml", + "application/x-www-form-urlencoded", + "application/javascript", + "application/ecmascript", + "application/yaml", + 
"application/x-yaml", + "application/graphql", + "application/x-ndjson", + "application/csv", + "application/ld+json", + "application/problem+json", + "image/svg+xml", + "application/vnd.api+yaml", + # case-insensitive + parameters + "Application/JSON", + "application/json; charset=utf-8", + "application/problem+json; charset=utf-8", + ], + ) + def test_textual_media_types_are_not_binary(self, media_type): + # Under a type: string assertion, textual content stays on the string + # path, so the default predicate is False. + schema = {"type": "string", "contentMediaType": media_type} + + assert is_oas31_binary_schema(schema) is False + + @pytest.mark.parametrize( + "media_type", + [ + "application/octet-stream", + "application/pdf", + "application/zip", + "application/vnd.ms-excel", + "image/png", + "image/jpeg", + "audio/mpeg", + "video/mp4", + # case-insensitive + parameters + "Application/PDF", + "application/pdf; version=1.7", + ], + ) + def test_opaque_media_types_are_binary(self, media_type): + schema = {"type": "string", "contentMediaType": media_type} + + assert is_oas31_binary_schema(schema) is True + + +class TestEncodingExclusion: + @pytest.mark.parametrize( + "encoding", + ["base64", "base64url", "base16", "base32", "quoted-printable"], + ) + def test_real_encoding_excludes_binary(self, encoding): + schema = { + "type": "string", + "contentMediaType": OCTET, + "contentEncoding": encoding, + } + + assert is_oas31_binary_schema(schema) is False + + @pytest.mark.parametrize( + "encoding", ["identity", "binary", "7bit", "8bit"] + ) + def test_noop_encoding_keeps_binary(self, encoding): + schema = { + "type": "string", + "contentMediaType": OCTET, + "contentEncoding": encoding, + } + + assert is_oas31_binary_schema(schema) is True + + +class TestCrossVersionDivergence: + def test_format_binary_is_binary_in_oas30_only(self): + schema = {"type": "string", "format": "binary"} + + assert is_oas30_binary_schema(schema) is True + assert 
is_oas31_binary_schema(schema) is False + assert is_oas32_binary_schema(schema) is False + + def test_typeless_content_media_type_is_binary_in_oas31_plus_only(self): + schema = {"contentMediaType": OCTET} + + assert is_oas30_binary_schema(schema) is False + assert is_oas31_binary_schema(schema) is True + assert is_oas32_binary_schema(schema) is True + + @pytest.mark.parametrize( + "schema", + [ + {}, + {"contentMediaType": OCTET}, + {"type": "string", "contentMediaType": OCTET}, + {"type": ["string", "null"], "contentMediaType": OCTET}, + {"type": "string", "contentMediaType": "application/json"}, + {"type": "string", "format": "binary"}, + {"contentMediaType": OCTET, "contentEncoding": "base16"}, + ], + ) + def test_oas32_default_matches_oas31_default(self, schema): + assert is_oas32_binary_schema(schema) == is_oas31_binary_schema(schema) + + @pytest.mark.parametrize( + "schema", + [ + {}, + {"contentMediaType": OCTET}, + {"type": "string", "contentMediaType": OCTET}, + {"contentMediaType": "application/json"}, + {"contentMediaType": OCTET, "contentEncoding": "base16"}, + ], + ) + def test_oas32_strict_matches_oas31_strict(self, schema): + assert is_oas32_strict_binary_schema( + schema + ) == is_oas31_strict_binary_schema(schema)