{ "cells": [ { "cell_type": "markdown", "id": "4af89d3e", "metadata": {}, "source": [ "# Quick start\n", "\n", "xarrera provides a simple class-based API for defining schemas and validating Xarray objects (and their components).\n", "\n", "All schema objects have `.validate()` and `.to_json()` methods." ] }, { "cell_type": "code", "execution_count": 1, "id": "1d4033b4", "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import xarray as xr\n", "\n", "from xarrera import DataArraySchema" ] }, { "cell_type": "markdown", "id": "338a75ea-852b-45cb-bad9-ffe15f589b29", "metadata": {}, "source": [ "We'll start with a simple example that uses the `DataArraySchema` to validate the following `DataArray`:" ] }, { "cell_type": "code", "execution_count": 2, "id": "d6ef6296", "metadata": {}, "outputs": [], "source": [ "da = xr.DataArray(np.ones((4, 10), dtype='i4'), dims=['x', 't'], name='foo')" ] }, { "cell_type": "markdown", "id": "ef3705b1-8253-4741-ab38-1e034ec3a363", "metadata": {}, "source": [ "We can create a schema for this `DataArray` that includes the datatype, name, and shape. Note that for the shape schema, we've used `None` as a wildcard. 
" ] }, { "cell_type": "code", "execution_count": 3, "id": "8bd2a195-3a52-42a7-83e2-55d986f0f11f", "metadata": {}, "outputs": [], "source": [ "schema = DataArraySchema(dtype=np.integer, name='foo', shape=(4, None))" ] }, { "cell_type": "markdown", "id": "c5d2ec18-2d24-4e2a-8096-5ba2444517ac", "metadata": {}, "source": [ "With our schema created, we can now validate our `DataArray`:" ] }, { "cell_type": "code", "execution_count": 4, "id": "4ccfdb2c-2b3a-4a73-956c-229d12efff7f", "metadata": {}, "outputs": [], "source": [ "schema.validate(da)" ] }, { "cell_type": "markdown", "id": "47658fee-f0ff-4af8-af2b-1b4e53e6f3df", "metadata": {}, "source": [ "When we validate an object that doesn't conform to our schema, we get a `SchemaError`:" ] }, { "cell_type": "code", "execution_count": 5, "id": "46b6600f-e5c5-47ae-996f-38f20679b2f5", "metadata": {}, "outputs": [ { "ename": "SchemaError", "evalue": "dtype float32 != ", "output_type": "error", "traceback": [ "\u001b[31m---------------------------------------------------------------------------\u001b[39m", "\u001b[31mSchemaError\u001b[39m Traceback (most recent call last)", "\u001b[36mCell\u001b[39m\u001b[36m \u001b[39m\u001b[32mIn[5]\u001b[39m\u001b[32m, line 2\u001b[39m\n\u001b[32m 1\u001b[39m da2 = xr.DataArray(np.ones((\u001b[32m4\u001b[39m, \u001b[32m10\u001b[39m), dtype=\u001b[33m'\u001b[39m\u001b[33mf4\u001b[39m\u001b[33m'\u001b[39m), dims=[\u001b[33m'\u001b[39m\u001b[33mx\u001b[39m\u001b[33m'\u001b[39m, \u001b[33m'\u001b[39m\u001b[33mt\u001b[39m\u001b[33m'\u001b[39m], name=\u001b[33m'\u001b[39m\u001b[33mfoo\u001b[39m\u001b[33m'\u001b[39m)\n\u001b[32m----> \u001b[39m\u001b[32m2\u001b[39m \u001b[43mschema\u001b[49m\u001b[43m.\u001b[49m\u001b[43mvalidate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mda2\u001b[49m\u001b[43m)\u001b[49m\n", "\u001b[36mFile \u001b[39m\u001b[32m~/Repos/xarrera/xarrera/dataarray.py:199\u001b[39m, in \u001b[36mDataArraySchema.validate\u001b[39m\u001b[34m(self, da)\u001b[39m\n\u001b[32m 196\u001b[39m 
\u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mValueError\u001b[39;00m(\u001b[33m'\u001b[39m\u001b[33mInput must be a xarray.DataArray\u001b[39m\u001b[33m'\u001b[39m)\n\u001b[32m 198\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.dtype \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m--> \u001b[39m\u001b[32m199\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[43m.\u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m.\u001b[49m\u001b[43mvalidate\u001b[49m\u001b[43m(\u001b[49m\u001b[43mda\u001b[49m\u001b[43m.\u001b[49m\u001b[43mdtype\u001b[49m\u001b[43m)\u001b[49m\n\u001b[32m 201\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m.name \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[32m 202\u001b[39m \u001b[38;5;28mself\u001b[39m.name.validate(da.name)\n", "\u001b[36mFile \u001b[39m\u001b[32m~/Repos/xarrera/xarrera/components.py:74\u001b[39m, in \u001b[36mDTypeSchema.validate\u001b[39m\u001b[34m(self, dtype)\u001b[39m\n\u001b[32m 66\u001b[39m \u001b[38;5;250m\u001b[39m\u001b[33;03m'''Validate dtype\u001b[39;00m\n\u001b[32m 67\u001b[39m \n\u001b[32m 68\u001b[39m \u001b[33;03mParameters\u001b[39;00m\n\u001b[32m (...)\u001b[39m\u001b[32m 71\u001b[39m \u001b[33;03m Dtype of the DataArray.\u001b[39;00m\n\u001b[32m 72\u001b[39m \u001b[33;03m'''\u001b[39;00m\n\u001b[32m 73\u001b[39m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m np.issubdtype(dtype, \u001b[38;5;28mself\u001b[39m.dtype):\n\u001b[32m---> \u001b[39m\u001b[32m74\u001b[39m \u001b[38;5;28;01mraise\u001b[39;00m SchemaError(\u001b[33mf\u001b[39m\u001b[33m'\u001b[39m\u001b[33mdtype \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mdtype\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m != 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00m\u001b[38;5;28mself\u001b[39m.dtype\u001b[38;5;132;01m}\u001b[39;00m\u001b[33m'\u001b[39m)\n", "\u001b[31mSchemaError\u001b[39m: dtype float32 != " ] } ], "source": [ "da2 = xr.DataArray(np.ones((4, 10), dtype='f4'), dims=['x', 't'], name='foo')\n", "schema.validate(da2)" ] }, { "cell_type": "markdown", "id": "73f6805f-6437-48d8-93ca-4853f561f77f", "metadata": {}, "source": [ "Schemas can also be exported to JSON:" ] }, { "cell_type": "code", "execution_count": 6, "id": "64ded3cf-ac73-4082-be7a-b64861b81ecb", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'dtype': 'integer', 'shape': [4, None], 'name': 'foo'}" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "schema.json" ] }, { "cell_type": "markdown", "id": "b288ef4d-cd9e-4412-b5cc-f877710a80f4", "metadata": {}, "source": [ "## Components\n", "\n", "Each component of the Xarray data model is implemented as a standalone class:" ] }, { "cell_type": "code", "execution_count": 7, "id": "afb51650-3966-439f-9569-d5f2978513d8", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "{\"x\": null, \"y\": -1}\n" ] } ], "source": [ "from xarrera.components import (\n", "    DTypeSchema,\n", "    DimsSchema,\n", "    ShapeSchema,\n", "    NameSchema,\n", "    ChunksSchema,\n", "    ArrayTypeSchema,\n", "    AttrSchema,\n", "    AttrsSchema\n", ")\n", "\n", "# Example constructions\n", "dtype_schema = DTypeSchema('i4')\n", "dims_schema = DimsSchema(('x', 'y', None))  # None is used as a wildcard\n", "shape_schema = ShapeSchema((5, 10, None))  # None is used as a wildcard\n", "name_schema = NameSchema('foo')\n", "chunk_schema = ChunksSchema({'x': None, 'y': -1})  # None is used as a wildcard, -1 means the full length of the dimension\n", "# Bind the instance to its own name so the imported ArrayTypeSchema class is not shadowed\n", "array_type_schema = ArrayTypeSchema(np.ndarray)\n", "\n", "# Example usage\n", "dtype_schema.validate(da.dtype)\n", "\n", "# Each object schema can be exported to JSON format\n", "# (kept in a separate variable so chunk_schema stays a schema and the cell can be re-run)\n", "chunk_schema_json = chunk_schema.to_json()\n", 
"print(chunk_schema_json)" ] } ], "metadata": { "kernelspec": { "display_name": ".venv (3.12.3)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.3" } }, "nbformat": 4, "nbformat_minor": 5 }