diff --git a/.github/workflows/ci_tests.yml b/.github/workflows/ci_tests.yml index 82cf5507..b8e2b1b0 100644 --- a/.github/workflows/ci_tests.yml +++ b/.github/workflows/ci_tests.yml @@ -12,15 +12,15 @@ jobs: strategy: matrix: platform: [windows-latest, macos-latest, ubuntu-latest] - python-version: ["3.10", "3.12"] + python-version: ["3.12", "3.x"] runs-on: ${{ matrix.platform }} steps: - name: Check out repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Prepare python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} @@ -31,7 +31,7 @@ jobs: shell: bash - name: Cache - uses: actions/cache@v4 + uses: actions/cache@v5 with: path: ${{ steps.pip_cache_dir.outputs.dir }} key: ${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/.ci_pip_reqs.txt') }}-${{ hashFiles('**/setup.py') }} @@ -51,8 +51,8 @@ jobs: run: tox -e unit - name: Coverage - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v7 with: token: ${{ secrets.CODECOV_TOKEN }} - file: ./coverage.xml - if: matrix.platform == 'ubuntu-latest' + files: ./coverage.xml + if: matrix.platform == 'ubuntu-latest' && matrix.python-version == '3.x' diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 2b00c08f..9e76dcda 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -9,17 +9,17 @@ jobs: runs-on: ubuntu-latest environment: name: pypi - url: https://pypi.org/p/sbmlmath + url: https://pypi.org/p/petab permissions: id-token: write steps: - name: Check out repository - uses: actions/checkout@v4 + uses: actions/checkout@v6 - name: Set up python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: - python-version: 3.11 + python-version: 3.x - name: Install dependencies / build sdist run: | diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d32e5b68..4ba9c6fb 100644 --- a/.pre-commit-config.yaml +++ 
b/.pre-commit-config.yaml @@ -2,7 +2,7 @@ # See https://pre-commit.com/hooks.html for more hooks repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v5.0.0 hooks: - id: check-added-large-files - id: check-merge-conflict @@ -12,7 +12,7 @@ repos: - id: trailing-whitespace - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.1.11 + rev: v0.9.10 hooks: # Run the linter. - id: ruff diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 90c2fc8f..51f9841e 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -8,9 +8,9 @@ sphinx: fail_on_warning: false build: - os: "ubuntu-22.04" + os: "ubuntu-24.04" tools: - python: "3.10" + python: "3.12" python: install: diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d12d40d..fe7be354 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,158 @@ # PEtab changelog + +## 0.8 series + +### 0.8.2 + +**Fixes** +* We now support pandas>=3.0 + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/471) +* Updated to the latest PEtab v2 schema + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/470) +* Fixed some warnings + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/475) + +**Full Changelog**: https://github.com/PEtab-dev/libpetab-python/compare/v0.8.1...v0.8.2 + +### 0.8.1 + +The only change in this release is requiring pandas<3 until we support +the new pandas 3 API. 
+ +### 0.8.0 + +**Fixes** +* Handle `KeyError` in CheckInitialChangeSymbols + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/459) +* Fix return type in `petab.v2.calculate` + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/464) +* Implement sampling for v2 prior distributions + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/461) + +**Features** +* Add `v2.Problem.__repr__` + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/458) +* Add `LogUniform` distribution + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/465) +* Add `v2.Problem.has_{map,ml}_objective` + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/463) + +**Other** +* Require Python>=3.11 per nep-0029 + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/462) +* v2: Remove `log10-normal` distribution per updated spec + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/456) +* Remove `petab.v2`-is-experimental warnings + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/457) + It still is, but we no longer spam users with warnings. 
+ +**Full Changelog**: https://github.com/PEtab-dev/libpetab-python/compare/v0.7.0...v0.8.0 + + +## 0.7 series + +### 0.7.0 + +**Fixes** + +* Misc minor `petab.v2` + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/445 + and https://github.com/PEtab-dev/libpetab-python/pull/450) +* Fixed serialization of `priorParameters` + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/449) + +**Features** + +* Added `PySBModel.to_str` + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/451) +* Added `id` field to `v2.ProblemConfig` + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/442) +* Updated `ExperimentsToEventsConverter` to changed initialization semantics + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/443) +* Added `v2.Problem.{get_output_parameters,get_x_nominal_dict}` + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/447) +* Extended PEtab v2 mapping table validation + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/452) + +**Full Changelog**: https://github.com/PEtab-dev/libpetab-python/compare/v0.6.0...v0.7.0 + +## 0.6 series + +### 0.6.0 + +**Fixes** + +* Implement proper truncation for prior distributions + (parameter bounds now truncate the prior distribution instead of putting + extra probability mass on the bounds) + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/335) +* Fixed `get_required_parameters_for_parameter_table` + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/340) +* Fixed `Prior.from_par_dict` for missing `priorParameters` columns + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/341) +* Fixed petablint v2 warning + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/342) +* Fixed linter failing on missing `noiseFormula` + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/367) +* Fixed v2 import Deprecation warning + (by @dweindl in 
https://github.com/PEtab-dev/libpetab-python/pull/346) +* petab.calculate: compare all common columns + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/347) +* Fixed version checks + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/350) +* Create output directories in `write_*_df` + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/352) +* Handle `observableTransformation` in `petab.v1.simulate.sample_noise` + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/383) +* Fixed residual calculation in `v1.calculate` + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/395) +* Allow empty string "" in columns to be overridden with default values in priors + (by @PaulJonasJost in https://github.com/PEtab-dev/libpetab-python/pull/384) +* Fixed `goodness_of_fit` plot and add color parameter + (by @plakrisenko in https://github.com/PEtab-dev/libpetab-python/pull/402 + & https://github.com/PEtab-dev/libpetab-python/pull/437, + by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/440) +* Plot without vis spec without `ids_per_plot` + (by @PaulJonasJost in https://github.com/PEtab-dev/libpetab-python/pull/386) + +**Deprecations** + +* Using any PEtab-v2-related functionality currently in `petab.v1` is + deprecated and will be removed in a subsequent release. + Use `petab.v2` instead. + + This affects, for example, PEtab-v2-specific constants `petab.v1.C` + (use `petab.v2.C` instead), `petab.v1.mapping`, + or anything mapping-table-related in `petab.v1.Problem`. + +**Features** + +* Substantially extended and updated `petab.v2` for working with PEtab v2 problems. + + PEtab v2 is still in [draft](https://petab.readthedocs.io/en/latest/v2/documentation_data_format.html) stage -- feedback is welcome! 
+ + * PEtab v2 support for `petablint` + * The library uses pydantic-based objects for most PEtab entities instead of + plain DataFrames + * Functionality for converting PEtab v1 problems to (the current state of) PEtab v2 + + At least until PEtab v2 is finalized, the `petab.v2` API may change rapidly, and should not be considered stable. + +* SbmlModel enhancements + (by @fbergmann in https://github.com/PEtab-dev/libpetab-python/pull/333) +* Added `SbmlModel.from_antimony` (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/331) +* Added `SbmlModel.{to_antimony,to_sbml_str}` (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/371) +* Enable passing the base path to `Problem.from_yaml` (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/327) +* Functions for adding conditions/observables/parameter to `Problem` (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/328) +* Added `evaluate: bool` argument to math parser (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/365) +* Added petab-compatible sympy string-printer (`PetabStrPrinter`/`petab_math_str`) (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/364) +* Prettified linter output (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/401) +* Store problem configuration in `Problem` (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/326) +* Store path info in *Table objects (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/416) + +**Full Changelog**: https://github.com/PEtab-dev/libpetab-python/compare/v0.5.0...v0.6.0 + ## 0.5 series ### 0.5.0 diff --git a/README.md b/README.md index 7873928b..7888714e 100644 --- a/README.md +++ b/README.md @@ -17,19 +17,19 @@ Documentation of the PEtab format in general is available at ## Installation -The PEtab library is available on [pypi](https://pypi.org/project/petab/) +The PEtab library is available on [PyPI](https://pypi.org/project/petab/) 
and the easiest way to install it is running pip3 install petab -It will require Python>=3.10 to run. (We are following the -[numpy Python support policy](https://numpy.org/neps/nep-0029-deprecation_policy.html)). +`petab` requires Python>=3.11. We are following +[NumPy's Python support policy](https://numpy.org/neps/nep-0029-deprecation_policy.html). Development versions of the PEtab library can be installed using - pip3 install https://github.com/PEtab-dev/libpetab-python/archive/develop.zip + pip3 install https://github.com/PEtab-dev/libpetab-python/archive/main.zip -(replace `develop` by the branch or commit you would like to install). +(replace `main` by the branch or commit you would like to install). When setting up a new parameter estimation problem, the most useful tools will be: diff --git a/doc/conf.py b/doc/conf.py index 4dbd3009..975cad03 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -47,10 +47,12 @@ ] intersphinx_mapping = { + "petab": ("https://petab.readthedocs.io/en/latest/", None), "pandas": ("https://pandas.pydata.org/docs/", None), "numpy": ("https://numpy.org/devdocs/", None), "sympy": ("https://docs.sympy.org/latest/", None), "python": ("https://docs.python.org/3", None), + "pydantic": ("https://docs.pydantic.dev/latest/", None), } # Add any paths that contain templates here, relative to this directory. @@ -62,6 +64,7 @@ exclude_patterns = [ "build/doctrees", "build/html", + "build/jupyter_execute", "**.ipynb_checkpoints", "logo/LICENSE.md", ] @@ -71,10 +74,10 @@ autosummary_generate = True autodoc_default_options = { - "members": None, + "members": True, "imported-members": ["petab"], - "inherited-members": None, - "show-inheritance": None, + "show-inheritance": True, + "undoc-members": True, } # For some reason causes sphinx import errors otherwise @@ -85,6 +88,7 @@ nb_execution_mode = "force" nb_execution_raise_on_error = True nb_execution_show_tb = True +nb_execution_timeout = 90 # max. 
seconds/cell source_suffix = { ".rst": "restructuredtext", @@ -111,7 +115,7 @@ "display_github": True, "github_user": "petab-dev", "github_repo": "libpetab-python", - "github_version": "develop", + "github_version": "main", "conf_py_path": "/doc", } diff --git a/doc/development.rst b/doc/development.rst index df4edf55..181505a9 100644 --- a/doc/development.rst +++ b/doc/development.rst @@ -24,3 +24,67 @@ Python compatibility -------------------- We follow `numpy's Python support policy <https://numpy.org/neps/nep-0029-deprecation_policy.html>`_. + +Release process +--------------- + +1. Update the version number in ``petab/version.py``. + +2. Update the changelog in ``doc/CHANGELOG.md``. + The update content can be generated automatically: + draft a new dummy GitHub release with a dummy tag and the ``develop`` + branch, then click :guilabel:`Generate release notes`. + +3. Create a pull request with the to-be-released changes to the main branch + (usually from ``develop``). + +4. Once the pull request is merged, create a new release on GitHub. + Make sure to set the tag to the version number prefixed with 'v' + (e.g., ``v1.0.0``), and the release title to ``libpetab-python $RELEASE_TAG`` + (e.g., ``libpetab-python v1.0.0``). + +5. Check that the release is now available on PyPI. + The upload to PyPI is performed automatically by a GitHub Actions workflow, + which may take a few minutes to complete. + +6. Merge the main branch back into the ``develop`` branch. + +Style guide +----------- + +Code style +~~~~~~~~~~ + +We use pre-commit with ruff to enforce code style. To install pre-commit and +the pre-commit hooks, run: + +.. code-block:: bash + + pip install pre-commit + pre-commit install + +To run the pre-commit checks manually on all files, not just the modified ones, run: + +.. code-block:: bash + + pre-commit run --all-files + +Documentation style +~~~~~~~~~~~~~~~~~~~ + +We use `Sphinx <https://www.sphinx-doc.org/>`_ to generate the documentation. +The documentation is written in `reStructuredText <https://www.sphinx-doc.org/en/master/usage/restructuredtext/basics.html>`_.
+ +We use the `sphinx docstring-style <https://sphinx-rtd-tutorial.readthedocs.io/en/latest/docstrings.html>`__ for new code. +The ``:param [ParamName]:`` and ``:return:`` statements are important when +applicable. +Manual type annotations (``:type [ParamName]:``) are redundant and should be +avoided. + +To build the documentation, run: + +.. code-block:: bash + + cd doc + make html + # then open `build/html/index.html` in a browser diff --git a/doc/example.rst b/doc/example.rst index 6fe6dab5..dfe54fb3 100644 --- a/doc/example.rst +++ b/doc/example.rst @@ -10,6 +10,7 @@ The following examples should help to get a better idea of how to use the PEtab example/example_petablint.ipynb example/example_visualization.ipynb + example/distributions.ipynb Examples of systems biology parameter estimation problems specified in PEtab can be found in the `systems biology benchmark model collection <https://github.com/Benchmarking-Initiative/Benchmark-Models-PEtab>`_. diff --git a/doc/example/distributions.ipynb b/doc/example/distributions.ipynb new file mode 100644 index 00000000..2b3ab24f --- /dev/null +++ b/doc/example/distributions.ipynb @@ -0,0 +1,282 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "372289411a2aa7b3", + "metadata": {}, + "source": [ + "# Prior distributions in PEtab\n", + "\n", + "This notebook gives a brief overview of the prior distributions in PEtab and how they are represented in the PEtab library.\n", + "\n", + "Prior distributions are used to specify the prior knowledge about the parameters.\n", + "Parameter priors are specified in the parameter table. A prior is defined by its type and its parameters.\n", + "Each prior type has a specific set of parameters. For example, the normal distribution has two parameters: the mean and the standard deviation.\n", + "\n", + "There are two types of priors in PEtab - objective priors and initialization priors:\n", + "\n", + "* *Objective priors* are used to specify the prior knowledge about the parameters that are to be estimated. They will enter the objective function of the optimization problem.
They are specified in the `objectivePriorType` and `objectivePriorParameters` columns of the parameter table.\n", + "* *Initialization priors* can be used as a hint for the optimization algorithm. They will not enter the objective function. They are specified in the `initializationPriorType` and `initializationPriorParameters` columns of the parameter table.\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "id": "initial_id", + "metadata": { + "collapsed": true + }, + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import seaborn as sns\n", + "\n", + "from petab.v1.C import *\n", + "from petab.v1.parameters import unscale\n", + "from petab.v1.priors import Prior\n", + "\n", + "sns.set_style(None)\n", + "\n", + "\n", + "def plot(prior: Prior):\n", + " \"\"\"Visualize a distribution.\"\"\"\n", + " fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))\n", + " sample = prior.sample(20_000, x_scaled=True)\n", + "\n", + " fig.suptitle(str(prior))\n", + "\n", + " plot_single(prior, ax=ax1, sample=sample, scaled=False)\n", + " plot_single(prior, ax=ax2, sample=sample, scaled=True)\n", + " plt.tight_layout()\n", + " plt.show()\n", + "\n", + "\n", + "def plot_single(\n", + " prior: Prior, scaled: bool = False, ax=None, sample: np.array = None\n", + "):\n", + " fig = None\n", + " if ax is None:\n", + " fig, ax = plt.subplots()\n", + "\n", + " if sample is None:\n", + " sample = prior.sample(20_000)\n", + "\n", + " # assuming scaled sample\n", + " if not scaled:\n", + " sample = unscale(sample, prior.transformation)\n", + " bounds = prior.bounds\n", + " else:\n", + " bounds = (\n", + " (prior.lb_scaled, prior.ub_scaled)\n", + " if prior.bounds is not None\n", + " else None\n", + " )\n", + "\n", + " # plot pdf\n", + " xmin = min(\n", + " sample.min(), bounds[0] if prior.bounds is not None else sample.min()\n", + " )\n", + " xmax = max(\n", + " sample.max(), bounds[1] if prior.bounds is not None else sample.max()\n", + " )\n", + " padding 
= 0.1 * (xmax - xmin)\n", + " xmin -= padding\n", + " xmax += padding\n", + " x = np.linspace(xmin, xmax, 500)\n", + " y = prior.pdf(x, x_scaled=scaled, rescale=scaled)\n", + " ax.plot(x, y, color=\"red\", label=\"pdf\")\n", + "\n", + " sns.histplot(sample, stat=\"density\", ax=ax, label=\"sample\")\n", + "\n", + " # plot bounds\n", + " if prior.bounds is not None:\n", + " for bound in bounds:\n", + " if bound is not None and np.isfinite(bound):\n", + " ax.axvline(bound, color=\"black\", linestyle=\"--\", label=\"bound\")\n", + "\n", + " if fig is not None:\n", + " ax.set_title(str(prior))\n", + "\n", + " if scaled:\n", + " ax.set_xlabel(\n", + " f\"Parameter value on parameter scale ({prior.transformation})\"\n", + " )\n", + " ax.set_ylabel(\"Rescaled density\")\n", + " else:\n", + " ax.set_xlabel(\"Parameter value\")\n", + "\n", + " ax.grid(False)\n", + " handles, labels = ax.get_legend_handles_labels()\n", + " unique_labels = dict(zip(labels, handles, strict=False))\n", + " ax.legend(unique_labels.values(), unique_labels.keys())\n", + "\n", + " if ax is None:\n", + " plt.show()" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "db36a4a93622ccb8", + "metadata": {}, + "source": "The basic distributions are the uniform, normal, Laplace, log-normal, and log-laplace distributions:\n" + }, + { + "cell_type": "code", + "id": "4f09e50a3db06d9f", + "metadata": {}, + "source": [ + "plot_single(Prior(UNIFORM, (0, 1)))\n", + "plot_single(Prior(NORMAL, (0, 1)))\n", + "plot_single(Prior(LAPLACE, (0, 1)))\n", + "plot_single(Prior(LOG_NORMAL, (0, 1)))\n", + "plot_single(Prior(LOG_LAPLACE, (1, 0.5)))" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "dab4b2d1e0f312d8", + "metadata": {}, + "source": "If a parameter scale is specified (`parameterScale=lin|log|log10`), the distribution parameters are used as is without applying the `parameterScale` to them. 
The exceptions are the `parameterScale*`-type distributions, as explained below. In the context of PEtab prior distributions, `parameterScale` will only be used for the start point sampling for optimization, where the sample will be transformed accordingly. This is demonstrated below. The left plot always shows the prior distribution for unscaled parameter values, and the right plot shows the prior distribution for scaled parameter values. Note that in the objective function, the prior is always on the unscaled parameters.\n" + }, + { + "cell_type": "code", + "id": "f6192c226f179ef9", + "metadata": {}, + "source": [ + "plot(Prior(NORMAL, (10, 2), transformation=LIN))\n", + "plot(Prior(NORMAL, (10, 2), transformation=LOG))\n", + "\n", + "# Note that the log-normal distribution is different\n", + "# from a log-transformed normal distribution:\n", + "plot(Prior(LOG_NORMAL, (10, 2), transformation=LIN))" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "4281ed48859e6431", + "metadata": {}, + "source": "On the log-transformed parameter scale, `Log*` and `parameterScale*` distributions are equivalent:" + }, + { + "cell_type": "code", + "id": "34c95268e8921070", + "metadata": {}, + "source": [ + "plot(Prior(LOG_NORMAL, (10, 2), transformation=LOG))\n", + "plot(Prior(PARAMETER_SCALE_NORMAL, (10, 2)))" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "263c9fd31156a4d5", + "metadata": {}, + "source": "Prior distributions can also be defined on the scaled parameters (i.e., transformed according to `parameterScale`) by using the types `parameterScaleUniform`, `parameterScaleNormal` or `parameterScaleLaplace`. In these cases, the distribution parameters are interpreted on the transformed parameter scale (but not the parameter bounds, see below). This implies that for `parameterScale=lin`, there is no difference between `parameterScaleUniform` and `uniform`."
+ }, + { + "cell_type": "code", + "id": "5ca940bc24312fc6", + "metadata": {}, + "source": [ + "# different, because transformation!=LIN\n", + "plot(Prior(UNIFORM, (0.01, 2), transformation=LOG10))\n", + "plot(Prior(PARAMETER_SCALE_UNIFORM, (0.01, 2), transformation=LOG10))\n", + "\n", + "# same, because transformation=LIN\n", + "plot(Prior(UNIFORM, (0.01, 2), transformation=LIN))\n", + "plot(Prior(PARAMETER_SCALE_UNIFORM, (0.01, 2), transformation=LIN))" + ], + "outputs": [], + "execution_count": null + }, + { + "cell_type": "markdown", + "id": "b1a8b17d765db826", + "metadata": {}, + "source": "The given distributions are truncated at the bounds defined in the parameter table:" + }, + { + "cell_type": "code", + "id": "4ac42b1eed759bdd", + "metadata": {}, + "source": [ + "plot(Prior(NORMAL, (0, 1), bounds=(-2, 2)))\n", + "plot(Prior(UNIFORM, (0, 1), bounds=(0.1, 0.9)))\n", + "plot(Prior(UNIFORM, (1e-8, 1), bounds=(0.1, 0.9), transformation=LOG10))\n", + "plot(Prior(LAPLACE, (0, 1), bounds=(-0.5, 0.5)))\n", + "plot(\n", + " Prior(\n", + " PARAMETER_SCALE_UNIFORM,\n", + " (-3, 1),\n", + " bounds=(1e-2, 1),\n", + " transformation=LOG10,\n", + " )\n", + ")" + ], + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "Within the bounds, this rescales the probability density by a constant factor compared to the non-truncated version (https://en.wikipedia.org/wiki/Truncated_distribution), such that the probability density still integrates to 1.", + "id": "67de0cace55617a2" + }, + { + "cell_type": "markdown", + "id": "45ffce1341483f24", + "metadata": {}, + "source": "Further distribution examples:" + }, + { + "cell_type": "code", + "id": "581e1ac431860419", + "metadata": {}, + "source": [ + "plot(Prior(NORMAL, (10, 1), bounds=(6, 11), transformation=\"log10\"))\n", + "plot(\n", + " Prior(\n", + " PARAMETER_SCALE_NORMAL,\n", + " (2, 1),\n", + " bounds=(10**0, 10**3),\n", + " transformation=\"log10\",\n", + " )\n", + ")\n", +
"plot(Prior(LAPLACE, (10, 2), bounds=(6, 14)))\n", + "plot(Prior(LOG_LAPLACE, (1, 0.5), bounds=(0.5, 8)))\n", + "plot(Prior(LOG_NORMAL, (2, 1), bounds=(0.5, 8)))" + ], + "outputs": [], + "execution_count": null + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/example/example_petablint.ipynb b/doc/example/example_petablint.ipynb index ed20b0d9..6925a433 100644 --- a/doc/example/example_petablint.ipynb +++ b/doc/example/example_petablint.ipynb @@ -16,75 +16,26 @@ }, { "cell_type": "code", - "execution_count": 1, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "usage: petablint [-h] [-v] [-s SBML_FILE_NAME] [-m MEASUREMENT_FILE_NAME]\r\n", - " [-c CONDITION_FILE_NAME] [-p PARAMETER_FILE_NAME]\r\n", - " [-y YAML_FILE_NAME | -n MODEL_NAME] [-d DIRECTORY]\r\n", - "\r\n", - "Check if a set of files adheres to the PEtab format.\r\n", - "\r\n", - "optional arguments:\r\n", - " -h, --help show this help message and exit\r\n", - " -v, --verbose More verbose output\r\n", - " -s SBML_FILE_NAME, --sbml SBML_FILE_NAME\r\n", - " SBML model filename\r\n", - " -m MEASUREMENT_FILE_NAME, --measurements MEASUREMENT_FILE_NAME\r\n", - " Measurement table\r\n", - " -c CONDITION_FILE_NAME, --conditions CONDITION_FILE_NAME\r\n", - " Conditions table\r\n", - " -p PARAMETER_FILE_NAME, --parameters PARAMETER_FILE_NAME\r\n", - " Parameter table\r\n", - " -y YAML_FILE_NAME, --yaml YAML_FILE_NAME\r\n", - " PEtab YAML problem filename\r\n", - " -n MODEL_NAME, --model-name MODEL_NAME\r\n", - " Model name where all files are in the working\r\n", - " directory and 
follow PEtab naming convention.\r\n", - " Specifying -[smcp] will override defaults\r\n", - " -d DIRECTORY, --directory DIRECTORY\r\n" - ] - } - ], "source": [ "!petablint -h" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", "metadata": {}, - "source": [ - "Let's look at an example: In the example_Fujita folder, we have a PEtab configuration file `Fujita.yaml` telling which files belong to the Fujita model:" - ] + "source": "Let's look at an example: In the `example_Fujita/` directory, we have a PEtab problem configuration file `Fujita.yaml` telling which files belong to the \"Fujita\" problem:" }, { "cell_type": "code", - "execution_count": 2, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "parameter_file: Fujita_parameters.tsv\r\n", - "petab_version: 0.0.0a17\r\n", - "problems:\r\n", - "- condition_files:\r\n", - " - Fujita_experimentalCondition.tsv\r\n", - " measurement_files:\r\n", - " - Fujita_measurementData.tsv\r\n", - " sbml_files:\r\n", - " - Fujita_model.xml\r\n" - ] - } - ], "source": [ "!cat example_Fujita/Fujita.yaml" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -95,20 +46,10 @@ }, { "cell_type": "code", - "execution_count": 3, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[0m" - ] - } - ], - "source": [ - "!petablint -y example_Fujita/Fujita.yaml" - ] + "source": "!petablint example_Fujita/Fujita.yaml", + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", diff --git a/doc/modules.rst b/doc/modules.rst index 8d6335c8..1eb0220c 100644 --- a/doc/modules.rst +++ b/doc/modules.rst @@ -14,7 +14,9 @@ API Reference petab.v1.composite_problem petab.v1.conditions petab.v1.core + petab.v1.distributions petab.v1.lint + petab.v1.math petab.v1.measurements petab.v1.models petab.v1.observables @@ -30,5 +32,9 @@ API Reference petab.v1.yaml petab.v2 petab.v2.C + 
petab.v2.converters + petab.v2.core + petab.v2.experiments petab.v2.lint - petab.v2.problem + petab.v2.models + petab.v2.petab1to2 diff --git a/petab/__init__.py b/petab/__init__.py index 81b58729..23b376c2 100644 --- a/petab/__init__.py +++ b/petab/__init__.py @@ -8,6 +8,7 @@ PEtab should use for operations that can be performed in parallel. By default, all operations are performed sequentially. """ + import importlib import sys from functools import partial @@ -23,7 +24,17 @@ def __getattr__(name): return attr if name == "v1": return importlib.import_module("petab.v1") - if name != "__path__": + if name == "v2": + return importlib.import_module("petab.v2") + if name not in ( + "__path__", + "__all__", + "__wrapped__", + # accessed under pytest + "_pytestfixturefunction", + "__test__", + "__bases__", + ): warn( f"Accessing `petab.{name}` is deprecated and will be removed in " f"the next major release. Please use `petab.v1.{name}` instead.", @@ -33,8 +44,8 @@ def __getattr__(name): return getattr(importlib.import_module("petab.v1"), name) -def v1getattr(name, module): - if name != "__path__": +def _v1getattr(name, module): + if name not in ("__path__", "__all__", "__wrapped__"): warn( f"Accessing `petab.{name}` is deprecated and will be removed in " f"the next major release. 
Please use `petab.v1.{name}` instead.", @@ -64,7 +75,7 @@ def v1getattr(name, module): real_module = importlib.import_module( f"petab.v1.{'.'.join(v1_object_parts)}" ) - real_module.__getattr__ = partial(v1getattr, module=real_module) + real_module.__getattr__ = partial(_v1getattr, module=real_module) sys.modules[module_name] = real_module except ModuleNotFoundError: pass diff --git a/petab/_utils.py b/petab/_utils.py new file mode 100644 index 00000000..808cebe7 --- /dev/null +++ b/petab/_utils.py @@ -0,0 +1,35 @@ +"""Private, version-independent utility functions for PEtab.""" + +from pathlib import Path + +from pydantic import AnyUrl, TypeAdapter + +PathOrUrlAdapter = TypeAdapter(AnyUrl | Path) + + +def _generate_path( + file_path: str | Path | AnyUrl, + base_path: Path | str | AnyUrl | None = None, +) -> str: + """ + Generate a local path or URL from a file path and an optional base path. + + :return: A string representing the relative or absolute path or URL. + Absolute if `file_path` or `base_path` is an absolute path or URL, + relative otherwise. 
+ """ + if base_path is None: + return str(file_path) + + file_path = PathOrUrlAdapter.validate_python(file_path) + if isinstance(file_path, AnyUrl): + # if URL, this is absolute + return str(file_path) + + base_path = PathOrUrlAdapter.validate_python(base_path) + if isinstance(base_path, Path): + # if file_path is absolute, base_path will be ignored + return str(base_path / file_path) + + # combine URL parts + return f"{base_path}/{file_path}" diff --git a/petab/petablint.py b/petab/petablint.py index f8228d42..afc481e2 100755 --- a/petab/petablint.py +++ b/petab/petablint.py @@ -6,15 +6,15 @@ import logging import sys +import pydantic from colorama import Fore from colorama import init as init_colorama from jsonschema.exceptions import ValidationError as SchemaValidationError import petab.v1 as petab +from petab.v1 import validate_yaml_semantics, validate_yaml_syntax from petab.v1.C import FORMAT_VERSION -from petab.v2.lint import lint_problem from petab.versions import get_major_version -from petab.yaml import validate logger = logging.getLogger(__name__) @@ -159,31 +159,60 @@ def main(): if args.yaml_file_name: try: - validate(args.yaml_file_name) + validate_yaml_syntax(args.yaml_file_name) except SchemaValidationError as e: + path = "" + if e.absolute_path: + # construct a path to the error location inside the YAML file + path = list(e.absolute_path) + path = ( + f" at {path[0]}" + + "".join(f"[{str(p)}]" for p in path[1:]) + + ": " + ) logger.error( - "Provided YAML file does not adhere to PEtab " f"schema: {e}" + "Provided YAML file does not adhere to the PEtab schema" + f"{path}: {e.args[0]}" ) sys.exit(1) - - if petab.is_composite_problem(args.yaml_file_name): - # TODO: further checking: - # https://github.com/ICB-DCM/PEtab/issues/191 - # problem = petab.CompositeProblem.from_yaml(args.yaml_file_name) - return + except ValueError as e: + logger.error(e) + sys.exit(1) match get_major_version(args.yaml_file_name): case 1: + 
validate_yaml_semantics(args.yaml_file_name) + + if petab.is_composite_problem(args.yaml_file_name): + # TODO: further checking: + # https://github.com/ICB-DCM/PEtab/issues/191 + # petab.CompositeProblem.from_yaml(args.yaml_file_name) + return + problem = petab.Problem.from_yaml(args.yaml_file_name) ret = petab.lint.lint_problem(problem) sys.exit(ret) case 2: - validation_issues = lint_problem(args.yaml_file_name) - if validation_issues: - validation_issues.log(logger=logger) + from petab.v2.lint import lint_problem + + try: + validation_issues = lint_problem(args.yaml_file_name) + if validation_issues: + # Handle petab.v2.lint.ValidationTask issues + validation_issues.log(logger=logger) + sys.exit(1) + logger.info("PEtab format check completed successfully.") + sys.exit(0) + except pydantic.ValidationError as e: + # Handle Pydantic validation errors + for err in e.errors(): + loc = ", ".join(str(loc) for loc in err["loc"]) + msg = err["msg"] + # TODO: include model info here once available + # https://github.com/pydantic/pydantic/issues/7224 + logger.error(f"Error in field(s) `{loc}`: {msg}") sys.exit(1) - logger.info("PEtab format check completed successfully.") - sys.exit(0) + case _: logger.error( "The provided PEtab files are of unsupported version " @@ -204,9 +233,7 @@ def main(): if args.parameter_file_name: logger.debug(f"\tParameter table: {args.parameter_file_name}") if args.visualization_file_name: - logger.debug( - "\tVisualization table: " f"{args.visualization_file_name}" - ) + logger.debug(f"\tVisualization table: {args.visualization_file_name}") try: problem = petab.Problem.from_files( diff --git a/petab/schemas/petab_schema.v2.0.0.yaml b/petab/schemas/petab_schema.v2.0.0.yaml index ddeb428a..5b6f1be7 100644 --- a/petab/schemas/petab_schema.v2.0.0.yaml +++ b/petab/schemas/petab_schema.v2.0.0.yaml @@ -1,7 +1,6 @@ # For syntax see: https://json-schema.org/understanding-json-schema -#$schema: "https://json-schema.org/draft/2019-09/meta/core" -$schema: 
"http://json-schema.org/draft-06/schema" -description: PEtab parameter estimation problem config file schema +$schema: "https://json-schema.org/draft/2020-12/schema" +description: PEtab 2.0 parameter estimation problem configuration schema. definitions: list_of_files: @@ -9,90 +8,76 @@ definitions: description: List of files. items: type: string - description: File name or URL. + description: | + File name or URL, absolute or relative to the location of the PEtab + problem configuration file. version_number: type: string pattern: ^([1-9][0-9]*!)?(0|[1-9][0-9]*)(\.(0|[1-9][0-9]*))*((a|b|rc)(0|[1-9][0-9]*))?(\.post(0|[1-9][0-9]*))?(\.dev(0|[1-9][0-9]*))?$ - description: Version number (corresponding to PEP 440). + description: Version number. properties: format_version: - anyof: + anyOf: - $ref: "#/definitions/version_number" - type: integer - description: Version of the PEtab format + description: Version of the PEtab format. - parameter_file: - oneOf: - - type: string - - type: array - description: | - File name (absolute or relative) or URL to PEtab parameter table - containing parameters of all models listed in `problems`. A single - table may be split into multiple files and described as an array here. - problems: - type: array + id: + type: string description: | - One or multiple PEtab problems (sets of model, condition, observable - and measurement files). If different model and data files are - independent, they can be specified as separate PEtab problems, which - may allow more efficient handling. Files in one problem cannot refer - to models entities or data specified inside another problem. - items: + Identifier of the PEtab problem. - type: object - description: | - A set of PEtab model, condition, observable and measurement - files and optional visualization files. 
- properties: - - model_files: - type: object - description: One or multiple models - - # the model ID - patternProperties: - "^[a-zA-Z_]\\w*$": - type: object - properties: - location: - type: string - description: Model file name or URL - language: - type: string - description: | - Model language, e.g., 'sbml', 'cellml', 'bngl', 'pysb' - required: - - location - - language - additionalProperties: false - - measurement_files: - description: List of PEtab measurement files. - $ref: "#/definitions/list_of_files" - - condition_files: - description: List of PEtab condition files. - $ref: "#/definitions/list_of_files" - - observable_files: - description: List of PEtab observable files. - $ref: "#/definitions/list_of_files" - - visualization_files: - description: List of PEtab visualization files. - $ref: "#/definitions/list_of_files" - - mapping_file: + This is optional and has no effect on the PEtab problem itself. + pattern: "^[a-zA-Z_]\\w*$" + + parameter_files: + description: List of PEtab parameter files. + $ref: "#/definitions/list_of_files" + + model_files: + type: object + description: One or multiple models. + + # the model ID + patternProperties: + "^[a-zA-Z_]\\w*$": + type: object + properties: + location: + type: string + description: | + Model file name or URL, absolute or relative to the location of + the PEtab problem configuration file. + language: type: string - description: Optional PEtab mapping file name or URL. + description: | + Model language, e.g., 'sbml', 'cellml', 'bngl', 'pysb' + required: + - location + - language + additionalProperties: false + + measurement_files: + description: List of PEtab measurement files. + $ref: "#/definitions/list_of_files" - required: - - model_files - - observable_files - - measurement_files - - condition_files + condition_files: + description: List of PEtab condition files. + $ref: "#/definitions/list_of_files" + + experiment_files: + description: List of PEtab experiment files. 
+ $ref: "#/definitions/list_of_files" + + observable_files: + description: List of PEtab observable files. + $ref: "#/definitions/list_of_files" + + mapping_files: + description: List of PEtab mapping files. + $ref: "#/definitions/list_of_files" extensions: type: object @@ -107,14 +92,23 @@ properties: properties: version: $ref: "#/definitions/version_number" - + required: + type: boolean + description: | + Indicates whether the extension is required for the + mathematical interpretation of the problem. required: - version + - required additionalProperties: true additionalProperties: false required: - format_version - - parameter_file - - problems + - parameter_files + - model_files + - observable_files + - measurement_files + +additionalProperties: false diff --git a/petab/v1/C.py b/petab/v1/C.py index a013a0cc..09e94c20 100644 --- a/petab/v1/C.py +++ b/petab/v1/C.py @@ -2,6 +2,7 @@ """ This file contains constant definitions. """ + import math as _math import sys @@ -173,7 +174,8 @@ LOG10 = "log10" #: Supported observable transformations OBSERVABLE_TRANSFORMATIONS = [LIN, LOG, LOG10] - +#: Supported parameter transformations +PARAMETER_SCALES = [LIN, LOG, LOG10] # NOISE MODELS @@ -206,6 +208,13 @@ PARAMETER_SCALE_LAPLACE, ] +#: parameterScale*-type prior distributions +PARAMETER_SCALE_PRIOR_TYPES = [ + PARAMETER_SCALE_UNIFORM, + PARAMETER_SCALE_NORMAL, + PARAMETER_SCALE_LAPLACE, +] + #: Supported noise distributions NOISE_MODELS = [NORMAL, LAPLACE] diff --git a/petab/v1/__init__.py b/petab/v1/__init__.py index a8609621..cd21b88a 100644 --- a/petab/v1/__init__.py +++ b/petab/v1/__init__.py @@ -4,6 +4,7 @@ """ from ..version import __version__ # noqa: F401, E402 +from . 
import models # noqa: F401, E402 from .C import * # noqa: F403, F401, E402 from .calculate import * # noqa: F403, F401, E402 from .composite_problem import * # noqa: F403, F401, E402 @@ -13,6 +14,7 @@ from .lint import * # noqa: F403, F401, E402 from .mapping import * # noqa: F403, F401, E402 from .measurements import * # noqa: F403, F401, E402 +from .models import Model # noqa: F401, E402 from .observables import * # noqa: F403, F401, E402 from .parameter_mapping import * # noqa: F403, F401, E402 from .parameters import * # noqa: F403, F401, E402 diff --git a/petab/v1/calculate.py b/petab/v1/calculate.py index 3cc86f73..131d0d60 100644 --- a/petab/v1/calculate.py +++ b/petab/v1/calculate.py @@ -1,6 +1,7 @@ """Functions performing various calculations.""" import numbers +import operator from functools import reduce import numpy as np @@ -97,16 +98,16 @@ def calculate_residuals_for_table( Calculate residuals for a single measurement table. For the arguments, see `calculate_residuals`. """ + # below, we rely on a unique index + measurement_df = measurement_df.reset_index(drop=True) + # create residual df as copy of measurement df, change column residual_df = measurement_df.copy(deep=True).rename( columns={MEASUREMENT: RESIDUAL} ) residual_df[RESIDUAL] = residual_df[RESIDUAL].astype("float64") # matching columns - compared_cols = set(MEASUREMENT_DF_COLS) - compared_cols -= {MEASUREMENT} - compared_cols &= set(measurement_df.columns) - compared_cols &= set(simulation_df.columns) + compared_cols = set(measurement_df.columns) & set(simulation_df.columns) # compute noise formulas for observables noise_formulas = get_symbolic_noise_formulas(observable_df) @@ -120,24 +121,36 @@ def calculate_residuals_for_table( for col in compared_cols ] mask = reduce(lambda x, y: x & y, masks) + if mask.sum() == 0: + raise ValueError( + f"Could not find simulation for measurement {row}." 
+ ) + # if we have multiple matches, check that the rows are all identical + elif ( + mask.sum() > 1 + and simulation_df.loc[mask].drop_duplicates().shape[0] > 1 + ): + raise ValueError( + f"Multiple different simulations found for measurement " + f"{row}:\n{simulation_df.loc[mask]}" + ) + simulation = simulation_df.loc[mask][SIMULATION].iloc[0] if scale: # apply scaling observable = observable_df.loc[row[OBSERVABLE_ID]] trafo = observable.get(OBSERVABLE_TRANSFORMATION, LIN) - simulation = petab.scale(simulation, trafo) - measurement = petab.scale(measurement, trafo) + scaled_simulation = petab.scale(simulation, trafo) + scaled_measurement = petab.scale(measurement, trafo) # non-normalized residual is just the difference - residual = simulation - measurement + residual = scaled_measurement - scaled_simulation - noise_value = 1 if normalize: - # look up noise standard deviation - noise_value = evaluate_noise_formula( + # divide by standard deviation + residual /= evaluate_noise_formula( row, noise_formulas, parameter_df, simulation ) - residual /= noise_value # fill in value residual_df.loc[irow, RESIDUAL] = residual @@ -155,13 +168,10 @@ def get_symbolic_noise_formulas(observable_df) -> dict[str, sp.Expr]: """ noise_formulas = {} # iterate over observables - for row in observable_df.itertuples(): - observable_id = row.Index - if NOISE_FORMULA not in observable_df.columns: - noise_formula = None - else: - noise_formula = sympify_petab(row.noiseFormula) - noise_formulas[observable_id] = noise_formula + for observable_id, row in observable_df.iterrows(): + noise_formulas[observable_id] = ( + sympify_petab(row.noiseFormula) if NOISE_FORMULA in row else None + ) return noise_formulas @@ -336,10 +346,7 @@ def calculate_llh_for_table( llhs = [] # matching columns - compared_cols = set(MEASUREMENT_DF_COLS) - compared_cols -= {MEASUREMENT} - compared_cols &= set(measurement_df.columns) - compared_cols &= set(simulation_df.columns) + compared_cols = 
set(measurement_df.columns) & set(simulation_df.columns) # compute noise formulas for observables noise_formulas = get_symbolic_noise_formulas(observable_df) @@ -353,7 +360,7 @@ def calculate_llh_for_table( (simulation_df[col] == row[col]) | petab.is_empty(row[col]) for col in compared_cols ] - mask = reduce(lambda x, y: x & y, masks) + mask = reduce(operator.and_, masks) simulation = simulation_df.loc[mask][SIMULATION].iloc[0] @@ -364,7 +371,7 @@ def calculate_llh_for_table( # get noise standard deviation noise_value = evaluate_noise_formula( - row, noise_formulas, parameter_df, petab.scale(simulation, scale) + row, noise_formulas, parameter_df, simulation ) # get noise distribution diff --git a/petab/v1/composite_problem.py b/petab/v1/composite_problem.py index 5f07d523..f887ec03 100644 --- a/petab/v1/composite_problem.py +++ b/petab/v1/composite_problem.py @@ -1,4 +1,5 @@ """PEtab problems consisting of multiple models""" + import os import pandas as pd diff --git a/petab/v1/conditions.py b/petab/v1/conditions.py index 4e691d62..8caf04dc 100644 --- a/petab/v1/conditions.py +++ b/petab/v1/conditions.py @@ -60,9 +60,11 @@ def write_condition_df(df: pd.DataFrame, filename: str | Path) -> None: Arguments: df: PEtab condition table - filename: Destination file name + filename: Destination file name. The parent directory will be created + if necessary. 
""" df = get_condition_df(df) + Path(filename).parent.mkdir(parents=True, exist_ok=True) df.to_csv(filename, sep="\t", index=True) @@ -109,7 +111,7 @@ def get_parametric_overrides(condition_df: pd.DataFrame) -> list[str]: result = [] for column in constant_parameters: - if np.issubdtype(condition_df[column].dtype, np.number): + if not pd.api.types.is_string_dtype(condition_df[column].dtype): continue floatified = condition_df.loc[:, column].apply(core.to_float_if_float) diff --git a/petab/v1/core.py b/petab/v1/core.py index 10274b8c..6a142781 100644 --- a/petab/v1/core.py +++ b/petab/v1/core.py @@ -1,4 +1,5 @@ """PEtab core functions (or functions that don't fit anywhere else)""" + import logging import os import re @@ -60,8 +61,10 @@ def write_simulation_df(df: pd.DataFrame, filename: str | Path) -> None: Arguments: df: PEtab simulation table - filename: Destination file name + filename: Destination file name. The parent directory will be created + if necessary. """ + Path(filename).parent.mkdir(parents=True, exist_ok=True) df.to_csv(filename, sep="\t", index=False) @@ -130,7 +133,9 @@ def get_notnull_columns(df: pd.DataFrame, candidates: Iterable): ] -def get_observable_replacement_id(groupvars, groupvar) -> str: +def get_observable_replacement_id( + groupvars: list[str], groupvar: Sequence +) -> str: """Get the replacement ID for an observable. Arguments: @@ -138,7 +143,8 @@ def get_observable_replacement_id(groupvars, groupvar) -> str: The columns of a PEtab measurement table that should be unique between observables in a flattened PEtab problem. groupvar: - A specific grouping of `groupvars`. + A specific grouping of `groupvars`. Same length and order as + `groupvars`. Returns: The observable replacement ID. diff --git a/petab/v1/distributions.py b/petab/v1/distributions.py new file mode 100644 index 00000000..aed5fe2f --- /dev/null +++ b/petab/v1/distributions.py @@ -0,0 +1,808 @@ +"""Probability distributions used by PEtab. 
+ +This module provides a set of univariate probability distributions +that can be used for sampling and evaluating the probability density +function (PDF) and cumulative distribution function (CDF). +Most of these distributions also support log transformations and truncation. + +Not all distributions that can be represented by these classes are valid +as PEtab parameter prior or noise distributions. +""" + +from __future__ import annotations + +import abc +from typing import Any + +import numpy as np + +_SCIPY_IMPORT_ERROR = ( + "scipy is required for this functionality. " + "Install it with: pip install scipy" +) + +__all__ = [ + "Distribution", + "Cauchy", + "ChiSquare", + "Exponential", + "Gamma", + "Laplace", + "Normal", + "Rayleigh", + "Uniform", + "LogUniform", +] + + +class Distribution(abc.ABC): + """A univariate probability distribution. + + This class provides a common interface for sampling from and evaluating + the probability density function of a univariate probability distribution. + + The distribution can be transformed by applying a logarithm to the samples + and the PDF. This is useful, e.g., for log-normal distributions. + + :param log: If ``True``, the distribution is transformed to its + corresponding log distribution (e.g., Normal -> LogNormal). + If a float, the distribution is transformed to its corresponding + log distribution with the given log-base (e.g., Normal -> Log10Normal). + If ``False``, no transformation is applied. + :param trunc: The truncation points (lower, upper) of the distribution + or ``None`` if the distribution is not truncated. + If the distribution is log-scaled, the truncation limits are expected + to be on the same log scale. 
+ """ + + def __init__( + self, *, log: bool | float = False, trunc: tuple[float, float] = None + ): + if log is True: + log = np.exp(1) + + if trunc == (-np.inf, np.inf): + trunc = None + + if trunc is not None and trunc[0] >= trunc[1]: + raise ValueError( + "The lower truncation limit must be smaller " + "than the upper truncation limit." + ) + + self._logbase = log + self._trunc = trunc + + self._cd_low = None + self._cd_high = None + self._truncation_normalizer = 1 + + if self._trunc is not None: + try: + # the cumulative density of the transformed distribution at the + # truncation limits + self._cd_low = self._cdf_transformed_untruncated( + self.trunc_low + ) + self._cd_high = self._cdf_transformed_untruncated( + self.trunc_high + ) + # normalization factor for the PDF/CDF of the transformed + # distribution to account for truncation + self._truncation_normalizer = 1 / ( + self._cd_high - self._cd_low + ) + except NotImplementedError: + pass + + @property + def trunc_low(self) -> float: + """The lower truncation limit of the transformed distribution.""" + return self._trunc[0] if self._trunc else -np.inf + + @property + def trunc_high(self) -> float: + """The upper truncation limit of the transformed distribution.""" + return self._trunc[1] if self._trunc else np.inf + + def _exp(self, x: np.ndarray | float) -> np.ndarray | float: + """Exponentiate / undo the log transformation if applicable. + + Exponentiate if a log transformation is applied to the distribution. + Otherwise, return the input. + + :param x: The sample to transform. + :return: The transformed sample + """ + if self._logbase is False: + return x + return self._logbase**x + + def _log(self, x: np.ndarray | float) -> np.ndarray | float: + """Apply the log transformation if enabled. + + Compute the log of `x` with the specified base if a log transformation + is applied to the distribution. Otherwise, return the input. + + :param x: The value to transform. + :return: The transformed value. 
+ """ + if self._logbase is False: + return x + with np.errstate(invalid="ignore", divide="ignore"): + return np.log(x) / np.log(self._logbase) + + def sample(self, shape=None) -> np.ndarray | float: + """Sample from the distribution. + + :param shape: The shape of the sample. + :return: A sample from the distribution. + """ + sample = ( + self._exp(self._sample(shape)) + if self._trunc is None + else self._inverse_transform_sample(shape) + ) + + return sample + + @abc.abstractmethod + def _sample(self, shape=None) -> np.ndarray | float: + """Sample from the underlying distribution. + + :param shape: The shape of the sample. + :return: A sample from the underlying distribution, + before applying, e.g., the log transformation or truncation. + """ + ... + + def pdf(self, x) -> np.ndarray | float: + """Probability density function at x. + + :param x: The value at which to evaluate the PDF. + :return: The value of the PDF at ``x``. + NaN, if ``x`` is outside the domain of the PDF. + """ + if self._trunc is None: + return self._pdf_untruncated(x) + + return np.where( + (x >= self.trunc_low) & (x <= self.trunc_high), + self._pdf_untruncated(x) * self._truncation_normalizer, + 0, + ) + + @abc.abstractmethod + def _pdf_untransformed_untruncated(self, x) -> np.ndarray | float: + """Probability density function of the underlying distribution at x. + + :param x: The value at which to evaluate the PDF. + :return: The value of the PDF at ``x``. + """ + ... + + def _pdf_untruncated(self, x) -> np.ndarray | float: + """Probability density function of the untruncated distribution at x. + + :param x: The value at which to evaluate the PDF. + :return: The value of the PDF of the maybe-log-transformed distribution + at ``x``. 
+ """ + if self.logbase is False: + return self._pdf_untransformed_untruncated(x) + + # handle the log transformation; see also: + # https://en.wikipedia.org/wiki/Probability_density_function#Scalar_to_scalar + with np.errstate(invalid="ignore", divide="ignore"): + chain_rule_factor = ( + (1 / (x * np.log(self._logbase))) if self._logbase else 1 + ) + + return np.where( + x >= 0, + np.where( + x > 0, + self._pdf_untransformed_untruncated(self._log(x)) + * chain_rule_factor, + 0, + ), + # NaN outside its domain + np.nan, + ) + + @property + def logbase(self) -> bool | float: + """The base of the log transformation. + + If ``False``, no transformation is applied. + """ + return self._logbase + + def cdf(self, x) -> np.ndarray | float: + """Cumulative distribution function at x. + + :param x: The value at which to evaluate the CDF. + :return: The value of the CDF at ``x``. + """ + if self._trunc is None: + return self._cdf_transformed_untruncated(x) + return ( + self._cdf_transformed_untruncated(x) - self._cd_low + ) * self._truncation_normalizer + + def _cdf_transformed_untruncated(self, x) -> np.ndarray | float: + """Cumulative distribution function of the transformed, but untruncated + distribution at x. + + :param x: The value at which to evaluate the CDF. + :return: The value of the CDF at ``x``. + """ + if not self.logbase: + return self._cdf_untransformed_untruncated(x) + + with np.errstate(invalid="ignore"): + return np.where( + x < 0, 0, self._cdf_untransformed_untruncated(self._log(x)) + ) + + def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: + """Cumulative distribution function of the underlying + (untransformed, untruncated) distribution at x. + + :param x: The value at which to evaluate the CDF. + :return: The value of the CDF at ``x``. + """ + raise NotImplementedError + + def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float: + """Percent point function of the underlying + (untransformed, untruncated) distribution at q. 
+ + :param q: The quantile at which to evaluate the PPF. + :return: The value of the PPF at ``q``. + """ + raise NotImplementedError + + def _ppf_transformed_untruncated(self, q) -> np.ndarray | float: + """Percent point function of the transformed, but untruncated + distribution at q. + + :param q: The quantile at which to evaluate the PPF. + :return: The value of the PPF at ``q``. + """ + return self._exp(self._ppf_untransformed_untruncated(q)) + + def ppf(self, q) -> np.ndarray | float: + """Percent point function at q. + + :param q: The quantile at which to evaluate the PPF. + :return: The value of the PPF at ``q``. + """ + if self._trunc is None: + return self._ppf_transformed_untruncated(q) + + # Adjust quantiles to account for truncation + adjusted_q = self._cd_low + q * (self._cd_high - self._cd_low) + return self._ppf_transformed_untruncated(adjusted_q) + + def _inverse_transform_sample(self, shape) -> np.ndarray | float: + """Generate an inverse transform sample from the transformed and + truncated distribution. + + :param shape: The shape of the sample. + :return: The sample. + """ + uniform_sample = np.random.uniform( + low=self._cd_low, high=self._cd_high, size=shape + ) + return self._ppf_transformed_untruncated(uniform_sample) + + def _repr(self, pars: dict[str, Any] = None) -> str: + """Return a string representation of the distribution.""" + pars = ", ".join(f"{k}={v}" for k, v in pars.items()) if pars else "" + + if self._logbase is False: + log = "" + elif self._logbase == np.exp(1): + log = ", log=True" + else: + log = f", log={self._logbase}" + + trunc = f", trunc={self._trunc}" if self._trunc else "" + + return f"{self.__class__.__name__}({pars}{log}{trunc})" + + +class Normal(Distribution): + """A (log-)normal distribution. + + :param loc: The location parameter of the distribution. + :param scale: The scale parameter of the distribution. + :param trunc: The truncation limits of the distribution. 
+ ``None`` if the distribution is not truncated. + If the distribution is log-scaled, the truncation limits are expected + to be on the same log scale. + :param log: If ``True``, the distribution is transformed to a log-normal + distribution. If a float, the distribution is transformed to a + log-normal distribution with the given log-base. + If ``False``, no transformation is applied. + If a transformation is applied, the location and scale parameters + are the location and scale of the underlying normal distribution. + """ + + def __init__( + self, + loc: float, + scale: float, + trunc: tuple[float, float] | None = None, + log: bool | float = False, + ): + try: + from scipy.stats import norm + except ImportError as e: + raise ImportError(_SCIPY_IMPORT_ERROR) from e + self._dist = norm + self._loc = loc + self._scale = scale + super().__init__(log=log, trunc=trunc) + + def __repr__(self): + return self._repr({"loc": self._loc, "scale": self._scale}) + + def _sample(self, shape=None) -> np.ndarray | float: + return np.random.normal(loc=self._loc, scale=self._scale, size=shape) + + def _pdf_untransformed_untruncated(self, x) -> np.ndarray | float: + return self._dist.pdf(x, loc=self._loc, scale=self._scale) + + def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: + return self._dist.cdf(x, loc=self._loc, scale=self._scale) + + def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float: + return self._dist.ppf(q, loc=self._loc, scale=self._scale) + + @property + def loc(self) -> float: + """The location parameter of the underlying distribution.""" + return self._loc + + @property + def scale(self) -> float: + """The scale parameter of the underlying distribution.""" + return self._scale + + +class Uniform(Distribution): + """A (log-)uniform distribution. + + :param low: The lower bound of the distribution. + :param high: The upper bound of the distribution. 
+ :param log: If ``True``, the distribution is transformed to a log-uniform + distribution. If a float, the distribution is transformed to a + log-uniform distribution with the given log-base. + If ``False``, no transformation is applied. + If a transformation is applied, the lower and upper bounds are the + lower and upper bounds of the underlying uniform distribution. + Note that this differs from the usual definition of a log-uniform + distribution, where the logarithm of the variable is uniformly + distributed between the logarithms of the bounds (see also + :class:`LogUniform`). + """ + + def __init__( + self, + low: float, + high: float, + *, + log: bool | float = False, + ): + try: + from scipy.stats import uniform + except ImportError as e: + raise ImportError(_SCIPY_IMPORT_ERROR) from e + self._dist = uniform + self._low = low + self._high = high + super().__init__(log=log) + + def __repr__(self): + return self._repr({"low": self._low, "high": self._high}) + + def _sample(self, shape=None) -> np.ndarray | float: + return np.random.uniform(low=self._low, high=self._high, size=shape) + + def _pdf_untransformed_untruncated(self, x) -> np.ndarray | float: + return self._dist.pdf(x, loc=self._low, scale=self._high - self._low) + + def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: + return self._dist.cdf(x, loc=self._low, scale=self._high - self._low) + + def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float: + return self._dist.ppf(q, loc=self._low, scale=self._high - self._low) + + +class LogUniform(Distribution): + """A log-uniform or reciprocal distribution. + + A random variable is log-uniformly distributed between ``low`` and ``high`` + if its logarithm is uniformly distributed between ``log(low)`` and + ``log(high)``. + + :param low: The lower bound of the distribution. + :param high: The upper bound of the distribution. + :param trunc: The truncation limits of the distribution. 
+ """ + + def __init__( + self, + low: float, + high: float, + trunc: tuple[float, float] | None = None, + ): + try: + from scipy.stats import uniform + except ImportError as e: + raise ImportError(_SCIPY_IMPORT_ERROR) from e + self._dist = uniform + self._logbase = np.exp(1) + self._low = self._log(low) + self._high = self._log(high) + super().__init__(log=self._logbase, trunc=trunc) + + def __repr__(self): + return self._repr({"low": self._low, "high": self._high}) + + def _sample(self, shape=None) -> np.ndarray | float: + return np.random.uniform(low=self._low, high=self._high, size=shape) + + def _pdf_untransformed_untruncated(self, x) -> np.ndarray | float: + return self._dist.pdf(x, loc=self._low, scale=self._high - self._low) + + def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: + return self._dist.cdf(x, loc=self._low, scale=self._high - self._low) + + def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float: + return self._dist.ppf(q, loc=self._low, scale=self._high - self._low) + + +class Laplace(Distribution): + """A (log-)Laplace distribution. + + :param loc: The location parameter of the distribution. + :param scale: The scale parameter of the distribution. + :param trunc: The truncation limits of the distribution. + ``None`` if the distribution is not truncated. + If the distribution is log-scaled, the truncation limits are expected + to be on the same log scale. + :param log: If ``True``, the distribution is transformed to a log-Laplace + distribution. If a float, the distribution is transformed to a + log-Laplace distribution with the given log-base. + If ``False``, no transformation is applied. + If a transformation is applied, the location and scale parameters + are the location and scale of the underlying Laplace distribution. 
class Cauchy(Distribution):
    """Cauchy distribution.

    A (possibly truncated) `Cauchy distribution
    <https://en.wikipedia.org/wiki/Cauchy_distribution>`__.

    :param loc: The location parameter of the distribution.
    :param scale: The scale parameter of the distribution.
    :param trunc: The truncation limits of the distribution.
        ``None`` if the distribution is not truncated.
        If the distribution is log-scaled, the truncation limits are expected
        to be on the same log scale.
    :param log: If ``True``, the distribution is transformed to a log-Cauchy
        distribution. If a float, the distribution is transformed to a
        log-Cauchy distribution with the given log-base.
        If ``False``, no transformation is applied.
        If a transformation is applied, the location and scale parameters
        are the location and scale of the underlying Cauchy distribution.
    :raises ImportError: If :mod:`scipy.stats` cannot be imported.
    """

    def __init__(
        self,
        loc: float,
        scale: float,
        trunc: tuple[float, float] | None = None,
        log: bool | float = False,
    ):
        # scipy is imported lazily so that the module can be imported
        # without it; the error message is shared module-wide.
        try:
            from scipy.stats import cauchy
        except ImportError as e:
            raise ImportError(_SCIPY_IMPORT_ERROR) from e
        self._dist = cauchy
        self._loc = loc
        self._scale = scale
        super().__init__(log=log, trunc=trunc)

    def __repr__(self):
        return self._repr({"loc": self._loc, "scale": self._scale})

    def _pdf_untransformed_untruncated(self, x) -> np.ndarray | float:
        """PDF of the underlying Cauchy distribution at ``x``."""
        return self._dist.pdf(x, loc=self._loc, scale=self._scale)

    def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float:
        """CDF of the underlying Cauchy distribution at ``x``."""
        return self._dist.cdf(x, loc=self._loc, scale=self._scale)

    def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float:
        """Quantile function of the underlying Cauchy distribution."""
        return self._dist.ppf(q, loc=self._loc, scale=self._scale)

    def _sample(self, shape=None) -> np.ndarray | float:
        """Draw samples of the given shape from the underlying distribution."""
        return self._dist.rvs(loc=self._loc, scale=self._scale, size=shape)

    @property
    def loc(self) -> float:
        """The location parameter of the underlying distribution."""
        return self._loc

    @property
    def scale(self) -> float:
        """The scale parameter of the underlying distribution."""
        return self._scale


class ChiSquare(Distribution):
    """Chi-squared distribution.

    A (possibly truncated) `Chi-squared distribution
    <https://en.wikipedia.org/wiki/Chi-squared_distribution>`__.

    :param dof: The degrees of freedom parameter of the distribution.
        Must be a positive integer; an integer-valued float is accepted
        and converted to ``int``.
    :param trunc: The truncation limits of the distribution.
        ``None`` if the distribution is not truncated.
        If the distribution is log-scaled, the truncation limits are expected
        to be on the same log scale.
    :param log: If ``True``, the distribution is transformed to a
        log-Chi-squared distribution.
        If a float, the distribution is transformed to a
        log-Chi-squared distribution with the given log-base.
        If ``False``, no transformation is applied.
        If a transformation is applied, the degrees of freedom parameter
        is the degrees of freedom of the underlying Chi-squared distribution.
    :raises ImportError: If :mod:`scipy.stats` cannot be imported.
    :raises ValueError: If ``dof`` is not a positive integer.
    """

    def __init__(
        self,
        dof: int | float,
        trunc: tuple[float, float] | None = None,
        log: bool | float = False,
    ):
        try:
            from scipy.stats import chi2
        except ImportError as e:
            raise ImportError(_SCIPY_IMPORT_ERROR) from e
        self._dist = chi2

        # Validate *all* inputs, not only floats: previously an `int` such
        # as 0 or -3 slipped through unchecked although the contract (and
        # the error message) requires a positive integer.
        is_integral = not isinstance(dof, float) or dof.is_integer()
        if not is_integral or dof < 1:
            raise ValueError(
                f"`dof' must be a positive integer, but was `{dof}'."
            )
        if isinstance(dof, float):
            dof = int(dof)

        self._dof = dof
        super().__init__(log=log, trunc=trunc)

    def __repr__(self):
        return self._repr({"dof": self._dof})

    def _pdf_untransformed_untruncated(self, x) -> np.ndarray | float:
        """PDF of the underlying Chi-squared distribution at ``x``."""
        return self._dist.pdf(x, df=self._dof)

    def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float:
        """CDF of the underlying Chi-squared distribution at ``x``."""
        return self._dist.cdf(x, df=self._dof)

    def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float:
        """Quantile function of the underlying Chi-squared distribution."""
        return self._dist.ppf(q, df=self._dof)

    def _sample(self, shape=None) -> np.ndarray | float:
        """Draw samples of the given shape from the underlying distribution."""
        return self._dist.rvs(df=self._dof, size=shape)

    @property
    def dof(self) -> int:
        """The degrees of freedom parameter."""
        return self._dof


class Exponential(Distribution):
    """Exponential distribution.

    A (possibly truncated) `Exponential distribution
    <https://en.wikipedia.org/wiki/Exponential_distribution>`__.

    No ``log`` option is offered; the base class is always initialized
    with ``log=False``.

    :param scale: The scale parameter of the distribution.
    :param trunc: The truncation limits of the distribution.
        ``None`` if the distribution is not truncated.
    :raises ImportError: If :mod:`scipy.stats` cannot be imported.
    """

    def __init__(
        self,
        scale: float,
        trunc: tuple[float, float] | None = None,
    ):
        try:
            from scipy.stats import expon
        except ImportError as e:
            raise ImportError(_SCIPY_IMPORT_ERROR) from e
        self._dist = expon
        self._scale = scale
        super().__init__(log=False, trunc=trunc)

    def __repr__(self):
        return self._repr({"scale": self._scale})

    def _pdf_untransformed_untruncated(self, x) -> np.ndarray | float:
        """PDF of the underlying Exponential distribution at ``x``."""
        return self._dist.pdf(x, scale=self._scale)

    def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float:
        """CDF of the underlying Exponential distribution at ``x``."""
        return self._dist.cdf(x, scale=self._scale)

    def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float:
        """Quantile function of the underlying Exponential distribution."""
        return self._dist.ppf(q, scale=self._scale)

    def _sample(self, shape=None) -> np.ndarray | float:
        """Draw samples of the given shape from the underlying distribution."""
        return self._dist.rvs(scale=self._scale, size=shape)

    @property
    def scale(self) -> float:
        """The scale parameter of the underlying distribution."""
        return self._scale


class Gamma(Distribution):
    """Gamma distribution.

    A (possibly truncated) `Gamma distribution
    <https://en.wikipedia.org/wiki/Gamma_distribution>`__.

    :param shape: The shape parameter of the distribution
        (passed to scipy as ``a``).
    :param scale: The scale parameter of the distribution.
    :param trunc: The truncation limits of the distribution.
        ``None`` if the distribution is not truncated.
    :param log: If ``True``, the distribution is transformed to a
        log-Gamma distribution.
        If a float, the distribution is transformed to a
        log-Gamma distribution with the given log-base.
        If ``False``, no transformation is applied.
        If a transformation is applied, the shape and scale parameters
        are the shape and scale of the underlying Gamma distribution.
    :raises ImportError: If :mod:`scipy.stats` cannot be imported.
    """

    def __init__(
        self,
        shape: float,
        scale: float,
        trunc: tuple[float, float] | None = None,
        log: bool | float = False,
    ):
        try:
            from scipy.stats import gamma
        except ImportError as e:
            raise ImportError(_SCIPY_IMPORT_ERROR) from e
        self._dist = gamma
        self._shape = shape
        self._scale = scale
        super().__init__(log=log, trunc=trunc)

    def __repr__(self):
        return self._repr({"shape": self._shape, "scale": self._scale})

    def _pdf_untransformed_untruncated(self, x) -> np.ndarray | float:
        """PDF of the underlying Gamma distribution at ``x``."""
        return self._dist.pdf(x, a=self._shape, scale=self._scale)

    def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float:
        """CDF of the underlying Gamma distribution at ``x``."""
        return self._dist.cdf(x, a=self._shape, scale=self._scale)

    def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float:
        """Quantile function of the underlying Gamma distribution."""
        return self._dist.ppf(q, a=self._shape, scale=self._scale)

    def _sample(self, shape=None) -> np.ndarray | float:
        """Draw samples from the underlying distribution.

        Note that ``shape`` here is the shape of the sample array, not the
        distribution's shape parameter (which is ``self._shape``).
        """
        return self._dist.rvs(a=self._shape, scale=self._scale, size=shape)

    @property
    def shape(self) -> float:
        """The shape parameter of the underlying distribution."""
        return self._shape

    @property
    def scale(self) -> float:
        """The scale parameter of the underlying distribution."""
        return self._scale


class Rayleigh(Distribution):
    """Rayleigh distribution.

    A (possibly truncated) `Rayleigh distribution
    <https://en.wikipedia.org/wiki/Rayleigh_distribution>`__.

    :param scale: The scale parameter of the distribution.
    :param trunc: The truncation limits of the distribution.
        ``None`` if the distribution is not truncated.
    :param log: If ``True``, the distribution is transformed to a
        log-Rayleigh distribution.
        If a float, the distribution is transformed to a
        log-Rayleigh distribution with the given log-base.
        If ``False``, no transformation is applied.
        If a transformation is applied, the scale parameter
        is the scale of the underlying Rayleigh distribution.
    :raises ImportError: If :mod:`scipy.stats` cannot be imported.
    """

    def __init__(
        self,
        scale: float,
        trunc: tuple[float, float] | None = None,
        log: bool | float = False,
    ):
        try:
            from scipy.stats import rayleigh
        except ImportError as e:
            raise ImportError(_SCIPY_IMPORT_ERROR) from e
        self._dist = rayleigh
        self._scale = scale
        super().__init__(log=log, trunc=trunc)

    def __repr__(self):
        return self._repr({"scale": self._scale})

    def _pdf_untransformed_untruncated(self, x) -> np.ndarray | float:
        """PDF of the underlying Rayleigh distribution at ``x``."""
        return self._dist.pdf(x, scale=self._scale)

    def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float:
        """CDF of the underlying Rayleigh distribution at ``x``."""
        return self._dist.cdf(x, scale=self._scale)

    def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float:
        """Quantile function of the underlying Rayleigh distribution."""
        return self._dist.ppf(q, scale=self._scale)

    def _sample(self, shape=None) -> np.ndarray | float:
        """Draw samples of the given shape from the underlying distribution."""
        return self._dist.rvs(scale=self._scale, size=shape)

    @property
    def scale(self) -> float:
        """The scale parameter of the underlying distribution."""
        return self._scale
) @@ -126,7 +129,7 @@ def check_condition_df( ) for column_name in req_cols: - if not np.issubdtype(df[column_name].dtype, np.number): + if pd.api.types.is_string_dtype(df[column_name].dtype): assert_no_leading_trailing_whitespace( df[column_name].values, column_name ) @@ -170,14 +173,14 @@ def check_measurement_df( _check_df(df, MEASUREMENT_DF_REQUIRED_COLS, "measurement") for column_name in MEASUREMENT_DF_REQUIRED_COLS: - if not np.issubdtype(df[column_name].dtype, np.number): + if pd.api.types.is_string_dtype(df[column_name].dtype): assert_no_leading_trailing_whitespace( df[column_name].values, column_name ) for column_name in MEASUREMENT_DF_OPTIONAL_COLS: - if column_name in df and not np.issubdtype( - df[column_name].dtype, np.number + if column_name in df and pd.api.types.is_string_dtype( + df[column_name].dtype ): assert_no_leading_trailing_whitespace( df[column_name].values, column_name @@ -240,7 +243,7 @@ def check_parameter_df( check_ids(df.index.values, kind="parameter") for column_name in PARAMETER_DF_REQUIRED_COLS[1:]: # 0 is PARAMETER_ID - if not np.issubdtype(df[column_name].dtype, np.number): + if pd.api.types.is_string_dtype(df[column_name].dtype): assert_no_leading_trailing_whitespace( df[column_name].values, column_name ) @@ -301,14 +304,14 @@ def check_observable_df(observable_df: pd.DataFrame) -> None: check_ids(observable_df.index.values, kind="observable") for column_name in OBSERVABLE_DF_REQUIRED_COLS[1:]: - if not np.issubdtype(observable_df[column_name].dtype, np.number): + if pd.api.types.is_string_dtype(observable_df[column_name].dtype): assert_no_leading_trailing_whitespace( observable_df[column_name].values, column_name ) for column_name in OBSERVABLE_DF_OPTIONAL_COLS: - if column_name in observable_df and not np.issubdtype( - observable_df[column_name].dtype, np.number + if column_name in observable_df and pd.api.types.is_string_dtype( + observable_df[column_name].dtype ): assert_no_leading_trailing_whitespace( 
observable_df[column_name].values, column_name @@ -556,7 +559,7 @@ def check_parameter_bounds(parameter_df: pd.DataFrame) -> None: ] in [LOG, LOG10]: raise AssertionError( f"Bounds for {row[PARAMETER_SCALE]} scaled parameter " - f"{ row.name} must be positive." + f"{row.name} must be positive." ) if ( row.get(PARAMETER_SCALE, LIN) in [LOG, LOG10] @@ -586,8 +589,7 @@ def assert_parameter_prior_type_is_valid(parameter_df: pd.DataFrame) -> None: for _, row in parameter_df.iterrows(): if row[col] not in PRIOR_TYPES and not core.is_empty(row[col]): raise AssertionError( - f"{col} must be one of {PRIOR_TYPES} but is " - f"'{row[col]}'." + f"{col} must be one of {PRIOR_TYPES} but is '{row[col]}'." ) @@ -904,21 +906,6 @@ def lint_problem(problem: "petab.Problem") -> bool: else: logger.warning("Model not available. Skipping.") - if problem.measurement_df is not None: - logger.info("Checking measurement table...") - try: - check_measurement_df(problem.measurement_df, problem.observable_df) - - if problem.condition_df is not None: - assert_measurement_conditions_present_in_condition_table( - problem.measurement_df, problem.condition_df - ) - except AssertionError as e: - logger.error(e) - errors_occurred = True - else: - logger.warning("Measurement table not available. Skipping.") - if problem.condition_df is not None: logger.info("Checking condition table...") try: @@ -945,12 +932,27 @@ def lint_problem(problem: "petab.Problem") -> bool: for obs_id in problem.observable_df.index: if problem.model.has_entity_with_id(obs_id): logger.error( - f"Observable ID {obs_id} shadows model " "entity." + f"Observable ID {obs_id} shadows model entity." ) errors_occurred = True else: logger.warning("Observable table not available. 
Skipping.") + if problem.measurement_df is not None: + logger.info("Checking measurement table...") + try: + check_measurement_df(problem.measurement_df, problem.observable_df) + + if problem.condition_df is not None: + assert_measurement_conditions_present_in_condition_table( + problem.measurement_df, problem.condition_df + ) + except AssertionError as e: + logger.error(e) + errors_occurred = True + else: + logger.warning("Measurement table not available. Skipping.") + if problem.parameter_df is not None: logger.info("Checking parameter table...") try: @@ -1002,8 +1004,7 @@ def lint_problem(problem: "petab.Problem") -> bool: or problem.observable_df is None ): logger.warning( - "Not all files of the PEtab problem definition could " - "be checked." + "Not all files of the PEtab problem definition could be checked." ) else: logger.info("PEtab format check completed successfully.") @@ -1043,10 +1044,13 @@ def assert_model_parameters_in_condition_or_parameter_table( mapping_df[MODEL_ENTITY_ID], strict=True, ) - # mapping table entities mapping to already allowed parameters - if to_id in allowed_in_condition_cols - # mapping table entities mapping to species - or model.is_state_variable(to_id) + if not pd.isna(to_id) + and ( + # mapping table entities mapping to already allowed parameters + to_id in allowed_in_condition_cols + # mapping table entities mapping to species + or model.is_state_variable(to_id) + ) } allowed_in_parameter_table = ( @@ -1188,7 +1192,7 @@ def is_valid_identifier(x: str) -> bool: if pd.isna(x): return False - return re.match(r"^[a-zA-Z_]\w*$", x) is not None + return _petab_id_pattern.match(x) is not None def check_ids(ids: Iterable[str], kind: str = "") -> None: @@ -1213,7 +1217,7 @@ def check_ids(ids: Iterable[str], kind: str = "") -> None: offset = 2 error_output = "\n".join( [ - f"Line {index+offset}: " + f"Line {index + offset}: " + ("Missing ID" if pd.isna(_id) else _id) for index, _id in invalids ] diff --git a/petab/v1/mapping.py 
b/petab/v1/mapping.py index 80c71c68..81f77017 100644 --- a/petab/v1/mapping.py +++ b/petab/v1/mapping.py @@ -1,4 +1,6 @@ """Functionality related to the PEtab entity mapping table""" + +# TODO: Move to petab.v2.mapping from pathlib import Path import pandas as pd @@ -43,9 +45,7 @@ def get_mapping_df( for col in MAPPING_DF_REQUIRED_COLS: if col not in mapping_file.columns: - raise KeyError( - f"Mapping table missing mandatory field {PETAB_ENTITY_ID}." - ) + raise KeyError(f"Mapping table missing mandatory field {col}.") lint.assert_no_leading_trailing_whitespace( mapping_file.reset_index()[col].values, col @@ -61,9 +61,11 @@ def write_mapping_df(df: pd.DataFrame, filename: str | Path) -> None: Arguments: df: PEtab mapping table - filename: Destination file name + filename: Destination file name. The parent directory will be created + if necessary. """ df = get_mapping_df(df) + Path(filename).parent.mkdir(parents=True, exist_ok=True) df.to_csv(filename, sep="\t", index=True) diff --git a/petab/v1/math/SympyVisitor.py b/petab/v1/math/SympyVisitor.py index 016e872c..949366b3 100644 --- a/petab/v1/math/SympyVisitor.py +++ b/petab/v1/math/SympyVisitor.py @@ -1,4 +1,5 @@ """PEtab-math to sympy conversion.""" + import sympy as sp from sympy.logic.boolalg import Boolean, BooleanFalse, BooleanTrue @@ -38,8 +39,12 @@ } _unary_funcs = { "exp": sp.exp, - "log10": lambda x: -sp.oo if x.is_zero is True else sp.log(x, 10), - "log2": lambda x: -sp.oo if x.is_zero is True else sp.log(x, 2), + "log10": lambda x, evaluate=True: -sp.oo + if x.is_zero is True + else sp.log(x, 10, evaluate=evaluate), + "log2": lambda x, evaluate=True: -sp.oo + if x.is_zero is True + else sp.log(x, 2, evaluate=evaluate), "ln": sp.log, "sqrt": sp.sqrt, "abs": sp.Abs, @@ -74,8 +79,14 @@ class MathVisitorSympy(PetabMathExprParserVisitor): For a general introduction to ANTLR4 visitors, see: https://github.com/antlr/antlr4/blob/7d4cea92bc3f7d709f09c3f1ac77c5bbc71a6749/doc/python-target.md + + :param evaluate: 
Whether to evaluate the expression. """ + def __init__(self, evaluate=True): + super().__init__() + self.evaluate = evaluate + def visitPetabExpression( self, ctx: PetabMathExprParser.PetabExpressionContext ) -> sp.Expr | sp.Basic: @@ -100,9 +111,17 @@ def visitMultExpr( operand1 = bool2num(self.visit(ctx.getChild(0))) operand2 = bool2num(self.visit(ctx.getChild(2))) if ctx.ASTERISK(): - return operand1 * operand2 + return sp.Mul(operand1, operand2, evaluate=self.evaluate) if ctx.SLASH(): - return operand1 / operand2 + return ( + operand1 / operand2 + if self.evaluate + else sp.Mul( + operand1, + sp.Pow(operand2, -1, evaluate=False), + evaluate=False, + ) + ) raise AssertionError(f"Unexpected expression: {ctx.getText()}") @@ -111,9 +130,9 @@ def visitAddExpr(self, ctx: PetabMathExprParser.AddExprContext) -> sp.Expr: op1 = bool2num(self.visit(ctx.getChild(0))) op2 = bool2num(self.visit(ctx.getChild(2))) if ctx.PLUS(): - return op1 + op2 + return sp.Add(op1, op2, evaluate=self.evaluate) if ctx.MINUS(): - return op1 - op2 + return sp.Add(op1, -op2, evaluate=self.evaluate) raise AssertionError( f"Unexpected operator: {ctx.getChild(1).getText()} " @@ -145,28 +164,32 @@ def visitFunctionCall( f"Unexpected number of arguments: {len(args)} " f"in {ctx.getText()}" ) - return _trig_funcs[func_name](*args) + return _trig_funcs[func_name](*args, evaluate=self.evaluate) if func_name in _unary_funcs: if len(args) != 1: raise AssertionError( f"Unexpected number of arguments: {len(args)} " f"in {ctx.getText()}" ) - return _unary_funcs[func_name](*args) + return _unary_funcs[func_name](*args, evaluate=self.evaluate) if func_name in _binary_funcs: if len(args) != 2: raise AssertionError( f"Unexpected number of arguments: {len(args)} " f"in {ctx.getText()}" ) - return _binary_funcs[func_name](*args) + return _binary_funcs[func_name](*args, evaluate=self.evaluate) if func_name == "log": if len(args) not in [1, 2]: raise AssertionError( f"Unexpected number of arguments: {len(args)} " 
class PetabStrPrinter(StrPrinter):
    """A PEtab-compatible sympy string-printer.

    Overrides the sympy :class:`StrPrinter` hooks for those constructs
    whose default string form differs from PEtab math syntax (power
    operator, boolean literals, infinities, inverse trigonometric /
    hyperbolic function names, ``piecewise``, ``min`` and ``max``).
    """

    #: Mapping of sympy functions to PEtab functions
    _func_map = {
        "asin": "arcsin",
        "acos": "arccos",
        "atan": "arctan",
        "acot": "arccot",
        "asec": "arcsec",
        "acsc": "arccsc",
        "asinh": "arcsinh",
        "acosh": "arccosh",
        "atanh": "arctanh",
        "acoth": "arccoth",
        "asech": "arcsech",
        "acsch": "arccsch",
        "Abs": "abs",
    }

    @staticmethod
    def _is_fraction(num) -> bool:
        """Whether ``num`` is a non-integer Rational (printed as ``p/q``)."""
        return bool(num.is_Rational and not num.is_Integer)

    def _print_BooleanTrue(self, expr):
        return "true"

    def _print_BooleanFalse(self, expr):
        return "false"

    def _print_Pow(self, expr: sp.Pow):
        """Custom printing for the power operator.

        Emits ``base ^ exp`` and parenthesizes either operand whenever its
        printed form would not re-parse as a single operand of ``^``.
        """
        base, exp = expr.as_base_exp()
        str_base = self._print(base)
        str_exp = self._print(exp)
        # Numeric atoms can still print as several tokens: a non-integer
        # Rational prints as "p/q" and a negative number carries a leading
        # minus sign. Unparenthesized, "2/3 ^ x" re-parses as 2/(3^x) and
        # "-2 ^ x" as -(2^x), so such bases are wrapped like non-atoms.
        base_is_multi_token_number = base.is_Number and (
            self._is_fraction(base) or bool(base.is_negative)
        )
        if not base.is_Atom or base_is_multi_token_number:
            str_base = f"({str_base})"
        # A non-integer Rational exponent (e.g. sqrt -> 1/2) is an Atom but
        # prints as the multi-token "1/2", so without parentheses "x ^ 1/2"
        # re-parses as (x^1)/2. Parenthesize it explicitly.
        if not exp.is_Atom or self._is_fraction(exp):
            str_exp = f"({str_exp})"
        return f"{str_base} ^ {str_exp}"

    def _print_Infinity(self, expr):
        """Custom printing for infinity"""
        return "inf"

    def _print_NegativeInfinity(self, expr):
        """Custom printing for negative infinity"""
        return "-inf"

    def _print_Function(self, expr):
        """Custom printing for specific functions.

        ``Piecewise`` is delegated to :meth:`_print_Piecewise`, functions
        listed in :attr:`_func_map` are printed under their PEtab name, and
        everything else falls back to the sympy default.
        """

        if expr.func.__name__ == "Piecewise":
            return self._print_Piecewise(expr)

        if func := self._func_map.get(expr.func.__name__):
            return f"{func}({', '.join(map(self._print, expr.args))})"

        return super()._print_Function(expr)

    def _print_Piecewise(self, expr):
        """Custom printing for Piecewise function"""
        # merge the tuples and drop the final `True` condition
        # NOTE(review): the last condition is dropped unconditionally; this
        #  assumes the final branch is always the `True` default -- confirm
        #  for Piecewise expressions without such a branch.
        str_args = map(
            self._print,
            islice(chain.from_iterable(expr.args), 2 * len(expr.args) - 1),
        )
        return f"piecewise({', '.join(str_args)})"

    def _print_Min(self, expr):
        """Custom printing for Min function"""
        return f"min({', '.join(map(self._print, expr.args))})"

    def _print_Max(self, expr):
        """Custom printing for Max function"""
        return f"max({', '.join(map(self._print, expr.args))})"


def petab_math_str(expr: sp.Basic | sp.Expr | None) -> str:
    """Convert a sympy expression to a PEtab-compatible math expression string.

    :param expr: The sympy expression to convert, or ``None``.
    :returns: The expression as printed by :class:`PetabStrPrinter`;
        the empty string if ``expr`` is ``None``.

    :example:
    >>> expr = sp.sympify("x**2 + sin(y)")
    >>> petab_math_str(expr)
    'x ^ 2 + sin(y)'
    >>> expr = sp.sympify("Piecewise((1, x > 0), (0, True))")
    >>> petab_math_str(expr)
    'piecewise(1, x > 0, 0)'
    """
    if expr is None:
        return ""

    return PetabStrPrinter().doprint(expr)
+ + :example: + >>> from petab.v1.math import sympify_petab + >>> sympify_petab("sin(0)") + 0 + >>> sympify_petab("sin(0)", evaluate=False) + sin(0.0) + >>> sympify_petab("sin(0)", evaluate=True) + 0 + >>> sympify_petab("1 + 2", evaluate=True) + 3.00000000000000 + >>> sympify_petab("1 + 2", evaluate=False) + 1.0 + 2.0 + >>> sympify_petab("piecewise(1, 1 > 2, 0)", evaluate=True) + 0.0 + >>> sympify_petab("piecewise(1, 1 > 2, 0)", evaluate=False) + Piecewise((1.0, 1.0 > 2.0), (0.0, True)) + >>> # currently, boolean values are converted to numeric values + >>> # independent of the `evaluate` flag + >>> sympify_petab("true", evaluate=True) + 1.00000000000000 + >>> sympify_petab("true", evaluate=False) + 1.00000000000000 + >>> # ... and integer values are converted to floats + >>> sympify_petab("2", evaluate=True) + 2.00000000000000 """ + if isinstance(expr, sp.Basic): + return sympify_petab(petab_math_str(expr)) + if isinstance(expr, int) or isinstance(expr, np.integer): return sp.Integer(expr) if isinstance(expr, float) or isinstance(expr, np.floating): return sp.Float(expr) - # Set error listeners - input_stream = InputStream(expr) + try: + input_stream = InputStream(expr) + except TypeError as e: + raise TypeError(f"Error parsing {expr!r}: {e.args[0]}") from e + lexer = PetabMathExprLexer(input_stream) + # Set error listeners lexer.removeErrorListeners() lexer.addErrorListener(MathErrorListener()) @@ -49,13 +92,20 @@ def sympify_petab(expr: str | int | float) -> sp.Expr | sp.Basic: raise ValueError(f"Error parsing {expr!r}: {e.args[0]}") from None # Convert to sympy expression - visitor = MathVisitorSympy() + visitor = MathVisitorSympy(evaluate=evaluate) expr = visitor.visit(tree) expr = bool2num(expr) - # check for `False`, we'll accept both `True` and `None` - if expr.is_extended_real is False: - raise ValueError(f"Expression {expr} is not real-valued.") - + try: + # check for `False`, we'll accept both `True` and `None` + if expr.is_extended_real is False: + 
raise ValueError(f"Expression {expr} is not real-valued.") + except AttributeError as e: + # work-around for `sp.sec(0, evaluate=False).is_extended_real` error + if str(e) not in ( + "'One' object has no attribute '_eval_is_extended_real'", + "'Float' object has no attribute '_eval_is_extended_real'", + ): + raise return expr diff --git a/petab/v1/measurements.py b/petab/v1/measurements.py index 757ce9ce..f23a21c1 100644 --- a/petab/v1/measurements.py +++ b/petab/v1/measurements.py @@ -58,9 +58,11 @@ def write_measurement_df(df: pd.DataFrame, filename: str | Path) -> None: Arguments: df: PEtab measurement table - filename: Destination file name + filename: Destination file name. The parent directory will be created + if necessary. """ df = get_measurement_df(df) + Path(filename).parent.mkdir(parents=True, exist_ok=True) df.to_csv(filename, sep="\t", index=False) @@ -275,16 +277,20 @@ def assert_overrides_match_parameter_count( strict=True, ) } - noise_parameters_count = { - obs_id: len( - observables.get_formula_placeholders(formula, obs_id, "noise") - ) - for obs_id, formula in zip( - observable_df.index.values, - observable_df[NOISE_FORMULA], - strict=True, - ) - } + noise_parameters_count = ( + { + obs_id: len( + observables.get_formula_placeholders(formula, obs_id, "noise") + ) + for obs_id, formula in zip( + observable_df.index.values, + observable_df[NOISE_FORMULA], + strict=True, + ) + } + if NOISE_FORMULA in observable_df.columns + else dict.fromkeys(observable_df.index.values, 0) + ) for _, row in measurement_df.iterrows(): # check observable parameters @@ -301,7 +307,6 @@ def assert_overrides_match_parameter_count( row.get(OBSERVABLE_PARAMETERS, None) ) ) - # No overrides are also allowed if actual != expected: formula = observable_df.loc[row[OBSERVABLE_ID], OBSERVABLE_FORMULA] raise AssertionError( @@ -318,7 +323,6 @@ def assert_overrides_match_parameter_count( try: expected = noise_parameters_count[row[OBSERVABLE_ID]] - # No overrides are also allowed 
if len(replacements) != expected: raise AssertionError( f"Mismatch of noise parameter overrides in:\n{row}\n" diff --git a/petab/v1/models/__init__.py b/petab/v1/models/__init__.py index 938f55fb..a35ad432 100644 --- a/petab/v1/models/__init__.py +++ b/petab/v1/models/__init__.py @@ -1,4 +1,5 @@ """Handling of different model types supported by PEtab.""" + #: SBML model type as used in a PEtab v2 yaml file as `language`. MODEL_TYPE_SBML = "sbml" #: PySB model type as used in a PEtab v2 yaml file as `language`. diff --git a/petab/v1/models/model.py b/petab/v1/models/model.py index 795c7f0b..96613757 100644 --- a/petab/v1/models/model.py +++ b/petab/v1/models/model.py @@ -1,4 +1,5 @@ """PEtab model abstraction""" + from __future__ import annotations import abc @@ -13,25 +14,29 @@ class Model(abc.ABC): """Base class for wrappers for any PEtab-supported model type""" @abc.abstractmethod - def __init__(self): - ... + def __init__(self): ... def __repr__(self): return f"<{self.__class__.__name__} {self.model_id!r}>" @staticmethod @abc.abstractmethod - def from_file(filepath_or_buffer: Any, model_id: str) -> Model: + def from_file( + filepath_or_buffer: Any, model_id: str, base_path: str | Path = None + ) -> Model: """Load the model from the given path/URL - :param filepath_or_buffer: URL or path of the model + :param filepath_or_buffer: + Absolute or relative path/URL to the model file. + If relative, it is interpreted relative to `base_path`, if given. + :param base_path: Base path for relative paths in the model file. :param model_id: Model ID :returns: A ``Model`` instance holding the given model """ ... @abc.abstractmethod - def to_file(self, filename: [str, Path]): + def to_file(self, filename: str | Path | None = None): """Save the model to the given file :param filename: Destination filename @@ -41,13 +46,11 @@ def to_file(self, filename: [str, Path]): @classmethod @property @abc.abstractmethod - def type_id(cls): - ... + def type_id(cls): ... 
@property @abc.abstractmethod - def model_id(self): - ... + def model_id(self): ... @abc.abstractmethod def get_parameter_value(self, id_: str) -> float: @@ -133,11 +136,16 @@ def is_state_variable(self, id_: str) -> bool: def model_factory( - filepath_or_buffer: Any, model_language: str, model_id: str = None + filepath_or_buffer: Any, + model_language: str, + model_id: str = None, + base_path: str | Path = None, ) -> Model: """Create a PEtab model instance from the given model - :param filepath_or_buffer: Path/URL of the model + :param filepath_or_buffer: Path/URL of the model. + Absolute or relative to `base_path` if given. + :param base_path: Base path for relative paths in the model file. :param model_language: PEtab model language ID for the given model :param model_id: PEtab model ID for the given model :returns: A :py:class:`Model` instance representing the given model @@ -147,12 +155,16 @@ def model_factory( if model_language == MODEL_TYPE_SBML: from .sbml_model import SbmlModel - return SbmlModel.from_file(filepath_or_buffer, model_id=model_id) + return SbmlModel.from_file( + filepath_or_buffer, model_id=model_id, base_path=base_path + ) if model_language == MODEL_TYPE_PYSB: from .pysb_model import PySBModel - return PySBModel.from_file(filepath_or_buffer, model_id=model_id) + return PySBModel.from_file( + filepath_or_buffer, model_id=model_id, base_path=base_path + ) if model_language in known_model_types: raise NotImplementedError( diff --git a/petab/v1/models/pysb_model.py b/petab/v1/models/pysb_model.py index f0147990..6927ecb9 100644 --- a/petab/v1/models/pysb_model.py +++ b/petab/v1/models/pysb_model.py @@ -1,5 +1,7 @@ """Functions for handling PySB models""" +from __future__ import annotations + import itertools import re import sys @@ -9,6 +11,7 @@ import pysb +from ..._utils import _generate_path from .. import is_valid_identifier from . 
import MODEL_TYPE_PYSB from .model import Model @@ -54,9 +57,18 @@ class PySBModel(Model): type_id = MODEL_TYPE_PYSB - def __init__(self, model: pysb.Model, model_id: str = None): + def __init__( + self, + model: pysb.Model, + model_id: str = None, + rel_path: Path | str | None = None, + base_path: str | Path | None = None, + ): super().__init__() + self.rel_path = rel_path + self.base_path = base_path + self.model = model self._model_id = model_id or self.model.name @@ -68,18 +80,32 @@ def __init__(self, model: pysb.Model, model_id: str = None): ) @staticmethod - def from_file(filepath_or_buffer, model_id: str = None): + def from_file( + filepath_or_buffer, model_id: str = None, base_path: str | Path = None + ) -> PySBModel: return PySBModel( - model=_pysb_model_from_path(filepath_or_buffer), model_id=model_id + model=_pysb_model_from_path( + _generate_path(filepath_or_buffer, base_path) + ), + model_id=model_id, + rel_path=filepath_or_buffer, + base_path=base_path, ) - def to_file(self, filename: [str, Path]): - from pysb.export import export + def to_file(self, filename: str | Path | None = None) -> None: + model_source = self.to_str() - model_source = export(self.model, "pysb_flat") - with open(filename, "w") as f: + with open( + filename or _generate_path(self.rel_path, self.base_path), "w" + ) as f: f.write(model_source) + def to_str(self) -> str: + """Get the PySB model Python code as a string.""" + from pysb.export import export + + return export(self.model, "pysb_flat") + @property def model_id(self): return self._model_id @@ -192,7 +218,7 @@ def parse_species_name( match = complex_constituent_pattern.match(complex_constituent) if not match: raise ValueError( - f"Invalid species name: '{name}' " f"('{complex_constituent}')" + f"Invalid species name: '{name}' ('{complex_constituent}')" ) monomer = match.groupdict()["monomer"] site_config_str = match.groupdict()["site_config"] @@ -208,7 +234,7 @@ def parse_species_name( elif config.startswith("'"): if not 
config.endswith("'"): raise ValueError( - f"Invalid species name: '{name}' " f"('{config}')" + f"Invalid species name: '{name}' ('{config}')" ) # strip quotes config = config[1:-1] diff --git a/petab/v1/models/sbml_model.py b/petab/v1/models/sbml_model.py index fd57f2dc..2d31c0b9 100644 --- a/petab/v1/models/sbml_model.py +++ b/petab/v1/models/sbml_model.py @@ -1,5 +1,7 @@ """Functions for handling SBML models""" +from __future__ import annotations + import itertools from collections.abc import Iterable from pathlib import Path @@ -8,6 +10,7 @@ import sympy as sp from sympy.abc import _clash +from ..._utils import _generate_path from ..sbml import ( get_sbml_model, is_sbml_consistent, @@ -31,9 +34,31 @@ def __init__( sbml_reader: libsbml.SBMLReader = None, sbml_document: libsbml.SBMLDocument = None, model_id: str = None, + rel_path: Path | str | None = None, + base_path: str | Path | None = None, ): + """Constructor. + + :param sbml_model: SBML model. Optional if `sbml_document` is given. + :param sbml_reader: SBML reader. Optional. + :param sbml_document: SBML document. Optional if `sbml_model` is given. + :param model_id: Model ID. Defaults to the SBML model ID.""" super().__init__() + self.rel_path = rel_path + self.base_path = base_path + + if sbml_model is None and sbml_document is None: + raise ValueError( + "Either sbml_model or sbml_document must be given." 
+ ) + + if sbml_model is None: + sbml_model = sbml_document.getModel() + + if sbml_document is None: + sbml_document = sbml_model.getSBMLDocument() + self.sbml_reader: libsbml.SBMLReader | None = sbml_reader self.sbml_document: libsbml.SBMLDocument | None = sbml_document self.sbml_model: libsbml.Model | None = sbml_model @@ -46,9 +71,7 @@ def __getstate__(self): # libsbml stuff cannot be serialized directly if self.sbml_model: - sbml_document = self.sbml_model.getSBMLDocument() - sbml_writer = libsbml.SBMLWriter() - state["sbml_string"] = sbml_writer.writeSBMLToString(sbml_document) + state["sbml_string"] = self.to_sbml_str() exclude = ["sbml_reader", "sbml_document", "sbml_model"] for key in exclude: @@ -70,17 +93,76 @@ def __setstate__(self, state): self.__dict__.update(state) @staticmethod - def from_file(filepath_or_buffer, model_id: str = None): + def from_file( + filepath_or_buffer, model_id: str = None, base_path: str | Path = None + ) -> SbmlModel: sbml_reader, sbml_document, sbml_model = get_sbml_model( - filepath_or_buffer + _generate_path(filepath_or_buffer, base_path=base_path) ) return SbmlModel( sbml_model=sbml_model, sbml_reader=sbml_reader, sbml_document=sbml_document, model_id=model_id, + rel_path=filepath_or_buffer, + base_path=base_path, + ) + + @staticmethod + def from_string(sbml_string, model_id: str = None) -> SbmlModel: + """Create SBML model from an SBML string. + + :param sbml_string: SBML model as string. + :param model_id: Model ID. Defaults to the SBML model ID. + """ + sbml_reader, sbml_document, sbml_model = load_sbml_from_string( + sbml_string ) + if not model_id: + model_id = sbml_model.getIdAttribute() + + return SbmlModel( + sbml_model=sbml_model, + sbml_reader=sbml_reader, + sbml_document=sbml_document, + model_id=model_id, + ) + + @staticmethod + def from_antimony(ant_model: str | Path, **kwargs) -> SbmlModel: + """Create SBML model from an Antimony model. 
+ + Requires the `antimony` package (https://github.com/sys-bio/antimony). + + :param ant_model: Antimony model as string or path to file. + Strings are interpreted as Antimony model strings. + :param kwargs: Additional keyword arguments passed to + :meth:`SbmlModel.from_string`. + """ + sbml_str = antimony2sbml(ant_model) + return SbmlModel.from_string(sbml_str, **kwargs) + + def to_antimony(self) -> str: + """Convert the SBML model to an Antimony string.""" + import antimony as ant + + sbml_str = self.to_sbml_str() + + ant.clearPreviousLoads() + ant.freeAll() + + if ant.loadSBMLString(sbml_str) < 0: + raise RuntimeError(ant.getLastError()) + + return ant.getAntimonyString() + + def to_sbml_str(self) -> str: + """Convert the SBML model to an SBML/XML string.""" + sbml_document = self.sbml_model.getSBMLDocument() + sbml_writer = libsbml.SBMLWriter() + return sbml_writer.writeSBMLToString(sbml_document) + @property def model_id(self): return self._model_id @@ -89,9 +171,10 @@ def model_id(self): def model_id(self, model_id): self._model_id = model_id - def to_file(self, filename: [str, Path]): + def to_file(self, filename: str | Path | None = None) -> None: write_sbml( - self.sbml_document or self.sbml_model.getSBMLDocument(), filename + self.sbml_document or self.sbml_model.getSBMLDocument(), + filename or _generate_path(self.rel_path, self.base_path), ) def get_parameter_value(self, id_: str) -> float: @@ -222,3 +305,43 @@ def sympify_sbml(sbml_obj: libsbml.ASTNode | libsbml.SBase) -> sp.Expr: ) return sp.sympify(formula_str, locals=_clash) + + +def antimony2sbml(ant_model: str | Path) -> str: + """Convert Antimony model to SBML. + + :param ant_model: Antimony model as string or path to file. + Strings are interpreted as Antimony model strings. + + :returns: + The SBML model as string. 
+ """ + import antimony as ant + + # Unload everything / free memory + ant.clearPreviousLoads() + ant.freeAll() + + try: + # potentially fails because of too long file name + is_file = ant_model and Path(ant_model).exists() + except OSError: + is_file = False + + if is_file: + status = ant.loadAntimonyFile(str(ant_model)) + else: + status = ant.loadAntimonyString(ant_model) + if status < 0: + raise RuntimeError( + f"Antimony model could not be loaded: {ant.getLastError()}" + ) + + if (main_module_name := ant.getMainModuleName()) is None: + raise AssertionError("There is no Antimony module.") + + sbml_str = ant.getSBMLString(main_module_name) + if not sbml_str: + raise ValueError("Antimony model could not be converted to SBML.") + + return sbml_str diff --git a/petab/v1/observables.py b/petab/v1/observables.py index 1485302d..38c539c7 100644 --- a/petab/v1/observables.py +++ b/petab/v1/observables.py @@ -67,9 +67,11 @@ def write_observable_df(df: pd.DataFrame, filename: str | Path) -> None: Arguments: df: PEtab observable table - filename: Destination file name + filename: Destination file name. The parent directory will be created + if necessary. """ df = get_observable_df(df) + Path(filename).parent.mkdir(parents=True, exist_ok=True) df.to_csv(filename, sep="\t", index=True) @@ -82,7 +84,7 @@ def get_output_parameters( ) -> list[str]: """Get output parameters - Returns IDs of parameters used in observable and noise formulas that are + Returns IDs of parameters used in observable or noise formulas that are not defined in the model. Arguments: diff --git a/petab/v1/parameter_mapping.py b/petab/v1/parameter_mapping.py index 014b4a8e..a438c6b2 100644 --- a/petab/v1/parameter_mapping.py +++ b/petab/v1/parameter_mapping.py @@ -133,7 +133,7 @@ def get_optimization_to_simulation_parameter_mapping( if model: raise ValueError( - "Arguments `model` and `sbml_model` are " "mutually exclusive." + "Arguments `model` and `sbml_model` are mutually exclusive." 
) model = SbmlModel(sbml_model=sbml_model) @@ -383,7 +383,7 @@ def get_parameter_mapping_for_condition( if model: raise ValueError( - "Arguments `model` and `sbml_model` are " "mutually exclusive." + "Arguments `model` and `sbml_model` are mutually exclusive." ) model = SbmlModel(sbml_model=sbml_model) @@ -404,7 +404,7 @@ def get_parameter_mapping_for_condition( # initialize mapping dicts # for the case of matching simulation and optimization parameter vector par_mapping = simulation_parameters.copy() - scale_mapping = {par_id: LIN for par_id in par_mapping.keys()} + scale_mapping = dict.fromkeys(par_mapping.keys(), LIN) _output_parameters_to_nan(par_mapping) # not strictly necessary for preequilibration, be we do it to have @@ -495,7 +495,7 @@ def _apply_overrides_for_observable( overrides: list of overrides for noise or observable parameters """ for i, override in enumerate(overrides): - overridee_id = f"{override_type}Parameter{i+1}_{observable_id}" + overridee_id = f"{override_type}Parameter{i + 1}_{observable_id}" mapping[overridee_id] = override diff --git a/petab/v1/parameters.py b/petab/v1/parameters.py index 8f252988..82e23669 100644 --- a/petab/v1/parameters.py +++ b/petab/v1/parameters.py @@ -3,7 +3,7 @@ import numbers import warnings from collections import OrderedDict -from collections.abc import Iterable, Sequence +from collections.abc import Iterable, Sequence, Set from pathlib import Path from typing import ( Literal, @@ -112,9 +112,11 @@ def write_parameter_df(df: pd.DataFrame, filename: str | Path) -> None: Arguments: df: PEtab parameter table - filename: Destination file name + filename: Destination file name. The parent directory will be created + if necessary. """ df = get_parameter_df(df) + Path(filename).parent.mkdir(parents=True, exist_ok=True) df.to_csv(filename, sep="\t", index=True) @@ -199,7 +201,7 @@ def create_parameter_df( if model: raise ValueError( - "Arguments `model` and `sbml_model` are " "mutually exclusive." 
+ "Arguments `model` and `sbml_model` are mutually exclusive." ) model = SbmlModel(sbml_model=sbml_model) if include_optional: @@ -258,7 +260,7 @@ def get_required_parameters_for_parameter_table( observable_df: pd.DataFrame, measurement_df: pd.DataFrame, mapping_df: pd.DataFrame = None, -) -> set[str]: +) -> Set[str]: """ Get set of parameters which need to go into the parameter table @@ -332,9 +334,8 @@ def append_overrides(overrides): if not model.has_entity_with_id(p): parameter_ids[p] = None - # remove parameters that occur in the condition table and are overridden - # for ALL conditions - for p in condition_df.columns[~condition_df.isnull().any()]: + # parameters that are overridden via the condition table are not allowed + for p in condition_df.columns: try: del parameter_ids[p] except KeyError: @@ -524,7 +525,8 @@ def scale( if scale_str == LOG: return np.log(parameter) if scale_str == LOG10: - return np.log10(parameter) + with np.errstate(divide="ignore"): + return np.log10(parameter) raise ValueError(f"Invalid parameter scaling: {scale_str}") diff --git a/petab/v1/priors.py b/petab/v1/priors.py index 52fec20d..b8bf8dcb 100644 --- a/petab/v1/priors.py +++ b/petab/v1/priors.py @@ -1,5 +1,9 @@ """Functions related to prior handling.""" + +from __future__ import annotations + import copy +from typing import Literal import numpy as np import pandas as pd @@ -29,12 +33,279 @@ PARAMETER_SEPARATOR, SIMULATION_CONDITION_ID, TIME, + C, Problem, ) +from .distributions import * +from .parameters import scale, unscale __all__ = ["priors_to_measurements"] +class Prior: + """A PEtab parameter prior. + + Different from the general :class:`Distribution`, this class is used to + represent the prior distribution of a PEtab parameter using the + PEtab-specific options like `parameterScale`, `*PriorType`, + `*PriorParameters`, and `lowerBound` / `upperBounds`. + + :param type_: The type of the distribution. 
+ :param transformation: The transformation to be applied to the sample. + Ignored if `parameter_scale` is `True`. + :param parameters: The parameters of the distribution (unaffected by + `parameter_scale` and `transformation`, but in the case of + `parameterScale*` distribution types, the parameters are assumed to be + on the `parameter_scale` scale). + :param bounds: The untransformed bounds of the sample (lower, upper). + :param transformation: The transformation of the distribution. + :param _bounds_truncate: **deprecated** + Whether the generated prior will be truncated at the bounds. + If ``True``, the probability density will be rescaled + accordingly and the sample is generated from the truncated + distribution. + If ``False``, the probability density will not be rescaled + accordingly, but the sample will be generated from the truncated + distribution. + """ + + def __init__( + self, + type_: str, + parameters: tuple, + bounds: tuple = None, + transformation: str = C.LIN, + _bounds_truncate: bool = True, + ): + if transformation not in C.PARAMETER_SCALES: + raise ValueError( + f"Unknown parameter transformation: {transformation}" + ) + + if len(parameters) != 2: + raise ValueError( + f"Expected two parameters, got {len(parameters)}: {parameters}" + ) + + if bounds is not None and len(bounds) != 2: + raise ValueError( + "Expected (lowerBound, upperBound), got " + f"{len(bounds)}: {bounds}" + ) + + self._type = type_ + self._parameters = parameters + self._bounds = bounds + self._transformation = transformation + self._bounds_truncate = _bounds_truncate + + truncation = bounds + if truncation is not None: + # for uniform, we don't want to implement truncation and just + # adapt the distribution parameters + if type_ == C.PARAMETER_SCALE_UNIFORM: + parameters = ( + max(parameters[0], scale(truncation[0], transformation)), + min(parameters[1], scale(truncation[1], transformation)), + ) + elif type_ == C.UNIFORM: + parameters = ( + max(parameters[0], 
truncation[0]), + min(parameters[1], truncation[1]), + ) + + # create the underlying distribution + match type_, transformation: + case (C.UNIFORM, _) | (C.PARAMETER_SCALE_UNIFORM, C.LIN): + self.distribution = Uniform(*parameters) + case (C.NORMAL, _) | (C.PARAMETER_SCALE_NORMAL, C.LIN): + self.distribution = Normal(*parameters, trunc=truncation) + case (C.LAPLACE, _) | (C.PARAMETER_SCALE_LAPLACE, C.LIN): + self.distribution = Laplace(*parameters, trunc=truncation) + case (C.PARAMETER_SCALE_UNIFORM, C.LOG): + self.distribution = Uniform(*parameters, log=True) + case (C.LOG_NORMAL, _) | (C.PARAMETER_SCALE_NORMAL, C.LOG): + self.distribution = Normal( + *parameters, log=True, trunc=truncation + ) + case (C.LOG_LAPLACE, _) | (C.PARAMETER_SCALE_LAPLACE, C.LOG): + self.distribution = Laplace( + *parameters, log=True, trunc=truncation + ) + case (C.PARAMETER_SCALE_UNIFORM, C.LOG10): + self.distribution = Uniform(*parameters, log=10) + case (C.PARAMETER_SCALE_NORMAL, C.LOG10): + self.distribution = Normal( + *parameters, log=10, trunc=truncation + ) + case (C.PARAMETER_SCALE_LAPLACE, C.LOG10): + self.distribution = Laplace( + *parameters, log=10, trunc=truncation + ) + case _: + raise ValueError( + "Unsupported distribution type / transformation: " + f"{type_} / {transformation}" + ) + + def __repr__(self): + return ( + f"{self.__class__.__name__}(" + f"{self.type!r}, {self.parameters!r}," + f" bounds={self.bounds!r}, transformation={self.transformation!r}," + ")" + ) + + @property + def type(self) -> str: + return self._type + + @property + def parameters(self) -> tuple: + """The parameters of the distribution.""" + return self._parameters + + @property + def bounds(self) -> tuple[float, float] | None: + """The non-scaled bounds of the distribution.""" + return self._bounds + + @property + def transformation(self) -> str: + """The `parameterScale`.""" + return self._transformation + + def sample(self, shape=None, x_scaled=False) -> np.ndarray | float: + """Sample from 
the distribution. + + :param shape: The shape of the sample. + :param x_scaled: Whether the sample should be on the parameter scale. + :return: A sample from the distribution. + """ + raw_sample = self.distribution.sample(shape) + if x_scaled: + return self._scale_sample(raw_sample) + else: + return raw_sample + + def _scale_sample(self, sample): + """Scale the sample to the parameter space""" + # we also need to scale parameterScale* distributions, because + # internally, they are handled as (unscaled) log-distributions + return scale(sample, self.transformation) + + @property + def lb_scaled(self) -> float: + """The lower bound on the parameter scale.""" + return scale(self.bounds[0], self.transformation) + + @property + def ub_scaled(self) -> float: + """The upper bound on the parameter scale.""" + return scale(self.bounds[1], self.transformation) + + def _chain_rule_coeff(self, x) -> np.ndarray | float: + """The chain rule coefficient for the transformation at x.""" + x = unscale(x, self.transformation) + + # scale the PDF to the parameter scale + if self.transformation == C.LIN: + coeff = 1 + elif self.transformation == C.LOG10: + coeff = x * np.log(10) + elif self.transformation == C.LOG: + coeff = x + else: + raise ValueError(f"Unknown transformation: {self.transformation}") + + return coeff + + def pdf( + self, x, x_scaled: bool = False, rescale=False + ) -> np.ndarray | float: + """Probability density function at x. + + This accounts for truncation, independent of the `bounds_truncate` + parameter. + + :param x: The value at which to evaluate the PDF. + ``x`` is assumed to be on the parameter scale. + :param x_scaled: Whether ``x`` is on the parameter scale. + :param rescale: Whether to rescale the PDF to integrate to 1 on the + parameter scale. Only used if ``x_scaled`` is ``True``. + :return: The value of the PDF at ``x``. 
+ """ + if x_scaled: + coeff = self._chain_rule_coeff(x) if rescale else 1 + x = unscale(x, self.transformation) + return self.distribution.pdf(x) * coeff + + return self.distribution.pdf(x) + + def neglogprior( + self, x: np.array | float, x_scaled: bool = False + ) -> np.ndarray | float: + """Negative log-prior at x. + + :param x: The value at which to evaluate the negative log-prior. + :param x_scaled: Whether ``x`` is on the parameter scale. + Note that the prior is always evaluated on the non-scaled + parameters. + :return: The negative log-prior at ``x``. + """ + if self._bounds_truncate: + # the truncation is handled by the distribution + # the prior is always evaluated on the non-scaled parameters + return -np.log(self.pdf(x, x_scaled=x_scaled, rescale=False)) + + # we want to evaluate the prior on the untruncated distribution + if x_scaled: + x = unscale(x, self.transformation) + return -np.log(self.distribution._pdf_untruncated(x)) + + @staticmethod + def from_par_dict( + d, + type_=Literal["initialization", "objective"], + _bounds_truncate: bool = True, + ) -> Prior: + """Create a distribution from a row of the parameter table. + + :param d: A dictionary representing a row of the parameter table. + :param type_: The type of the distribution. + :param _bounds_truncate: Whether the generated prior will be truncated + at the bounds. **deprecated**. + :return: A distribution object. 
+ """ + dist_type = C.PARAMETER_SCALE_UNIFORM + if (_table_dist_type := d.get(f"{type_}PriorType")) and ( + isinstance(_table_dist_type, str) or not np.isnan(_table_dist_type) + ): + dist_type = _table_dist_type + + pscale = d.get(C.PARAMETER_SCALE, C.LIN) + params = d.get(f"{type_}PriorParameters", None) + if pd.isna(params) and dist_type == C.PARAMETER_SCALE_UNIFORM: + params = ( + scale(d[C.LOWER_BOUND], pscale), + scale(d[C.UPPER_BOUND], pscale), + ) + else: + params = tuple( + map( + float, + params.split(C.PARAMETER_SEPARATOR), + ) + ) + return Prior( + type_=dist_type, + parameters=params, + bounds=(d[C.LOWER_BOUND], d[C.UPPER_BOUND]), + transformation=pscale, + _bounds_truncate=_bounds_truncate, + ) + + def priors_to_measurements(problem: Problem): """Convert priors to measurements. @@ -60,6 +331,12 @@ def priors_to_measurements(problem: Problem): - `measurement`: the PDF location - `noiseFormula`: the PDF scale + .. warning:: + + This function does not account for the truncation of the prior by + the bounds in the parameter table. The resulting observable will + not be truncated, and the PDF will not be rescaled. + Arguments --------- problem: @@ -84,6 +361,7 @@ def priors_to_measurements(problem: Problem): return new_problem def scaled_observable_formula(parameter_id, parameter_scale): + # The location parameter of the prior if parameter_scale == LIN: return parameter_id if parameter_scale == LOG: @@ -112,6 +390,12 @@ def scaled_observable_formula(parameter_id, parameter_scale): # offset raise NotImplementedError("Uniform priors are not supported.") + if prior_type not in (C.NORMAL, C.LAPLACE): + # we can't (easily) handle parameterScale* priors or log*-priors + raise NotImplementedError( + f"Objective prior type {prior_type} is not implemented." 
+ ) + parameter_id = row.name prior_parameters = tuple( map( @@ -136,7 +420,9 @@ def scaled_observable_formula(parameter_id, parameter_scale): OBSERVABLE_ID: new_obs_id, OBSERVABLE_FORMULA: scaled_observable_formula( parameter_id, - parameter_scale if "parameterScale" in prior_type else LIN, + parameter_scale + if prior_type in C.PARAMETER_SCALE_PRIOR_TYPES + else LIN, ), NOISE_FORMULA: f"noiseParameter1_{new_obs_id}", } @@ -145,12 +431,13 @@ def scaled_observable_formula(parameter_id, parameter_scale): elif OBSERVABLE_TRANSFORMATION in new_problem.observable_df: # only set default if the column is already present new_observable[OBSERVABLE_TRANSFORMATION] = LIN - + # type of the underlying distribution if prior_type in (NORMAL, PARAMETER_SCALE_NORMAL, LOG_NORMAL): new_observable[NOISE_DISTRIBUTION] = NORMAL elif prior_type in (LAPLACE, PARAMETER_SCALE_LAPLACE, LOG_LAPLACE): new_observable[NOISE_DISTRIBUTION] = LAPLACE else: + # we can't (easily) handle uniform priors in PEtab v1 raise NotImplementedError( f"Objective prior type {prior_type} is not implemented." 
) @@ -174,17 +461,17 @@ def scaled_observable_formula(parameter_id, parameter_scale): ].iloc[0], } if PREEQUILIBRATION_CONDITION_ID in new_problem.measurement_df: - new_measurement[ - PREEQUILIBRATION_CONDITION_ID - ] = new_problem.measurement_df[PREEQUILIBRATION_CONDITION_ID].iloc[ - 0 - ] + new_measurement[PREEQUILIBRATION_CONDITION_ID] = ( + new_problem.measurement_df[PREEQUILIBRATION_CONDITION_ID].iloc[ + 0 + ] + ) new_measurement_dicts.append(new_measurement) # remove prior from parameter table - new_problem.parameter_df.loc[ - parameter_id, OBJECTIVE_PRIOR_TYPE - ] = np.nan + new_problem.parameter_df.loc[parameter_id, OBJECTIVE_PRIOR_TYPE] = ( + np.nan + ) new_problem.parameter_df.loc[ parameter_id, OBJECTIVE_PRIOR_PARAMETERS ] = np.nan diff --git a/petab/v1/problem.py b/petab/v1/problem.py index 4a5577eb..6da82a2f 100644 --- a/petab/v1/problem.py +++ b/petab/v1/problem.py @@ -1,20 +1,23 @@ """PEtab Problem class""" + from __future__ import annotations import os import tempfile -from collections.abc import Iterable +from collections.abc import Iterable, Sequence from math import nan +from numbers import Number from pathlib import Path, PurePosixPath from typing import TYPE_CHECKING from warnings import warn import pandas as pd +from pydantic import AnyUrl, BaseModel, Field +from ..versions import get_major_version from . import ( conditions, core, - format_version, mapping, measurements, observables, @@ -39,7 +42,9 @@ class Problem: """ - PEtab parameter estimation problem as defined by + PEtab parameter estimation problem. + + A PEtab problem as defined by: - model - condition table @@ -48,7 +53,9 @@ class Problem: - observables table - mapping table - Optionally it may contain visualization tables. + Optionally, it may contain visualization tables. + + See also :doc:`petab:v1/documentation_data_format`. 
Parameters: condition_df: PEtab condition table @@ -78,6 +85,7 @@ def __init__( observable_df: pd.DataFrame = None, mapping_df: pd.DataFrame = None, extensions_config: dict = None, + config: ProblemConfig = None, ): self.condition_df: pd.DataFrame | None = condition_df self.measurement_df: pd.DataFrame | None = measurement_df @@ -112,6 +120,7 @@ def __init__( self.model: Model | None = model self.extensions_config = extensions_config or {} + self.config = config def __getattr__(self, name): # For backward-compatibility, allow access to SBML model related @@ -119,7 +128,7 @@ def __getattr__(self, name): if name in {"sbml_model", "sbml_reader", "sbml_document"}: return getattr(self.model, name) if self.model else None raise AttributeError( - f"'{self.__class__.__name__}' object has no " f"attribute '{name}'" + f"'{self.__class__.__name__}' object has no attribute '{name}'" ) def __setattr__(self, name, value): @@ -251,21 +260,32 @@ def from_files( ) @staticmethod - def from_yaml(yaml_config: dict | Path | str) -> Problem: + def from_yaml( + yaml_config: dict | Path | str, base_path: str | Path = None + ) -> Problem: """ Factory method to load model and tables as specified by YAML file. 
Arguments: yaml_config: PEtab configuration as dictionary or YAML file name + base_path: Base directory or URL to resolve relative paths """ + # path to the yaml file + filepath = None + if isinstance(yaml_config, Path): yaml_config = str(yaml_config) - get_path = lambda filename: filename # noqa: E731 if isinstance(yaml_config, str): - path_prefix = get_path_prefix(yaml_config) + filepath = yaml_config + if base_path is None: + base_path = get_path_prefix(yaml_config) yaml_config = yaml.load_yaml(yaml_config) - get_path = lambda filename: f"{path_prefix}/{filename}" # noqa: E731 + + def get_path(filename): + if base_path is None: + return filename + return f"{base_path}/{filename}" if yaml.is_composite_problem(yaml_config): raise ValueError( @@ -275,38 +295,38 @@ def from_yaml(yaml_config: dict | Path | str) -> Problem: "petab.CompositeProblem.from_yaml() instead." ) - if yaml_config[FORMAT_VERSION] not in {"1", 1, "1.0.0", "2.0.0"}: + major_version = get_major_version(yaml_config) + if major_version not in {1, 2}: raise ValueError( "Provided PEtab files are of unsupported version " - f"{yaml_config[FORMAT_VERSION]}. Expected " - f"{format_version.__format_version__}." + f"{yaml_config[FORMAT_VERSION]}." ) - if yaml_config[FORMAT_VERSION] == "2.0.0": - warn("Support for PEtab2.0 is experimental!", stacklevel=2) + if major_version == 2: warn( "Using petab.v1.Problem with PEtab2.0 is deprecated. 
" "Use petab.v2.Problem instead.", DeprecationWarning, stacklevel=2, ) + config = ProblemConfig( + **yaml_config, base_path=base_path, filepath=filepath + ) + problem0 = config.problems[0] + # currently required for handling PEtab v2 in here + problem0_ = yaml_config["problems"][0] - problem0 = yaml_config["problems"][0] - - if isinstance(yaml_config[PARAMETER_FILE], list): + if isinstance(config.parameter_file, list): parameter_df = parameters.get_parameter_df( - [get_path(f) for f in yaml_config[PARAMETER_FILE]] + [get_path(f) for f in config.parameter_file] ) else: parameter_df = ( - parameters.get_parameter_df( - get_path(yaml_config[PARAMETER_FILE]) - ) - if yaml_config[PARAMETER_FILE] + parameters.get_parameter_df(get_path(config.parameter_file)) + if config.parameter_file else None ) - - if yaml_config[FORMAT_VERSION] in [1, "1", "1.0.0"]: - if len(problem0[SBML_FILES]) > 1: + if major_version == 1: + if len(problem0.sbml_files) > 1: # TODO https://github.com/PEtab-dev/libpetab-python/issues/6 raise NotImplementedError( "Support for multiple models is not yet implemented." @@ -314,24 +334,24 @@ def from_yaml(yaml_config: dict | Path | str) -> Problem: model = ( model_factory( - get_path(problem0[SBML_FILES][0]), + get_path(problem0.sbml_files[0]), MODEL_TYPE_SBML, model_id=None, ) - if problem0[SBML_FILES] + if problem0.sbml_files else None ) else: - if len(problem0[MODEL_FILES]) > 1: + if len(problem0_[MODEL_FILES]) > 1: # TODO https://github.com/PEtab-dev/libpetab-python/issues/6 raise NotImplementedError( "Support for multiple models is not yet implemented." 
) - if not problem0[MODEL_FILES]: + if not problem0_[MODEL_FILES]: model = None else: model_id, model_info = next( - iter(problem0[MODEL_FILES].items()) + iter(problem0_[MODEL_FILES].items()) ) model = model_factory( get_path(model_info[MODEL_LOCATION]), @@ -339,9 +359,7 @@ def from_yaml(yaml_config: dict | Path | str) -> Problem: model_id=model_id, ) - measurement_files = [ - get_path(f) for f in problem0.get(MEASUREMENT_FILES, []) - ] + measurement_files = [get_path(f) for f in problem0.measurement_files] # If there are multiple tables, we will merge them measurement_df = ( core.concat_tables( @@ -351,9 +369,7 @@ def from_yaml(yaml_config: dict | Path | str) -> Problem: else None ) - condition_files = [ - get_path(f) for f in problem0.get(CONDITION_FILES, []) - ] + condition_files = [get_path(f) for f in problem0.condition_files] # If there are multiple tables, we will merge them condition_df = ( core.concat_tables(condition_files, conditions.get_condition_df) @@ -362,7 +378,7 @@ def from_yaml(yaml_config: dict | Path | str) -> Problem: ) visualization_files = [ - get_path(f) for f in problem0.get(VISUALIZATION_FILES, []) + get_path(f) for f in problem0.visualization_files ] # If there are multiple tables, we will merge them visualization_df = ( @@ -371,9 +387,7 @@ def from_yaml(yaml_config: dict | Path | str) -> Problem: else None ) - observable_files = [ - get_path(f) for f in problem0.get(OBSERVABLE_FILES, []) - ] + observable_files = [get_path(f) for f in problem0.observable_files] # If there are multiple tables, we will merge them observable_df = ( core.concat_tables(observable_files, observables.get_observable_df) @@ -381,7 +395,7 @@ def from_yaml(yaml_config: dict | Path | str) -> Problem: else None ) - mapping_files = [get_path(f) for f in problem0.get(MAPPING_FILES, [])] + mapping_files = [get_path(f) for f in problem0_.get(MAPPING_FILES, [])] # If there are multiple tables, we will merge them mapping_df = ( core.concat_tables(mapping_files, 
mapping.get_mapping_df) @@ -398,6 +412,7 @@ def from_yaml(yaml_config: dict | Path | str) -> Problem: visualization_df=visualization_df, mapping_df=mapping_df, extensions_config=yaml_config.get(EXTENSIONS, {}), + config=config, ) @staticmethod @@ -475,7 +490,7 @@ def to_files_generic( if self.model: if not isinstance(self.model, SbmlModel): raise NotImplementedError( - "Saving non-SBML models is " "currently not supported." + "Saving non-SBML models is currently not supported." ) filenames["model_file"] = "model.xml" @@ -998,3 +1013,215 @@ def n_priors(self) -> int: return 0 return self.parameter_df[OBJECTIVE_PRIOR_PARAMETERS].notna().sum() + + def add_condition(self, id_: str, name: str = None, **kwargs): + """Add a simulation condition to the problem. + + Arguments: + id_: The condition id + name: The condition name + kwargs: Parameter, value pairs to add to the condition table. + """ + record = {CONDITION_ID: [id_], **kwargs} + if name is not None: + record[CONDITION_NAME] = name + tmp_df = pd.DataFrame(record).set_index([CONDITION_ID]) + self.condition_df = ( + pd.concat([self.condition_df, tmp_df]) + if self.condition_df is not None + else tmp_df + ) + + def add_observable( + self, + id_: str, + formula: str | float | int, + noise_formula: str | float | int = None, + noise_distribution: str = None, + transform: str = None, + name: str = None, + **kwargs, + ): + """Add an observable to the problem. 
+ + Arguments: + id_: The observable id + formula: The observable formula + noise_formula: The noise formula + noise_distribution: The noise distribution + transform: The observable transformation + name: The observable name + kwargs: additional columns/values to add to the observable table + + """ + record = { + OBSERVABLE_ID: [id_], + OBSERVABLE_FORMULA: [formula], + } + if name is not None: + record[OBSERVABLE_NAME] = [name] + if noise_formula is not None: + record[NOISE_FORMULA] = [noise_formula] + if noise_distribution is not None: + record[NOISE_DISTRIBUTION] = [noise_distribution] + if transform is not None: + record[OBSERVABLE_TRANSFORMATION] = [transform] + record.update(kwargs) + + tmp_df = pd.DataFrame(record).set_index([OBSERVABLE_ID]) + self.observable_df = ( + pd.concat([self.observable_df, tmp_df]) + if self.observable_df is not None + else tmp_df + ) + + def add_parameter( + self, + id_: str, + estimate: bool | str | int = True, + nominal_value: Number | None = None, + scale: str = None, + lb: Number = None, + ub: Number = None, + init_prior_type: str = None, + init_prior_pars: str | Sequence = None, + obj_prior_type: str = None, + obj_prior_pars: str | Sequence = None, + **kwargs, + ): + """Add a parameter to the problem. 
+ + Arguments: + id_: The parameter id + estimate: Whether the parameter is estimated + nominal_value: The nominal value of the parameter + scale: The parameter scale + lb: The lower bound of the parameter + ub: The upper bound of the parameter + init_prior_type: The type of the initialization prior distribution + init_prior_pars: The parameters of the initialization prior + distribution + obj_prior_type: The type of the objective prior distribution + obj_prior_pars: The parameters of the objective prior distribution + kwargs: additional columns/values to add to the parameter table + """ + record = { + PARAMETER_ID: [id_], + } + if estimate is not None: + record[ESTIMATE] = [int(estimate)] + if nominal_value is not None: + record[NOMINAL_VALUE] = [nominal_value] + if scale is not None: + record[PARAMETER_SCALE] = [scale] + if lb is not None: + record[LOWER_BOUND] = [lb] + if ub is not None: + record[UPPER_BOUND] = [ub] + if init_prior_type is not None: + record[INITIALIZATION_PRIOR_TYPE] = [init_prior_type] + if init_prior_pars is not None: + if not isinstance(init_prior_pars, str): + init_prior_pars = PARAMETER_SEPARATOR.join( + map(str, init_prior_pars) + ) + record[INITIALIZATION_PRIOR_PARAMETERS] = [init_prior_pars] + if obj_prior_type is not None: + record[OBJECTIVE_PRIOR_TYPE] = [obj_prior_type] + if obj_prior_pars is not None: + if not isinstance(obj_prior_pars, str): + obj_prior_pars = PARAMETER_SEPARATOR.join( + map(str, obj_prior_pars) + ) + record[OBJECTIVE_PRIOR_PARAMETERS] = [obj_prior_pars] + record.update(kwargs) + + tmp_df = pd.DataFrame(record).set_index([PARAMETER_ID]) + self.parameter_df = ( + pd.concat([self.parameter_df, tmp_df]) + if self.parameter_df is not None + else tmp_df + ) + + def add_measurement( + self, + obs_id: str, + sim_cond_id: str, + time: float, + measurement: float, + observable_parameters: Sequence[str | float] = None, + noise_parameters: Sequence[str | float] = None, + preeq_cond_id: str = None, + ): + """Add a measurement 
to the problem. + + Arguments: + obs_id: The observable ID + sim_cond_id: The simulation condition ID + time: The measurement time + measurement: The measurement value + observable_parameters: The observable parameters + noise_parameters: The noise parameters + preeq_cond_id: The pre-equilibration condition ID + """ + record = { + OBSERVABLE_ID: [obs_id], + SIMULATION_CONDITION_ID: [sim_cond_id], + TIME: [time], + MEASUREMENT: [measurement], + } + if observable_parameters is not None: + record[OBSERVABLE_PARAMETERS] = [ + PARAMETER_SEPARATOR.join(map(str, observable_parameters)) + ] + if noise_parameters is not None: + record[NOISE_PARAMETERS] = [ + PARAMETER_SEPARATOR.join(map(str, noise_parameters)) + ] + if preeq_cond_id is not None: + record[PREEQUILIBRATION_CONDITION_ID] = [preeq_cond_id] + + tmp_df = pd.DataFrame(record) + self.measurement_df = ( + pd.concat([self.measurement_df, tmp_df]) + if self.measurement_df is not None + else tmp_df + ) + + +class SubProblem(BaseModel): + """A `problems` object in the PEtab problem configuration.""" + + sbml_files: list[str | AnyUrl] = [] + measurement_files: list[str | AnyUrl] = [] + condition_files: list[str | AnyUrl] = [] + observable_files: list[str | AnyUrl] = [] + visualization_files: list[str | AnyUrl] = [] + + +class ProblemConfig(BaseModel): + """The PEtab problem configuration.""" + + filepath: str | AnyUrl | None = Field( + None, + description="The path to the PEtab problem configuration.", + exclude=True, + ) + base_path: str | AnyUrl | None = Field( + None, + description="The base path to resolve relative paths.", + exclude=True, + ) + format_version: str | int = 1 + parameter_file: str | AnyUrl | None = None + problems: list[SubProblem] = [] + + def to_yaml(self, filename: str | Path): + """Write the configuration to a YAML file. + + :param filename: Destination file name. The parent directory will be + created if necessary. 
+ """ + from .yaml import write_yaml + + write_yaml(self.model_dump(), filename) diff --git a/petab/v1/sampling.py b/petab/v1/sampling.py index be154f1c..035fe7aa 100644 --- a/petab/v1/sampling.py +++ b/petab/v1/sampling.py @@ -5,7 +5,6 @@ import numpy as np import pandas as pd -from . import parameters from .C import * # noqa: F403 __all__ = ["sample_from_prior", "sample_parameter_startpoints"] @@ -24,86 +23,17 @@ def sample_from_prior( Returns: Array with sampled values """ + from .priors import Prior + # unpack info p_type, p_params, scaling, bounds = prior - - # define a function to rescale the sampled points to parameter scale - def scale(x): - if scaling == LIN: - return x - if scaling == LOG: - return np.log(x) - if scaling == LOG10: - return np.log10(x) - raise NotImplementedError( - f"Parameter priors on the parameter scale {scaling} are " - "currently not implemented." - ) - - def clip_to_bounds(x: np.array): - """Clip values in array x to bounds""" - return np.maximum(np.minimum(scale(bounds[1]), x), scale(bounds[0])) - - # define lambda functions for each parameter - if p_type == UNIFORM: - sp = scale( - (p_params[1] - p_params[0]) * np.random.random((n_starts,)) - + p_params[0] - ) - - elif p_type == PARAMETER_SCALE_UNIFORM: - sp = (p_params[1] - p_params[0]) * np.random.random( - (n_starts,) - ) + p_params[0] - - elif p_type == NORMAL: - sp = scale( - np.random.normal( - loc=p_params[0], scale=p_params[1], size=(n_starts,) - ) - ) - - elif p_type == LOG_NORMAL: - sp = scale( - np.exp( - np.random.normal( - loc=p_params[0], scale=p_params[1], size=(n_starts,) - ) - ) - ) - - elif p_type == PARAMETER_SCALE_NORMAL: - sp = np.random.normal( - loc=p_params[0], scale=p_params[1], size=(n_starts,) - ) - - elif p_type == LAPLACE: - sp = scale( - np.random.laplace( - loc=p_params[0], scale=p_params[1], size=(n_starts,) - ) - ) - - elif p_type == LOG_LAPLACE: - sp = scale( - np.exp( - np.random.laplace( - loc=p_params[0], scale=p_params[1], size=(n_starts,) - ) 
- ) - ) - - elif p_type == PARAMETER_SCALE_LAPLACE: - sp = np.random.laplace( - loc=p_params[0], scale=p_params[1], size=(n_starts,) - ) - - else: - raise NotImplementedError( - f"Parameter priors of type {prior[0]} are not implemented." - ) - - return clip_to_bounds(sp) + prior = Prior( + p_type, + tuple(p_params), + bounds=tuple(bounds), + transformation=scaling, + ) + return prior.sample(shape=(n_starts,), x_scaled=True) def sample_parameter_startpoints( @@ -127,14 +57,30 @@ def sample_parameter_startpoints( Array of sampled starting points with dimensions `n_startpoints` x `n_optimization_parameters` """ + from .priors import Prior + if seed is not None: np.random.seed(seed) - # get types and parameters of priors from dataframe - prior_list = parameters.get_priors_from_df( - parameter_df, mode=INITIALIZATION, parameter_ids=parameter_ids - ) + par_to_estimate = parameter_df.loc[parameter_df[ESTIMATE] == 1] - startpoints = [sample_from_prior(prior, n_starts) for prior in prior_list] + if parameter_ids is not None: + try: + par_to_estimate = par_to_estimate.loc[parameter_ids, :] + except KeyError as e: + missing_ids = set(parameter_ids) - set(par_to_estimate.index) + raise KeyError( + "Parameter table does not contain estimated parameter(s) " + f"{missing_ids}." 
+ ) from e - return np.array(startpoints).T + # get types and parameters of priors from dataframe + return np.array( + [ + Prior.from_par_dict( + row, + type_="initialization", + ).sample(n_starts, x_scaled=True) + for row in par_to_estimate.to_dict("records") + ] + ).T diff --git a/petab/v1/sbml.py b/petab/v1/sbml.py index 0a8fd20f..b939e45e 100644 --- a/petab/v1/sbml.py +++ b/petab/v1/sbml.py @@ -43,12 +43,18 @@ def is_sbml_consistent( libsbml.LIBSBML_CAT_UNITS_CONSISTENCY, False ) - has_problems = sbml_document.checkConsistency() - if has_problems: + has_issues = sbml_document.checkConsistency() + + # we only have an issue with errors or fatals + has_problems = sbml_document.getNumErrors( + libsbml.LIBSBML_SEV_ERROR + ) + sbml_document.getNumErrors(libsbml.LIBSBML_SEV_FATAL) + if has_issues: log_sbml_errors(sbml_document) - logger.warning( - "WARNING: Generated invalid SBML model. Check messages above." - ) + if has_problems: + logger.warning( + "WARNING: Generated invalid SBML model. Check messages above." 
+ ) return not has_problems @@ -163,6 +169,7 @@ def write_sbml(sbml_doc: libsbml.SBMLDocument, filename: Path | str) -> None: sbml_doc: SBML document containing the SBML model filename: Destination file name """ + Path(filename).parent.mkdir(parents=True, exist_ok=True) sbml_writer = libsbml.SBMLWriter() ret = sbml_writer.writeSBMLToFile(sbml_doc, str(filename)) if not ret: @@ -252,9 +259,9 @@ def get_model_for_condition( condition_dict = {petab.SIMULATION_CONDITION_ID: sim_condition_id} if preeq_condition_id: - condition_dict[ - petab.PREEQUILIBRATION_CONDITION_ID - ] = preeq_condition_id + condition_dict[petab.PREEQUILIBRATION_CONDITION_ID] = ( + preeq_condition_id + ) cur_measurement_df = petab.measurements.get_rows_for_condition( measurement_df=petab_problem.measurement_df, condition=condition_dict, diff --git a/petab/v1/simplify.py b/petab/v1/simplify.py index c4cdeb91..78c039b1 100644 --- a/petab/v1/simplify.py +++ b/petab/v1/simplify.py @@ -1,4 +1,5 @@ """Functionality for simplifying PEtab problems""" + from math import nan import pandas as pd diff --git a/petab/v1/simulate.py b/petab/v1/simulate.py index 682c470f..46001a72 100644 --- a/petab/v1/simulate.py +++ b/petab/v1/simulate.py @@ -1,4 +1,5 @@ """PEtab simulator base class and related functions.""" + from __future__ import annotations import abc @@ -240,20 +241,39 @@ def sample_noise( simulated_value, ) - # default noise distribution is petab.C.NORMAL - noise_distribution = petab_problem.observable_df.loc[ + observable_row = petab_problem.observable_df.loc[ measurement_row[petab.C.OBSERVABLE_ID] - ].get(petab.C.NOISE_DISTRIBUTION, petab.C.NORMAL) + ] + # default noise distribution is petab.C.NORMAL + noise_distribution = observable_row.get( + petab.C.NOISE_DISTRIBUTION, petab.C.NORMAL + ) # an empty noise distribution column in an observables table can result in # `noise_distribution == float('nan')` if pd.isna(noise_distribution): noise_distribution = petab.C.NORMAL + observable_transformation = 
observable_row.get( + petab.C.OBSERVABLE_TRANSFORMATION, petab.C.LIN + ) + transform = lambda x: x # noqa: E731 + # observableTransformation=log -> the log of the simulated value is + # distributed according to `noise_distribution` + if observable_transformation == petab.C.LOG: + simulated_value = np.log(simulated_value) + transform = np.exp + elif observable_transformation == petab.C.LOG10: + simulated_value = np.log10(simulated_value) + transform = lambda x: np.power(10, x) # noqa: E731 + # below is e.g.: `np.random.normal(loc=simulation, scale=noise_value)` simulated_value_with_noise = getattr(rng, noise_distribution)( loc=simulated_value, scale=noise_value * noise_scaling_factor ) + # apply observable transformation, ensure `float` type + simulated_value_with_noise = float(transform(simulated_value_with_noise)) + if zero_bounded and np.sign(simulated_value) != np.sign( simulated_value_with_noise ): diff --git a/petab/v1/visualize/__init__.py b/petab/v1/visualize/__init__.py index 924be86a..15385697 100644 --- a/petab/v1/visualize/__init__.py +++ b/petab/v1/visualize/__init__.py @@ -6,6 +6,7 @@ ``import petab.visualize``. 
""" + # ruff: noqa: F401 import importlib.util diff --git a/petab/v1/visualize/cli.py b/petab/v1/visualize/cli.py index 72074936..1416cae0 100644 --- a/petab/v1/visualize/cli.py +++ b/petab/v1/visualize/cli.py @@ -1,4 +1,5 @@ """Command-line interface for visualization.""" + import argparse from pathlib import Path diff --git a/petab/v1/visualize/data_overview.py b/petab/v1/visualize/data_overview.py index 349b503c..41f22ed2 100644 --- a/petab/v1/visualize/data_overview.py +++ b/petab/v1/visualize/data_overview.py @@ -69,7 +69,7 @@ def get_data_per_observable(measurement_df: pd.DataFrame) -> pd.DataFrame: my_measurements[PREEQUILIBRATION_CONDITION_ID] = ( my_measurements[PREEQUILIBRATION_CONDITION_ID] .astype("object") - .fillna("", inplace=True) + .fillna("") ) index.append(PREEQUILIBRATION_CONDITION_ID) diff --git a/petab/v1/visualize/helper_functions.py b/petab/v1/visualize/helper_functions.py index b1a6f1b1..85b5d936 100644 --- a/petab/v1/visualize/helper_functions.py +++ b/petab/v1/visualize/helper_functions.py @@ -4,7 +4,6 @@ hence not be directly visible/usable when using `import petab.visualize`. """ - import pandas as pd from ..C import * diff --git a/petab/v1/visualize/lint.py b/petab/v1/visualize/lint.py index b5de74bc..29ea4f7d 100644 --- a/petab/v1/visualize/lint.py +++ b/petab/v1/visualize/lint.py @@ -1,4 +1,5 @@ """Validation of PEtab visualization files""" + from __future__ import annotations import logging diff --git a/petab/v1/visualize/plot_data_and_simulation.py b/petab/v1/visualize/plot_data_and_simulation.py index c76bcd43..5ca8c6fb 100644 --- a/petab/v1/visualize/plot_data_and_simulation.py +++ b/petab/v1/visualize/plot_data_and_simulation.py @@ -2,7 +2,6 @@ the same format. """ - import matplotlib.pyplot as plt import pandas as pd @@ -73,7 +72,7 @@ def plot_with_vis_spec( plotter = MPLPlotter(figure, dataprovider) else: raise NotImplementedError( - "Currently, only visualization with " "matplotlib is possible." 
+ "Currently, only visualization with matplotlib is possible." ) return plotter.generate_figure(subplot_dir, format_=format_) @@ -150,7 +149,7 @@ def plot_without_vis_spec( plotter = MPLPlotter(figure, dataprovider) else: raise NotImplementedError( - "Currently, only visualization with " "matplotlib is possible." + "Currently, only visualization with matplotlib is possible." ) return plotter.generate_figure(subplot_dir, format_=format_) diff --git a/petab/v1/visualize/plot_residuals.py b/petab/v1/visualize/plot_residuals.py index 90298154..a1f2ec9b 100644 --- a/petab/v1/visualize/plot_residuals.py +++ b/petab/v1/visualize/plot_residuals.py @@ -1,6 +1,7 @@ """ Functions for plotting residuals. """ + from pathlib import Path import matplotlib @@ -133,7 +134,9 @@ def plot_goodness_of_fit( petab_problem: Problem, simulations_df: str | Path | pd.DataFrame, size: tuple = (10, 7), + color=None, ax: plt.Axes | None = None, + normalized_error: bool = True, ) -> matplotlib.axes.Axes: """ Plot goodness of fit. @@ -147,8 +150,15 @@ def plot_goodness_of_fit( output data file. size: Figure size. + color: + The marker colors, matches the `c` parameter of + `matplotlib.pyplot.scatter`. ax: Axis object. + normalized_error: + Type of error to display. + If True, mean of squared normalized residuals is shown, + otherwise mean of squared residuals. 
Returns ------- @@ -163,15 +173,29 @@ def plot_goodness_of_fit( "are needed for goodness_of_fit" ) - residual_df = calculate_residuals( - measurement_dfs=petab_problem.measurement_df, - simulation_dfs=simulations_df, - observable_dfs=petab_problem.observable_df, - parameter_dfs=petab_problem.parameter_df, - )[0] + if normalized_error: + residual_df = calculate_residuals( + measurement_dfs=petab_problem.measurement_df, + simulation_dfs=simulations_df, + observable_dfs=petab_problem.observable_df, + parameter_dfs=petab_problem.parameter_df, + normalize=True, + )[0] + error_name = "mean of squared\nnormalized residuals" + else: + residual_df = calculate_residuals( + measurement_dfs=petab_problem.measurement_df, + simulation_dfs=simulations_df, + observable_dfs=petab_problem.observable_df, + parameter_dfs=petab_problem.parameter_df, + normalize=False, + )[0] + error_name = "mean of squared residuals" + error = np.mean(np.power(residual_df["residual"], 2)) + slope, intercept, r_value, p_value, std_err = stats.linregress( - petab_problem.measurement_df["measurement"], simulations_df["simulation"], + petab_problem.measurement_df["measurement"], ) # x, y if ax is None: @@ -179,8 +203,9 @@ def plot_goodness_of_fit( fig.set_layout_engine("tight") ax.scatter( - petab_problem.measurement_df["measurement"], simulations_df["simulation"], + petab_problem.measurement_df["measurement"], + c=color, ) ax.axis("square") @@ -193,19 +218,18 @@ def plot_goodness_of_fit( ax.plot(x, x, linestyle="--", color="gray") ax.plot(x, intercept + slope * x, "r", label="fitted line") - mse = np.mean(np.abs(residual_df["residual"])) ax.text( 0.1, 0.70, f"$R^2$: {r_value**2:.2f}\n" f"slope: {slope:.2f}\n" f"intercept: {intercept:.2f}\n" - f"pvalue: {std_err:.2e}\n" - f"mean squared error: {mse:.2e}\n", + f"p-value: {p_value:.2e}\n" + f"{error_name}: {error:.2e}\n", transform=ax.transAxes, ) ax.set_title("Goodness of fit") - ax.set_xlabel("simulated values") - ax.set_ylabel("measurements") + 
ax.set_xlabel("Simulated value") + ax.set_ylabel("Measurement") return ax diff --git a/petab/v1/visualize/plotter.py b/petab/v1/visualize/plotter.py index 2a1eaaa9..14af5650 100644 --- a/petab/v1/visualize/plotter.py +++ b/petab/v1/visualize/plotter.py @@ -1,4 +1,5 @@ """PEtab visualization plotter classes""" + import os from abc import ABC, abstractmethod diff --git a/petab/v1/visualize/plotting.py b/petab/v1/visualize/plotting.py index b607350b..8ff813a3 100644 --- a/petab/v1/visualize/plotting.py +++ b/petab/v1/visualize/plotting.py @@ -1,4 +1,5 @@ """PEtab visualization data selection and visualization settings classes""" + import warnings from numbers import Number, Real from pathlib import Path @@ -609,9 +610,9 @@ def get_data_series( isinstance(tmp_noise, Number) or tmp_noise.dtype == "float64" ): - measurements_to_plot.at[ - var_cond_id, "noise_model" - ] = tmp_noise + measurements_to_plot.at[var_cond_id, "noise_model"] = ( + tmp_noise + ) # standard error of mean measurements_to_plot.at[var_cond_id, "sem"] = np.std( @@ -619,9 +620,9 @@ def get_data_series( ) / np.sqrt(len(data_measurements)) # single replicates - measurements_to_plot.at[ - var_cond_id, "repl" - ] = data_measurements.values + measurements_to_plot.at[var_cond_id, "repl"] = ( + data_measurements.values + ) data_series = DataSeries(conditions_, measurements_to_plot) data_series.add_offsets(dataplot.xOffset, dataplot.yOffset) @@ -832,9 +833,10 @@ def parse_from_id_list( :: - dataset_ids_per_plot = [['dataset_1', 'dataset_2'], - ['dataset_1', 'dataset_4', - 'dataset_5']] + dataset_ids_per_plot = [ + ["dataset_1", "dataset_2"], + ["dataset_1", "dataset_4", "dataset_5"], + ] or @@ -859,9 +861,15 @@ def parse_from_id_list( """ if ids_per_plot is None: # this is the default case. If no grouping is specified, - # all observables are plotted. One observable per plot. 
- unique_obs_list = self._data_df[OBSERVABLE_ID].unique() - ids_per_plot = [[obs_id] for obs_id in unique_obs_list] + # each group_by category will be plotted on a separate plot + unique_ids_list = self._data_df[ + { + "dataset": DATASET_ID, + "observable": OBSERVABLE_ID, + "simulation": SIMULATION_CONDITION_ID, + }[group_by] + ].unique() + ids_per_plot = [[id_] for id_ in unique_ids_list] if group_by == "dataset" and DATASET_ID not in self._data_df: raise ValueError( @@ -964,7 +972,7 @@ def _get_vis_spec_dependent_columns_dict( # get number of plots and create plotId-lists plot_id_column = [ - "plot%s" % str(ind + 1) + f"plot{ind + 1}" for ind, inner_list in enumerate(dataset_id_list) for _ in inner_list ] diff --git a/petab/v1/yaml.py b/petab/v1/yaml.py index ecffc48e..cefc594c 100644 --- a/petab/v1/yaml.py +++ b/petab/v1/yaml.py @@ -1,4 +1,5 @@ """Code regarding the PEtab YAML config files""" + from __future__ import annotations import os @@ -12,15 +13,15 @@ import yaml from pandas.io.common import get_handle +from ..versions import parse_version from .C import * # noqa: F403 # directory with PEtab yaml schema files SCHEMA_DIR = Path(__file__).parent.parent / "schemas" # map of version number to validation schema SCHEMAS = { - "1": SCHEMA_DIR / "petab_schema.v1.0.0.yaml", - "1.0.0": SCHEMA_DIR / "petab_schema.v1.0.0.yaml", - "2.0.0": SCHEMA_DIR / "petab_schema.v2.0.0.yaml", + (1, 0): SCHEMA_DIR / "petab_schema.v1.0.0.yaml", + (2, 0): SCHEMA_DIR / "petab_schema.v2.0.0.yaml", } __all__ = [ @@ -71,17 +72,19 @@ def validate_yaml_syntax( yaml_config = load_yaml(yaml_config) if schema is None: - # try get PEtab version from yaml file + # try to get PEtab version from the yaml file # if this is not the available, the file is not valid anyways, # but let's still use the latest PEtab schema for full validation + version = yaml_config.get(FORMAT_VERSION, None) version = ( - yaml_config.get(FORMAT_VERSION, None) or list(SCHEMAS.values())[-1] + parse_version(version)[:2] if 
version else list(SCHEMAS.keys())[-1] ) + try: - schema = SCHEMAS[str(version)] + schema = SCHEMAS[version] except KeyError as e: raise ValueError( - "Unknown PEtab version given in problem " + "No or unknown PEtab version given in problem " f"specification: {version}" ) from e schema = load_yaml(schema) @@ -234,10 +237,12 @@ def write_yaml(yaml_config: dict[str, Any], filename: str | Path) -> None: Arguments: yaml_config: Data to write - filename: File to create + filename: Destination file name. The parent directory will be created + if necessary. """ + Path(filename).parent.mkdir(parents=True, exist_ok=True) with open(filename, "w") as outfile: - yaml.dump( + yaml.safe_dump( yaml_config, outfile, default_flow_style=False, sort_keys=False ) diff --git a/petab/v2/C.py b/petab/v2/C.py index 11fede25..e640ae5c 100644 --- a/petab/v2/C.py +++ b/petab/v2/C.py @@ -2,6 +2,7 @@ """ This file contains constant definitions. """ + import math as _math import sys @@ -10,11 +11,8 @@ #: Observable ID column in the observable and measurement tables OBSERVABLE_ID = "observableId" -#: Preequilibration condition ID column in the measurement table -PREEQUILIBRATION_CONDITION_ID = "preequilibrationConditionId" - -#: Simulation condition ID column in the measurement table -SIMULATION_CONDITION_ID = "simulationConditionId" +#: Experiment ID column in the measurement table +EXPERIMENT_ID = "experimentId" #: Measurement value column in the measurement table MEASUREMENT = "measurement" @@ -25,6 +23,9 @@ #: Time value that indicates steady-state measurements TIME_STEADY_STATE = _math.inf +#: Time value that indicates pre-equilibration in the experiments table +TIME_PREEQUILIBRATION = -_math.inf + #: Observable parameters column in the measurement table OBSERVABLE_PARAMETERS = "observableParameters" @@ -37,21 +38,24 @@ #: Replicate ID column in the measurement table REPLICATE_ID = "replicateId" +#: The model ID column in the measurement table +MODEL_ID = "modelId" + #: Mandatory columns of 
measurement table MEASUREMENT_DF_REQUIRED_COLS = [ OBSERVABLE_ID, - SIMULATION_CONDITION_ID, + EXPERIMENT_ID, MEASUREMENT, TIME, ] #: Optional columns of measurement table MEASUREMENT_DF_OPTIONAL_COLS = [ - PREEQUILIBRATION_CONDITION_ID, OBSERVABLE_PARAMETERS, NOISE_PARAMETERS, DATASET_ID, REPLICATE_ID, + MODEL_ID, ] #: Measurement table columns @@ -69,8 +73,6 @@ PARAMETER_ID = "parameterId" #: Parameter name column in the parameter table PARAMETER_NAME = "parameterName" -#: Parameter scale column in the parameter table -PARAMETER_SCALE = "parameterScale" #: Lower bound column in the parameter table LOWER_BOUND = "lowerBound" #: Upper bound column in the parameter table @@ -79,19 +81,14 @@ NOMINAL_VALUE = "nominalValue" #: Estimate column in the parameter table ESTIMATE = "estimate" -#: Initialization prior type column in the parameter table -INITIALIZATION_PRIOR_TYPE = "initializationPriorType" -#: Initialization prior parameters column in the parameter table -INITIALIZATION_PRIOR_PARAMETERS = "initializationPriorParameters" -#: Objective prior type column in the parameter table -OBJECTIVE_PRIOR_TYPE = "objectivePriorType" -#: Objective prior parameters column in the parameter table -OBJECTIVE_PRIOR_PARAMETERS = "objectivePriorParameters" +#: Prior distribution type column in the parameter table +PRIOR_DISTRIBUTION = "priorDistribution" +#: Prior parameters column in the parameter table +PRIOR_PARAMETERS = "priorParameters" #: Mandatory columns of parameter table PARAMETER_DF_REQUIRED_COLS = [ PARAMETER_ID, - PARAMETER_SCALE, LOWER_BOUND, UPPER_BOUND, ESTIMATE, @@ -101,10 +98,8 @@ PARAMETER_DF_OPTIONAL_COLS = [ PARAMETER_NAME, NOMINAL_VALUE, - INITIALIZATION_PRIOR_TYPE, - INITIALIZATION_PRIOR_PARAMETERS, - OBJECTIVE_PRIOR_TYPE, - OBJECTIVE_PRIOR_PARAMETERS, + PRIOR_DISTRIBUTION, + PRIOR_PARAMETERS, ] #: Parameter table columns @@ -125,34 +120,51 @@ #: Condition ID column in the condition table CONDITION_ID = "conditionId" -#: Condition name column in the condition 
table -CONDITION_NAME = "conditionName" +#: Column in the condition table with the ID of an entity that is changed +TARGET_ID = "targetId" +#: Column in the condition table with the new value of the target entity +TARGET_VALUE = "targetValue" + +CONDITION_DF_COLS = [ + CONDITION_ID, + TARGET_ID, + TARGET_VALUE, +] +CONDITION_DF_REQUIRED_COLS = CONDITION_DF_COLS + +# EXPERIMENTS +EXPERIMENT_DF_REQUIRED_COLS = [ + EXPERIMENT_ID, + TIME, + CONDITION_ID, +] # OBSERVABLES -#: Observable name column in the observables table +#: Observable name column in the observable table OBSERVABLE_NAME = "observableName" -#: Observable formula column in the observables table +#: Observable formula column in the observable table OBSERVABLE_FORMULA = "observableFormula" -#: Noise formula column in the observables table +#: Observable placeholders column in the observable table +OBSERVABLE_PLACEHOLDERS = "observablePlaceholders" +#: Noise formula column in the observable table NOISE_FORMULA = "noiseFormula" -#: Observable transformation column in the observables table -OBSERVABLE_TRANSFORMATION = "observableTransformation" -#: Noise distribution column in the observables table +#: Noise distribution column in the observable table NOISE_DISTRIBUTION = "noiseDistribution" +#: Noise placeholders column in the observable table +NOISE_PLACEHOLDERS = "noisePlaceholders" -#: Mandatory columns of observables table +#: Mandatory columns of observable table OBSERVABLE_DF_REQUIRED_COLS = [ OBSERVABLE_ID, OBSERVABLE_FORMULA, NOISE_FORMULA, ] -#: Optional columns of observables table +#: Optional columns of observable table OBSERVABLE_DF_OPTIONAL_COLS = [ OBSERVABLE_NAME, - OBSERVABLE_TRANSFORMATION, NOISE_DISTRIBUTION, ] @@ -171,157 +183,59 @@ LOG = "log" #: Logarithmic base 10 transformation LOG10 = "log10" -#: Supported observable transformations -OBSERVABLE_TRANSFORMATIONS = [LIN, LOG, LOG10] # NOISE MODELS -#: Uniform distribution -UNIFORM = "uniform" -#: Uniform distribution on the parameter 
scale -PARAMETER_SCALE_UNIFORM = "parameterScaleUniform" -#: Normal distribution -NORMAL = "normal" -#: Normal distribution on the parameter scale -PARAMETER_SCALE_NORMAL = "parameterScaleNormal" + +#: Cauchy distribution. +CAUCHY = "cauchy" +#: Chi-squared distribution. +# FIXME: "chisquare" in PEtab and sbml-distrib, but usually "chi-squared" +CHI_SQUARED = "chisquare" +#: Exponential distribution. +EXPONENTIAL = "exponential" +#: Gamma distribution. +GAMMA = "gamma" #: Laplace distribution LAPLACE = "laplace" -#: Laplace distribution on the parameter scale -PARAMETER_SCALE_LAPLACE = "parameterScaleLaplace" -#: Log-normal distribution -LOG_NORMAL = "logNormal" #: Log-Laplace distribution -LOG_LAPLACE = "logLaplace" +LOG_LAPLACE = "log-laplace" +#: Log-normal distribution +LOG_NORMAL = "log-normal" +#: Log-uniform distribution. +LOG_UNIFORM = "log-uniform" +#: Normal distribution +NORMAL = "normal" +#: Rayleigh distribution. +RAYLEIGH = "rayleigh" +#: Uniform distribution +UNIFORM = "uniform" -#: Supported prior types -PRIOR_TYPES = [ - UNIFORM, - NORMAL, +#: Supported prior distribution types +PRIOR_DISTRIBUTIONS = [ + CAUCHY, + CHI_SQUARED, + EXPONENTIAL, + GAMMA, LAPLACE, - LOG_NORMAL, LOG_LAPLACE, - PARAMETER_SCALE_UNIFORM, - PARAMETER_SCALE_NORMAL, - PARAMETER_SCALE_LAPLACE, -] - -#: Supported noise distributions -NOISE_MODELS = [NORMAL, LAPLACE] - - -# VISUALIZATION - -#: Plot ID column in the visualization table -PLOT_ID = "plotId" -#: Plot name column in the visualization table -PLOT_NAME = "plotName" -#: Value for plot type 'simulation' in the visualization table -PLOT_TYPE_SIMULATION = "plotTypeSimulation" -#: Value for plot type 'data' in the visualization table -PLOT_TYPE_DATA = "plotTypeData" -#: X values column in the visualization table -X_VALUES = "xValues" -#: X offset column in the visualization table -X_OFFSET = "xOffset" -#: X label column in the visualization table -X_LABEL = "xLabel" -#: X scale column in the visualization table -X_SCALE = 
"xScale" -#: Y values column in the visualization table -Y_VALUES = "yValues" -#: Y offset column in the visualization table -Y_OFFSET = "yOffset" -#: Y label column in the visualization table -Y_LABEL = "yLabel" -#: Y scale column in the visualization table -Y_SCALE = "yScale" -#: Legend entry column in the visualization table -LEGEND_ENTRY = "legendEntry" - -#: Mandatory columns of visualization table -VISUALIZATION_DF_REQUIRED_COLS = [PLOT_ID] - -#: Optional columns of visualization table -VISUALIZATION_DF_OPTIONAL_COLS = [ - PLOT_NAME, - PLOT_TYPE_SIMULATION, - PLOT_TYPE_DATA, - X_VALUES, - X_OFFSET, - X_LABEL, - X_SCALE, - Y_VALUES, - Y_OFFSET, - Y_LABEL, - Y_SCALE, - LEGEND_ENTRY, - DATASET_ID, -] - -#: Visualization table columns -VISUALIZATION_DF_COLS = [ - *VISUALIZATION_DF_REQUIRED_COLS, - *VISUALIZATION_DF_OPTIONAL_COLS, -] - -#: Visualization table columns that contain subplot specifications -VISUALIZATION_DF_SUBPLOT_LEVEL_COLS = [ - PLOT_ID, - PLOT_NAME, - PLOT_TYPE_SIMULATION, - PLOT_TYPE_DATA, - X_LABEL, - X_SCALE, - Y_LABEL, - Y_SCALE, -] - -#: Visualization table columns that contain single plot specifications -VISUALIZATION_DF_SINGLE_PLOT_LEVEL_COLS = [ - X_VALUES, - X_OFFSET, - Y_VALUES, - Y_OFFSET, - LEGEND_ENTRY, - DATASET_ID, + LOG_NORMAL, + LOG_UNIFORM, + NORMAL, + RAYLEIGH, + UNIFORM, ] -#: Plot type value in the visualization table for line plot -LINE_PLOT = "LinePlot" -#: Plot type value in the visualization table for bar plot -BAR_PLOT = "BarPlot" -#: Plot type value in the visualization table for scatter plot -SCATTER_PLOT = "ScatterPlot" -#: Supported plot types -PLOT_TYPES_SIMULATION = [LINE_PLOT, BAR_PLOT, SCATTER_PLOT] - -#: Supported xScales -X_SCALES = [LIN, LOG, LOG10] - -#: Supported yScales -Y_SCALES = [LIN, LOG, LOG10] - - -#: Plot type "data" value in the visualization table for mean and standard -# deviation -MEAN_AND_SD = "MeanAndSD" -#: Plot type "data" value in the visualization table for mean and standard -# error 
-MEAN_AND_SEM = "MeanAndSEM" -#: Plot type "data" value in the visualization table for replicates -REPLICATE = "replicate" -#: Plot type "data" value in the visualization table for provided noise values -PROVIDED = "provided" -#: Supported settings for handling replicates -PLOT_TYPES_DATA = [MEAN_AND_SD, MEAN_AND_SEM, REPLICATE, PROVIDED] +#: Supported noise distributions +NOISE_DISTRIBUTIONS = [NORMAL, LAPLACE, LOG_NORMAL, LOG_LAPLACE] # YAML #: PEtab version key in the YAML file FORMAT_VERSION = "format_version" -#: Parameter file key in the YAML file -PARAMETER_FILE = "parameter_file" +#: Parameter files key in the YAML file +PARAMETER_FILES = "parameter_files" #: Problems key in the YAML file PROBLEMS = "problems" #: Model files key in the YAML file @@ -332,6 +246,8 @@ MODEL_LANGUAGE = "language" #: Condition files key in the YAML file CONDITION_FILES = "condition_files" +#: Experiment files key in the YAML file +EXPERIMENT_FILES = "experiment_files" #: Measurement files key in the YAML file MEASUREMENT_FILES = "measurement_files" #: Observable files key in the YAML file @@ -350,6 +266,9 @@ PETAB_ENTITY_ID = "petabEntityId" #: Model entity ID column in the mapping table MODEL_ENTITY_ID = "modelEntityId" +#: Arbitrary name +NAME = "name" + #: Required columns of the mapping table MAPPING_DF_REQUIRED_COLS = [PETAB_ENTITY_ID, MODEL_ENTITY_ID] @@ -357,14 +276,15 @@ #: Simulated value column in the simulation table SIMULATION = "simulation" -#: Residual value column in the residuals table +#: Residual value column in the residual table RESIDUAL = "residual" -#: ??? -NOISE_VALUE = "noiseValue" #: separator for multiple parameter values (bounds, observableParameters, ...) 
PARAMETER_SEPARATOR = ";" +#: The time symbol for use in any PEtab-specific mathematical expressions +TIME_SYMBOL = "time" + __all__ = [ x diff --git a/petab/v2/__init__.py b/petab/v2/__init__.py index 98084fa5..a9f018c5 100644 --- a/petab/v2/__init__.py +++ b/petab/v2/__init__.py @@ -2,15 +2,38 @@ Contains all functionality related to handling PEtab 2.0 problems. """ -from warnings import warn -from ..v1 import * # noqa: F403, F401, E402 +# TODO: move this module to v2 +from petab.v1.distributions import * # noqa: F401, E402 +from petab.v1.mapping import ( # noqa: F403, F401, E402 + get_mapping_df, + write_mapping_df, +) +from petab.v1.measurements import ( # noqa: F401, E402 + get_measurement_df, + write_measurement_df, +) +from petab.v1.observables import ( # noqa: F401, E402 + get_observable_df, + write_observable_df, +) +from petab.v1.parameters import ( # noqa: F401, E402 + get_parameter_df, + write_parameter_df, +) +from petab.v1.yaml import load_yaml # noqa: F401, E402 # import after v1 -from .problem import Problem # noqa: F401 - -warn( - "Support for PEtab2.0 and all of petab.v2 is experimental " - "and subject to changes!", - stacklevel=1, +from ..version import __version__ # noqa: F401, E402 +from . 
def calculate_residuals(
    measurement_dfs: list[pd.DataFrame] | pd.DataFrame,
    simulation_dfs: list[pd.DataFrame] | pd.DataFrame,
    observable_dfs: list[pd.DataFrame] | pd.DataFrame,
    parameter_dfs: list[pd.DataFrame] | pd.DataFrame,
    normalize: bool = True,
    scale: bool = True,
) -> list[pd.DataFrame]:
    """Calculate residuals.

    Arguments:
        measurement_dfs:
            The problem measurement tables.
        simulation_dfs:
            Simulation tables corresponding to the measurement tables.
        observable_dfs:
            The problem observable tables.
        parameter_dfs:
            The problem parameter tables.
        normalize:
            Whether to normalize residuals by the noise standard deviation
            terms.
        scale:
            Whether to calculate residuals of scaled values.

    Returns:
        List of DataFrames in the same structure as `measurement_dfs`
        with a field `residual` instead of measurement.

    Raises:
        ValueError: If the numbers of provided tables differ
            (``zip(..., strict=True)``).
    """
    # convenience: a single table is treated as a one-element list
    if isinstance(measurement_dfs, pd.DataFrame):
        measurement_dfs = [measurement_dfs]
    if isinstance(simulation_dfs, pd.DataFrame):
        simulation_dfs = [simulation_dfs]
    if isinstance(observable_dfs, pd.DataFrame):
        observable_dfs = [observable_dfs]
    if isinstance(parameter_dfs, pd.DataFrame):
        parameter_dfs = [parameter_dfs]

    # one residual table per (measurement, simulation, observable, parameter)
    # table tuple
    return [
        calculate_residuals_for_table(
            measurement_df,
            simulation_df,
            observable_df,
            parameter_df,
            normalize,
            scale,
        )
        for measurement_df, simulation_df, observable_df, parameter_df in zip(
            measurement_dfs,
            simulation_dfs,
            observable_dfs,
            parameter_dfs,
            strict=True,
        )
    ]


def calculate_residuals_for_table(
    measurement_df: pd.DataFrame,
    simulation_df: pd.DataFrame,
    observable_df: pd.DataFrame,
    parameter_df: pd.DataFrame,
    normalize: bool = True,
    scale: bool = True,
) -> pd.DataFrame:
    """
    Calculate residuals for a single measurement table.
    For the arguments, see `calculate_residuals`.

    Returns:
        A copy of `measurement_df` (with a fresh 0..n-1 index) in which the
        measurement column is renamed to the residual column and filled with
        ``measurement - simulation`` (on the observable's scale if `scale`
        is true, divided by the noise value if `normalize` is true).

    Raises:
        ValueError: If no simulation, or several different simulations,
            match a measurement row.
    """
    # Import under an alias: a plain `from petab.v1 import scale` would
    # rebind the local name `scale` and silently discard the boolean
    # `scale` argument (making `if scale:` always true).
    from petab.v1 import scale as scale_value

    # below, we rely on a unique index
    measurement_df = measurement_df.reset_index(drop=True)

    # create residual df as copy of measurement df, change column
    residual_df = measurement_df.copy(deep=True).rename(
        columns={MEASUREMENT: RESIDUAL}
    )
    residual_df[RESIDUAL] = residual_df[RESIDUAL].astype("float64")
    # matching columns
    compared_cols = set(measurement_df.columns) & set(simulation_df.columns)

    # compute noise formulas for observables
    noise_formulas = get_symbolic_noise_formulas(observable_df)

    # iterate over measurements, find corresponding simulations
    for irow, row in measurement_df.iterrows():
        measurement = row[MEASUREMENT]
        # look up in simulation df; empty measurement cells match anything
        masks = [
            (simulation_df[col] == row[col]) | is_empty(row[col])
            for col in compared_cols
        ]
        mask = reduce(operator.and_, masks)
        n_matches = mask.sum()
        if n_matches == 0:
            raise ValueError(
                f"Could not find simulation for measurement {row}."
            )
        # if we have multiple matches, check that the rows are all identical
        if (
            n_matches > 1
            and simulation_df.loc[mask].drop_duplicates().shape[0] > 1
        ):
            raise ValueError(
                f"Multiple different simulations found for measurement "
                f"{row}:\n{simulation_df.loc[mask]}"
            )

        simulation = simulation_df.loc[mask][SIMULATION].iloc[0]
        # needed for the noise formula below, independent of `scale`
        observable = observable_df.loc[row[OBSERVABLE_ID]]

        if scale:
            # for v2, the transformation is part of the noise distribution
            noise_distr = observable.get(NOISE_DISTRIBUTION, NORMAL)
            if noise_distr.startswith("log-"):
                trafo = LOG
            elif noise_distr.startswith("log10-"):
                trafo = LOG10
            else:
                trafo = LIN

            # non-normalized residual is the difference of the scaled values
            residual = scale_value(measurement, trafo) - scale_value(
                simulation, trafo
            )
        else:
            # residual of the untransformed values
            residual = measurement - simulation

        if normalize:
            # divide by standard deviation
            residual /= evaluate_noise_formula(
                row, noise_formulas, parameter_df, simulation, observable
            )

        # fill in value
        residual_df.loc[irow, RESIDUAL] = residual
    return residual_df
def get_symbolic_noise_formulas(observable_df) -> dict[str, sp.Expr]:
    """Sympify noise formulas.

    Arguments:
        observable_df: The observable table.

    Returns:
        Dictionary of {observable_id}: {noise_formula}. The value is ``None``
        for rows that have no noise-formula entry.
    """
    # Use the column-name constant for both the membership test and the
    # lookup instead of mixing it with a hard-coded attribute name.
    return {
        observable_id: (
            sympify_petab(row[NOISE_FORMULA]) if NOISE_FORMULA in row else None
        )
        for observable_id, row in observable_df.iterrows()
    }


def evaluate_noise_formula(
    measurement: pd.Series,
    noise_formulas: dict[str, sp.Expr],
    parameter_df: pd.DataFrame,
    simulation: numbers.Number,
    observable: dict,
) -> float:
    """Fill in parameters for `measurement` and evaluate noise_formula.

    Arguments:
        measurement: A measurement table row.
        noise_formulas: The noise formulas as computed by
            `get_symbolic_noise_formulas`.
        parameter_df: The parameter table.
        simulation: The simulation corresponding to the measurement. It is
            substituted for the observable ID in the noise formula; the
            callers in this module pass the untransformed simulation value.
        observable: The observable table row corresponding to the measurement.

    Returns:
        The noise value.

    Raises:
        ValueError: If there is no noise formula for the observable, or if
            symbols remain in the formula after all substitutions.
    """
    # the observable id
    observable_id = measurement[OBSERVABLE_ID]

    noise_formula = noise_formulas[observable_id]
    if noise_formula is None:
        # `get_symbolic_noise_formulas` stores None for missing formulas;
        # fail with a clear message instead of an AttributeError on `.subs`.
        raise ValueError(
            f"No noise formula available for observable {observable_id}."
        )

    # extract measurement specific overrides
    observable_parameter_overrides = split_parameter_replacement_list(
        measurement.get(OBSERVABLE_PARAMETERS, None)
    )
    noise_parameter_overrides = split_parameter_replacement_list(
        measurement.get(NOISE_PARAMETERS, None)
    )
    observable_parameter_placeholders = observable.get(
        OBSERVABLE_PLACEHOLDERS, ""
    ).split(PARAMETER_SEPARATOR)
    noise_parameter_placeholders = observable.get(
        NOISE_PLACEHOLDERS, ""
    ).split(PARAMETER_SEPARATOR)

    # non-empty placeholder IDs: observable placeholders first, then noise
    # placeholders - the same order as the concatenated overrides below
    placeholders = [
        p.strip()
        for p in observable_parameter_placeholders
        + noise_parameter_placeholders
        if p.strip()
    ]

    # fill in measurement specific parameters
    overrides = {
        sp.Symbol(placeholder, real=True): override
        for placeholder, override in zip(
            placeholders,
            observable_parameter_overrides + noise_parameter_overrides,
            strict=False,
        )
    }

    # fill in observables
    overrides[sp.Symbol(observable_id, real=True)] = simulation

    # fill in general parameters
    for parameter_id, nominal_value in parameter_df[NOMINAL_VALUE].items():
        overrides[sp.Symbol(parameter_id, real=True)] = nominal_value

    # replace parametric measurement specific parameters: any non-numeric
    # override is a parameter ID that is looked up in the parameter table
    overrides = {
        key: (
            value
            if isinstance(value, numbers.Number)
            else parameter_df.loc[value, NOMINAL_VALUE]
        )
        for key, value in overrides.items()
    }

    # replace parameters by values in formula
    substituted = noise_formula.subs(overrides)

    # conversion is possible if all parameters are replaced
    try:
        return float(substituted)
    except TypeError as e:
        # report only the symbols that are still unresolved, not every
        # symbol of the original formula
        raise ValueError(
            f"Cannot replace all parameters in noise formula {noise_formula} "
            f"for observable {observable_id}. "
            f"Missing {substituted.free_symbols}. Note that model states "
            "are currently not supported."
        ) from e
def calculate_chi2(
    measurement_dfs: list[pd.DataFrame] | pd.DataFrame,
    simulation_dfs: list[pd.DataFrame] | pd.DataFrame,
    observable_dfs: list[pd.DataFrame] | pd.DataFrame,
    parameter_dfs: list[pd.DataFrame] | pd.DataFrame,
    normalize: bool = True,
    scale: bool = True,
) -> float:
    """Calculate the chi2 value.

    Arguments:
        measurement_dfs:
            The problem measurement tables.
        simulation_dfs:
            Simulation tables corresponding to the measurement tables.
        observable_dfs:
            The problem observable tables.
        parameter_dfs:
            The problem parameter tables.
        normalize:
            Whether to normalize residuals by the noise standard deviation
            terms.
        scale:
            Whether to calculate residuals of scaled values.

    Returns:
        The aggregated chi2 value.
    """
    residual_dfs = calculate_residuals(
        measurement_dfs,
        simulation_dfs,
        observable_dfs,
        parameter_dfs,
        normalize,
        scale,
    )
    # sum of the per-table sums of squared residuals
    return float(
        sum(calculate_chi2_for_table_from_residuals(df) for df in residual_dfs)
    )


def calculate_chi2_for_table_from_residuals(
    residual_df: pd.DataFrame,
) -> float:
    """Compute chi2 value (sum of squared residuals) for a single residual
    table."""
    return float((np.array(residual_df[RESIDUAL]) ** 2).sum())


def calculate_llh(
    measurement_dfs: list[pd.DataFrame] | pd.DataFrame,
    simulation_dfs: list[pd.DataFrame] | pd.DataFrame,
    observable_dfs: list[pd.DataFrame] | pd.DataFrame,
    parameter_dfs: list[pd.DataFrame] | pd.DataFrame,
) -> float:
    """Calculate total log likelihood.

    Arguments:
        measurement_dfs:
            The problem measurement tables.
        simulation_dfs:
            Simulation tables corresponding to the measurement tables.
        observable_dfs:
            The problem observable tables.
        parameter_dfs:
            The problem parameter tables.

    Returns:
        The log-likelihood.

    Raises:
        ValueError: If the numbers of provided tables differ
            (``zip(..., strict=True)``).
    """
    # convenience: a single table is treated as a one-element list
    if isinstance(measurement_dfs, pd.DataFrame):
        measurement_dfs = [measurement_dfs]
    if isinstance(simulation_dfs, pd.DataFrame):
        simulation_dfs = [simulation_dfs]
    if isinstance(observable_dfs, pd.DataFrame):
        observable_dfs = [observable_dfs]
    if isinstance(parameter_dfs, pd.DataFrame):
        parameter_dfs = [parameter_dfs]

    # sum the log-likelihood contributions of all table tuples
    return float(
        sum(
            calculate_llh_for_table(
                measurement_df, simulation_df, observable_df, parameter_df
            )
            for measurement_df, simulation_df, observable_df, parameter_df in zip(
                measurement_dfs,
                simulation_dfs,
                observable_dfs,
                parameter_dfs,
                strict=True,
            )
        )
    )


def calculate_llh_for_table(
    measurement_df: pd.DataFrame,
    simulation_df: pd.DataFrame,
    observable_df: pd.DataFrame,
    parameter_df: pd.DataFrame,
) -> float:
    """Calculate log-likelihood for one set of tables. For the arguments, see
    `calculate_llh`.

    Raises:
        ValueError: If no simulation matches a measurement row.
    """
    llhs = []

    # matching columns
    compared_cols = set(measurement_df.columns) & set(simulation_df.columns)

    # compute noise formulas for observables
    noise_formulas = get_symbolic_noise_formulas(observable_df)

    # iterate over measurements, find corresponding simulations
    for _, row in measurement_df.iterrows():
        measurement = row[MEASUREMENT]

        # look up in simulation df; empty measurement cells match anything
        masks = [
            (simulation_df[col] == row[col]) | is_empty(row[col])
            for col in compared_cols
        ]
        mask = reduce(operator.and_, masks)
        if mask.sum() == 0:
            # Same diagnostic as in `calculate_residuals_for_table`;
            # otherwise `.iloc[0]` below fails with a bare IndexError.
            raise ValueError(
                f"Could not find simulation for measurement {row}."
            )

        simulation = simulation_df.loc[mask][SIMULATION].iloc[0]

        observable = observable_df.loc[row[OBSERVABLE_ID]]

        # get noise distribution; a "log-"/"log10-" prefix selects the scale
        # and is stripped from the distribution name
        noise_distr = observable.get(NOISE_DISTRIBUTION, NORMAL)

        if noise_distr.startswith("log-"):
            obs_scale = LOG
            noise_distr = noise_distr.removeprefix("log-")
        elif noise_distr.startswith("log10-"):
            obs_scale = LOG10
            noise_distr = noise_distr.removeprefix("log10-")
        else:
            obs_scale = LIN

        # get noise standard deviation
        noise_value = evaluate_noise_formula(
            row,
            noise_formulas,
            parameter_df,
            simulation,
            observable,
        )

        llh = calculate_single_llh(
            measurement, simulation, obs_scale, noise_distr, noise_value
        )
        llhs.append(llh)
    return float(sum(llhs))
def calculate_single_llh(
    measurement: float,
    simulation: float,
    scale: str,
    noise_distribution: str,
    noise_value: float,
) -> float:
    """Calculate a single log likelihood.

    Arguments:
        measurement: The measurement value.
        simulation: The simulated value.
        scale: The scale on which the noise model is to be applied.
        noise_distribution: The noise distribution.
        noise_value: The considered noise models possess a single noise
            parameter, e.g. the normal standard deviation.

    Returns:
        The computed log-likelihood for the given values.

    Raises:
        NotImplementedError: For unsupported combinations of
            `noise_distribution` and `scale`.
    """
    # PEtab v2: the log transformation is part of the distribution name.
    # Map "log-<distribution> on linear scale" to "<distribution> on log
    # scale" so that one set of formulas below covers both spellings.
    if scale == LIN:
        if noise_distribution == LOG_NORMAL:
            noise_distribution, scale = NORMAL, LOG
        elif noise_distribution == LOG_LAPLACE:
            noise_distribution, scale = LAPLACE, LOG

    # short-hand
    m, s, sigma = measurement, simulation, noise_value
    pi, log, log10 = np.pi, np.log, np.log10

    # go over the possible cases; the `m` (and `log(10)`) factors inside the
    # logarithms are the Jacobian terms of the log / log10 transformation
    if noise_distribution == NORMAL and scale == LIN:
        nllh = 0.5 * log(2 * pi * sigma**2) + 0.5 * ((s - m) / sigma) ** 2
    elif noise_distribution == NORMAL and scale == LOG:
        nllh = (
            0.5 * log(2 * pi * sigma**2 * m**2)
            + 0.5 * ((log(s) - log(m)) / sigma) ** 2
        )
    elif noise_distribution == NORMAL and scale == LOG10:
        nllh = (
            0.5 * log(2 * pi * sigma**2 * m**2 * log(10) ** 2)
            + 0.5 * ((log10(s) - log10(m)) / sigma) ** 2
        )
    elif noise_distribution == LAPLACE and scale == LIN:
        nllh = log(2 * sigma) + abs((s - m) / sigma)
    elif noise_distribution == LAPLACE and scale == LOG:
        nllh = log(2 * sigma * m) + abs((log(s) - log(m)) / sigma)
    elif noise_distribution == LAPLACE and scale == LOG10:
        nllh = log(2 * sigma * m * log(10)) + abs(
            (log10(s) - log10(m)) / sigma
        )
    else:
        raise NotImplementedError(
            "Unsupported combination of noise_distribution and scale "
            f"specified: {noise_distribution}, {scale}."
        )
    # the branches above compute the negative log-likelihood
    return -nllh
def get_condition_df(
    condition_file: str | pd.DataFrame | Path | None,
) -> pd.DataFrame | None:
    """Load a PEtab condition table.

    Arguments:
        condition_file:
            Path of a tab-separated PEtab condition file, an already loaded
            ``pandas.DataFrame``, or ``None``.

    Returns:
        ``None`` if ``condition_file`` is ``None``; otherwise the condition
        table. A DataFrame passed in is returned as the same object, after
        its column names were passed to
        ``assert_no_leading_trailing_whitespace``.
    """
    if condition_file is None:
        return None

    table = condition_file
    if isinstance(table, (str, Path)):
        # read from disk; "round_trip" float parsing is requested explicitly
        table = pd.read_csv(table, sep="\t", float_precision="round_trip")

    assert_no_leading_trailing_whitespace(table.columns.values, "condition")

    return table


def write_condition_df(df: pd.DataFrame, filename: str | Path) -> None:
    """Write a PEtab condition table to a tab-separated file.

    Arguments:
        df: PEtab condition table
        filename: Destination file name
    """
    # route through get_condition_df so the same column-name check runs
    # before the table is written (without the index)
    get_condition_df(df).to_csv(filename, sep="\t", index=False)
ExperimentsToSbmlConverter: + """Convert PEtab experiments to SBML. + + For an SBML-model-based PEtab problem, this class converts the PEtab + experiments to initial assignments and events as far as possible. + + If the model already contains events, PEtab events are added with a higher + priority than the existing events to guarantee that PEtab condition changes + are applied before any pre-existing assignments. + This requires that all event priorities in the original model are numeric + constants. + + The PEtab problem must not contain any identifiers starting with + ``_petab``. + + All periods and condition changes that are represented by initial + assignments or events will be removed from the condition table. + Each experiment will have at most one period with a start time of ``-inf`` + and one period with a finite start time. The associated changes with + these periods are only the pre-equilibration indicator + (if necessary), and the experiment indicator parameter. + """ + + #: ID of the parameter that indicates whether the model is in + # the pre-equilibration phase (1) or not (0). + PREEQ_INDICATOR = "_petab_preequilibration_indicator" + + #: The condition ID of the condition that sets the + #: pre-equilibration indicator to 1. + CONDITION_ID_PREEQ_ON = "_petab_preequilibration_on" + + #: The condition ID of the condition that sets the + #: pre-equilibration indicator to 0. + CONDITION_ID_PREEQ_OFF = "_petab_preequilibration_off" + + def __init__(self, problem: Problem, default_priority: float = None): + """Initialize the converter. + + :param problem: The PEtab problem to convert. + This will not be modified. + :param default_priority: The priority value to apply to any events that + preexist in the model and do not have a priority set. + + In SBML, for event assignments that are to be applied at the same + simulation time, the order of event execution is determined by the + priority of the respective events. 
+ If no priority is set, the order is undefined. + See SBML specs for details. + To ensure that the PEtab condition-start-events are executed before + any other events, all events should have a priority set. + """ + if len(problem.models) > 1: + # https://github.com/PEtab-dev/libpetab-python/issues/392 + raise NotImplementedError( + "Only single-model PEtab problems are supported." + ) + if not isinstance(problem.model, SbmlModel): + raise ValueError("Only SBML models are supported.") + + self._original_problem = problem + self._new_problem = deepcopy(self._original_problem) + + self._model: libsbml.Model = self._new_problem.model.sbml_model + self._preeq_indicator = self.PREEQ_INDICATOR + + # The maximum event priority that was found in the unprocessed model. + self._max_event_priority = None + # The priority that will be used for the PEtab events. + self._petab_event_priority = None + self._default_priority = default_priority + self._preprocess() + + @staticmethod + def _get_experiment_indicator_condition_id(experiment_id: str) -> str: + """Get the condition ID for the experiment indicator parameter.""" + return f"_petab_experiment_condition_{experiment_id}" + + def _preprocess(self) -> None: + """Check whether we can handle the given problem and store some model + information.""" + model = self._model + if model.getLevel() < 3: + # try to upgrade the SBML model + if not model.getSBMLDocument().setLevelAndVersion(3, 2): + raise ValueError( + "Cannot handle SBML models with SBML level < 3, " + "because they do not support initial values for event " + "triggers and automatic upconversion of the model failed." 
+ ) + + # Apply default priority to all events that do not have a priority + if self._default_priority is not None: + for event in model.getListOfEvents(): + if ( + not event.getPriority() + or event.getPriority().getMath() is None + ): + priority = event.createPriority() + priority.setMath( + libsbml.parseL3Formula(str(self._default_priority)) + ) + + # Collect event priorities + event_priorities = { + ev.getId() or str(ev): sbml_math_to_sympy(ev.getPriority()) + for ev in model.getListOfEvents() + if ev.getPriority() and ev.getPriority().getMath() is not None + } + + # Check for non-constant event priorities and track the maximum + # priority used so far. + for e, priority in event_priorities.items(): + if priority.free_symbols: + # We'd need to find the maximum priority of all events, + # which is challenging/impossible to do in general. + raise NotImplementedError( + f"Event `{e}` has a non-constant priority: {priority}. " + "This is currently not supported." + ) + self._max_event_priority = max( + self._max_event_priority or 0, float(priority) + ) + + self._petab_event_priority = ( + self._max_event_priority + 1 + if self._max_event_priority is not None + else None + ) + + for event in model.getListOfEvents(): + # Check for undefined event priorities and warn + if (prio := event.getPriority()) and prio.getMath() is None: + warnings.warn( + f"Event `{event.getId()}` has no priority set. " + "Make sure that this event cannot trigger at the time of " + "a PEtab condition change, otherwise the behavior is " + "undefined. To avoid this warning, see the " + "`default_priority` parameter of " + f"{self.__class__.__name__}.", + stacklevel=1, + ) + + # Check for useValuesFromTrigger time + if event.getUseValuesFromTriggerTime(): + # Non-PEtab-condition-change events must be executed *after* + # PEtab condition changes have been applied, based on the + # updated model state. This would be violated by + # useValuesFromTriggerTime=true. 
+ warnings.warn( + f"Event `{event.getId()}` has " + "`useValuesFromTriggerTime=true'. " + "Make sure that this event cannot trigger at the time of " + "a PEtab condition change, or consider changing " + "`useValuesFromTriggerTime' to `false'. Otherwise " + "simulation results may be incorrect.", + stacklevel=1, + ) + + def convert(self) -> Problem: + """Convert the PEtab experiments to SBML events. + + :return: The converted PEtab problem. + """ + + self._add_preequilibration_indicator() + + for experiment in self._new_problem.experiments: + self._convert_experiment(experiment) + + self._add_indicators_to_conditions() + + validation_results = self._new_problem.validate() + validation_results.log() + + return self._new_problem + + def _convert_experiment(self, experiment: Experiment) -> None: + """ + Convert a single experiment to SBML events or initial assignments. + """ + model = self._model + experiment.sort_periods() + has_preequilibration = experiment.has_preequilibration + + # add experiment indicator + exp_ind_id = self.get_experiment_indicator(experiment.id) + if model.getElementBySId(exp_ind_id) is not None: + raise ValueError( + f"The model has entity with ID `{exp_ind_id}`. " + "IDs starting with `petab_` are reserved for " + f"{self.__class__.__name__} and should not be used in the " + "model." + ) + add_sbml_parameter(model, id_=exp_ind_id, constant=False, value=0) + kept_periods: list[ExperimentPeriod] = [] + # Collect values for initial assignments for the different experiments. + # All expressions must be combined into a single initial assignment + # per target. + # target_id -> [(experiment_indicator, target_value), ...] + period0_assignments: dict[str, list[tuple[str, sp.Basic]]] = {} + + for i_period, period in enumerate(experiment.sorted_periods): + if period.is_preequilibration: + # pre-equilibration cannot be represented in SBML, + # so we need to keep this period in the Problem. 
+ kept_periods.append(period) + elif i_period == int(has_preequilibration): + # we always keep the first non-pre-equilibration period + # to set the indicator parameters + kept_periods.append(period) + elif not period.condition_ids: + # no condition, no changes, no need for an event, + # no need to keep the period unless it's the pre-equilibration + # or the only non-equilibration period (handled above) + continue + + # Encode the period changes in the SBML model as events + # that trigger at the start of the period or, + # for the first period, as initial assignments. + # Initial assignments are required for the first period, + # because other initial assignments may depend on + # the changed values. + # Additionally, tools that don't support events can still handle + # single-period experiments. + if i_period == 0: + exp_ind_id = self.get_experiment_indicator(experiment.id) + for change in self._new_problem.get_changes_for_period(period): + period0_assignments.setdefault( + change.target_id, [] + ).append((exp_ind_id, change.target_value)) + else: + ev = self._create_period_start_event( + experiment=experiment, + i_period=i_period, + period=period, + ) + self._create_event_assignments_for_period( + ev, + self._new_problem.get_changes_for_period(period), + ) + + # Create initial assignments for the first period + if period0_assignments: + free_symbols_in_assignments = set() + for target_id, changes in period0_assignments.items(): + # The initial value might only be changed for a subset of + # experiments. We need to keep the original initial value + # for all other experiments. + + # Is there an initial assignment for this target already? + # If not, fall back to the initial value of the target. 
+ if ( + ia := model.getInitialAssignmentBySymbol(target_id) + ) is not None: + default = sbml_math_to_sympy(ia.getMath()) + else: + # use the initial value of the target as default + target = model.getElementBySId(target_id) + default = self._initial_value_from_element(target) + + # Only create the initial assignment if there is + # actually something to change. + if expr_cond_pairs := [ + (target_value, sp.Symbol(exp_ind) > 0.5) + for exp_ind, target_value in changes + if target_value != default + ]: + # Unlike events, we can't have different initial + # assignments for different experiments, so we need to + # combine all changes into a single piecewise + # expression. + + expr = sp.Piecewise( + *expr_cond_pairs, + (default, True), + ) + + # Create a new initial assignment if necessary, otherwise + # overwrite the existing one. + if ia is None: + ia = model.createInitialAssignment() + ia.setSymbol(target_id) + + set_math(ia, expr) + free_symbols_in_assignments |= expr.free_symbols + + # the target value may depend on parameters that are only + # introduced in the PEtab parameter table - those need + # to be added to the model + for sym in free_symbols_in_assignments: + if model.getElementBySId(sym.name) is None: + add_sbml_parameter( + model, id_=sym.name, constant=True, value=0 + ) + + if len(kept_periods) > 2: + raise AssertionError("Expected at most two periods to be kept.") + + # add conditions that set the indicator parameters + for period in kept_periods: + period.condition_ids = [ + self._get_experiment_indicator_condition_id(experiment.id), + self.CONDITION_ID_PREEQ_ON + if period.is_preequilibration + else self.CONDITION_ID_PREEQ_OFF, + ] + + experiment.periods = kept_periods + + @staticmethod + def _initial_value_from_element(target: libsbml.SBase) -> sp.Basic: + """Get the initial value of an SBML element. 
+ + The value of the size attribute of compartments, + the initial concentration or amount of species (amount for + `hasOnlySubstanceUnits=true`, concentration otherwise), and + the value of parameters, not considering any initial assignment + constructs. + """ + if target is None: + raise ValueError("`target` is None.") + + if target.getTypeCode() == libsbml.SBML_COMPARTMENT: + return sp.Float(target.getSize()) + + if target.getTypeCode() == libsbml.SBML_SPECIES: + if target.getHasOnlySubstanceUnits(): + # amount-based -> return amount + if target.isSetInitialAmount(): + return sp.Float(target.getInitialAmount()) + return sp.Float(target.getInitialConcentration()) * sp.Symbol( + target.getCompartment() + ) + # concentration-based -> return concentration + if target.isSetInitialConcentration(): + return sp.Float(target.getInitialConcentration()) + + return sp.Float(target.getInitialAmount()) / sp.Symbol( + target.getCompartment() + ) + + if target.getTypeCode() == libsbml.SBML_PARAMETER: + return sp.Float(target.getValue()) + + raise NotImplementedError( + "Cannot create initial assignment for unsupported SBML " + f"entity type {target.getTypeCode()}." 
+ ) + + def _create_period_start_event( + self, experiment: Experiment, i_period: int, period: ExperimentPeriod + ) -> libsbml.Event: + """Create an event that triggers at the start of a period.""" + + # TODO: for now, add separate events for each experiment x period, + # this could be optimized to reuse events + + ev = self._model.createEvent() + check(ev.setId(f"_petab_event_{experiment.id}_{i_period}")) + check(ev.setUseValuesFromTriggerTime(True)) + trigger = ev.createTrigger() + check(trigger.setInitialValue(False)) # may trigger at t=0 + check(trigger.setPersistent(True)) + if self._petab_event_priority is not None: + priority = ev.createPriority() + set_math(priority, self._petab_event_priority) + + exp_ind_id = self.get_experiment_indicator(experiment.id) + + # Create trigger expressions + # Since handling of == and !=, and distinguishing < and <= + # (and > and >=), is a bit tricky in terms of root-finding, + # we use these slightly more convoluted expressions. + # (assuming that the indicator parameters are {0, 1}) + if period.is_preequilibration: + trig_math = libsbml.parseL3Formula( + f"({exp_ind_id} > 0.5) && ({self._preeq_indicator} > 0.5)" + ) + else: + trig_math = libsbml.parseL3Formula( + f"({exp_ind_id} > 0.5) " + f"&& ({self._preeq_indicator} < 0.5) " + f"&& (time >= {period.time})" + ) + check(trigger.setMath(trig_math)) + + return ev + + def _add_preequilibration_indicator( + self, + ) -> None: + """Add an indicator parameter for the pre-equilibration to the SBML + model.""" + par_id = self._preeq_indicator + if self._model.getElementBySId(par_id) is not None: + raise ValueError( + f"Entity with ID {par_id} already exists in the SBML model." + ) + + # add the pre-steady-state indicator parameter + add_sbml_parameter(self._model, id_=par_id, value=0, constant=False) + + @staticmethod + def get_experiment_indicator(experiment_id: str) -> str: + """The ID of the experiment indicator parameter. 
+ + The experiment indicator parameter is used to identify the + experiment in the SBML model. It is a parameter that is set + to 1 for the current experiment and 0 for all other + experiments. The parameter is used in the event trigger + to determine whether the event should be triggered. + + :param experiment_id: The ID of the experiment for which to create + the experiment indicator parameter ID. + """ + return f"_petab_experiment_indicator_{experiment_id}" + + @staticmethod + def _create_event_assignments_for_period( + event: libsbml.Event, changes: list[Change] + ) -> None: + """Create event assignments for a given period. + + Converts PEtab ``Change``s to equivalent SBML event assignments. + + Note that the SBML event assignment formula is not necessarily the same + as the `targetValue` in PEtab. + In SBML, concentrations are treated as derived quantities. + Therefore, changing the size of a compartment will update the + concentrations of all contained concentration-based species. + In PEtab, such a change would not automatically update the species + concentrations, but only the compartment size. + + Therefore, to correctly implement a PEtab change of a compartment size + in SBML, we need to compensate for the automatic update of species + concentrations by adding event assignments for all contained + concentration-based species. + + :param event: The SBML event to which the assignments should be added. + :param changes: The PEtab condition changes that are to be applied + at the start of the period. 
+ """ + _add_assignment = ExperimentsToSbmlConverter._add_assignment + sbml_model = event.getModel() + # collect IDs of compartments that are changed in this period + changed_compartments = { + change.target_id + for change in changes + if sbml_model.getElementBySId(change.target_id) is not None + and sbml_model.getElementBySId(change.target_id).getTypeCode() + == libsbml.SBML_COMPARTMENT + } + + for change in changes: + sbml_target = sbml_model.getElementBySId(change.target_id) + + if sbml_target is None: + raise ValueError( + f"Cannot create event assignment for change of " + f"`{change.target_id}`: No such entity in the SBML model." + ) + + target_type = sbml_target.getTypeCode() + if target_type == libsbml.SBML_COMPARTMENT: + # handle the actual compartment size change + _add_assignment(event, change.target_id, change.target_value) + + # Changing a compartment size affects all contained + # concentration-based species - we need to add event + # assignments for those to compensate for the automatic + # update of their concentrations. + # The event assignment will set the concentration to + # new_conc = assigned_amount / new_volume + # = assigned_conc * old_volume / new_volume + # <=> assigned_conc = new_conc * new_volume / old_volume + # Therefore, the event assignment is not just `new_conc`, + # but `new_conc * new_volume / old_volume`. 
+ + # concentration-based species in the changed compartment + conc_species = [ + species.getId() + for species in sbml_model.getListOfSpecies() + if species.getCompartment() == change.target_id + and not species.getHasOnlySubstanceUnits() + ] + for species_id in conc_species: + if species_change := next( + (c for c in changes if c.target_id == species_id), None + ): + # there is an explicit change for this species + # in this period + new_conc = species_change.target_value + else: + # no explicit change, use the pre-event concentration + new_conc = sp.Symbol(species_id) + + _add_assignment( + event, + species_id, + # new_conc * new_volume / old_volume + new_conc + * change.target_value + / sp.Symbol(change.target_id), + ) + elif ( + target_type != libsbml.SBML_SPECIES + or sbml_target.getCompartment() not in changed_compartments + or sbml_target.getHasOnlySubstanceUnits() is True + ): + # Handle any changes other than compartments and + # concentration-based species inside resized compartments + # that we already handled above. + # Those translate directly to event assignments. + _add_assignment(event, change.target_id, change.target_value) + + @staticmethod + def _add_assignment( + event: libsbml.Event, target_id: str, target_value: sp.Basic + ) -> None: + """Add a single event assignment to the given event + and apply any necessary changes to the model.""" + sbml_model = event.getModel() + ea = event.createEventAssignment() + ea.setVariable(target_id) + set_math(ea, target_value) + + # target needs const=False, and target may not exist yet + # (e.g., in case of output parameters added in the observable + # table) + target = sbml_model.getElementBySId(target_id) + if target is None: + add_sbml_parameter( + sbml_model, id_=target_id, constant=False, value=0 + ) + else: + # We can safely change the `constant` attribute of the target. + # "Constant" does not imply "boundary condition" in SBML. 
+ target.setConstant(False) + + # the target value may depend on parameters that are only + # introduced in the PEtab parameter table - those need + # to be added to the model + for sym in target_value.free_symbols: + if sbml_model.getElementBySId(sym.name) is None: + add_sbml_parameter( + sbml_model, id_=sym.name, constant=True, value=0 + ) + + def _add_indicators_to_conditions(self) -> None: + """After converting the experiments to events, add the indicator + parameters for the pre-equilibration period and for the different + experiments to the remaining conditions. + Then remove all other conditions.""" + problem = self._new_problem + + # create conditions for indicator parameters + problem += Condition( + id=self.CONDITION_ID_PREEQ_ON, + changes=[Change(target_id=self._preeq_indicator, target_value=1)], + ) + + problem += Condition( + id=self.CONDITION_ID_PREEQ_OFF, + changes=[Change(target_id=self._preeq_indicator, target_value=0)], + ) + + # add conditions for the experiment indicators + for experiment in problem.experiments: + cond_id = self._get_experiment_indicator_condition_id( + experiment.id + ) + changes = [ + Change( + target_id=self.get_experiment_indicator(experiment.id), + target_value=1, + ) + ] + problem += Condition( + id=cond_id, + changes=changes, + ) + + # All changes have been encoded in event assignments and can be + # removed. Only keep the conditions setting our indicators. 
+ problem.condition_tables = [ + ConditionTable( + [ + condition + for condition in problem.conditions + if condition.id.startswith("_petab") + ] + ) + ] diff --git a/petab/v2/core.py b/petab/v2/core.py new file mode 100644 index 00000000..fb206502 --- /dev/null +++ b/petab/v2/core.py @@ -0,0 +1,2557 @@ +"""Types around the PEtab object model.""" + +from __future__ import annotations + +import copy +import logging +import os +import tempfile +import traceback +from abc import abstractmethod +from collections.abc import Sequence +from enum import Enum +from itertools import chain +from math import nan +from numbers import Number +from pathlib import Path +from typing import ( + TYPE_CHECKING, + Annotated, + Any, + Generic, + Self, + TypeVar, + get_args, +) + +import numpy as np +import pandas as pd +import sympy as sp +from pydantic import ( + AfterValidator, + AnyUrl, + BaseModel, + BeforeValidator, + ConfigDict, + Field, + ValidationInfo, + field_serializer, + field_validator, + model_validator, +) + +from .._utils import _generate_path +from ..v1 import ( + validate_yaml_syntax, + yaml, +) +from ..v1.distributions import * +from ..v1.lint import is_valid_identifier +from ..v1.math import petab_math_str, sympify_petab +from ..v1.models.model import Model, model_factory +from ..v1.yaml import get_path_prefix +from ..versions import parse_version +from . 
import C, get_observable_df + +if TYPE_CHECKING: + from ..v2.lint import ValidationResultList, ValidationTask + + +__all__ = [ + "Problem", + "ProblemConfig", + "Observable", + "ObservableTable", + "NoiseDistribution", + "Change", + "Condition", + "ConditionTable", + "ExperimentPeriod", + "Experiment", + "ExperimentTable", + "Measurement", + "MeasurementTable", + "Mapping", + "MappingTable", + "Parameter", + "ParameterScale", + "ParameterTable", + "PriorDistribution", +] + +logger = logging.getLogger(__name__) + + +def _is_finite_or_neg_inf(v: float, info: ValidationInfo) -> float: + if not np.isfinite(v) and v != -np.inf: + raise ValueError( + f"{info.field_name} value must be finite or -inf but got {v}" + ) + return v + + +def _is_finite_or_pos_inf(v: float, info: ValidationInfo) -> float: + if not np.isfinite(v) and v != np.inf: + raise ValueError( + f"{info.field_name} value must be finite or inf but got {v}" + ) + return v + + +def _not_nan(v: float, info: ValidationInfo) -> float: + if np.isnan(v): + raise ValueError(f"{info.field_name} value must not be nan.") + return v + + +def _convert_nan_to_none(v): + """Convert NaN or "" to None.""" + if isinstance(v, float) and np.isnan(v): + return None + if isinstance(v, str) and v == "": + return None + return v + + +def _valid_petab_id(v: str) -> str: + """Field validator for PEtab IDs.""" + if not v: + raise ValueError("ID must not be empty.") + if not is_valid_identifier(v): + raise ValueError(f"Invalid ID: {v}") + return v + + +def _valid_petab_id_or_none(v: str) -> str | None: + """Field validator for optional PEtab IDs.""" + if not v: + return None + if not is_valid_identifier(v): + raise ValueError(f"Invalid ID: {v}") + return v + + +class ParameterScale(str, Enum): + """Parameter scales. + + Parameter scales as used in the PEtab parameter table. + """ + + LIN = C.LIN + LOG = C.LOG + LOG10 = C.LOG10 + + +class NoiseDistribution(str, Enum): + """Noise distribution types. 
+ + Noise distributions as used in the PEtab observable table. + """ + + #: Normal distribution + NORMAL = C.NORMAL + #: Laplace distribution + LAPLACE = C.LAPLACE + #: Log-normal distribution + LOG_NORMAL = C.LOG_NORMAL + #: Log-Laplace distribution + LOG_LAPLACE = C.LOG_LAPLACE + + +class PriorDistribution(str, Enum): + """Prior types. + + Prior types as used in the PEtab parameter table. + """ + + #: Cauchy distribution. + CAUCHY = C.CAUCHY + #: Chi-squared distribution. + CHI_SQUARED = C.CHI_SQUARED + #: Exponential distribution. + EXPONENTIAL = C.EXPONENTIAL + #: Gamma distribution. + GAMMA = C.GAMMA + #: Laplace distribution. + LAPLACE = C.LAPLACE + #: Log-Laplace distribution + LOG_LAPLACE = C.LOG_LAPLACE + #: Log-normal distribution. + LOG_NORMAL = C.LOG_NORMAL + #: Log-uniform distribution. + LOG_UNIFORM = C.LOG_UNIFORM + #: Normal distribution. + NORMAL = C.NORMAL + #: Rayleigh distribution. + RAYLEIGH = C.RAYLEIGH + #: Uniform distribution. + UNIFORM = C.UNIFORM + + +assert set(C.PRIOR_DISTRIBUTIONS) == {e.value for e in PriorDistribution}, ( + "PriorDistribution enum does not match C.PRIOR_DISTRIBUTIONS " + f"{set(C.PRIOR_DISTRIBUTIONS)} vs { {e.value for e in PriorDistribution} }" +) + +_prior_to_cls = { + PriorDistribution.CAUCHY: Cauchy, + PriorDistribution.CHI_SQUARED: ChiSquare, + PriorDistribution.EXPONENTIAL: Exponential, + PriorDistribution.GAMMA: Gamma, + PriorDistribution.LAPLACE: Laplace, + PriorDistribution.LOG_LAPLACE: Laplace, + PriorDistribution.LOG_NORMAL: Normal, + PriorDistribution.LOG_UNIFORM: LogUniform, + PriorDistribution.NORMAL: Normal, + PriorDistribution.RAYLEIGH: Rayleigh, + PriorDistribution.UNIFORM: Uniform, +} + +assert not (_mismatch := set(PriorDistribution) ^ set(_prior_to_cls)), ( + "PriorDistribution enum does not match _prior_to_cls. 
" + f"Mismatches: {_mismatch}" +) + + +T = TypeVar("T", bound=BaseModel) + + +class BaseTable(BaseModel, Generic[T]): + """Base class for PEtab tables.""" + + #: The table elements + elements: list[T] + #: The path to the table file, if applicable. + #: Relative to the base path, if the base path is set and rel_path is not + #: an absolute path. + rel_path: AnyUrl | Path | None = Field(exclude=True, default=None) + #: The base path for the table file, if applicable. + #: This is usually the directory of the PEtab YAML file. + base_path: AnyUrl | Path | None = Field(exclude=True, default=None) + + def __init__(self, elements: list[T] = None, **kwargs) -> None: + """Initialize the BaseTable with a list of elements.""" + if elements is None: + elements = [] + super().__init__(elements=elements, **kwargs) + + def __getitem__(self, id_: str) -> T: + """Get an element by ID. + + :param id_: The ID of the element to retrieve. + :return: The element with the given ID. + :raises KeyError: If no element with the given ID exists. + :raises NotImplementedError: + If the element type does not have an ID attribute. 
+ """ + if "id" not in self._element_class().model_fields: + raise NotImplementedError( + f"__getitem__ is not implemented for {self.__class__.__name__}" + ) + + for element in self.elements: + if element.id == id_: + return element + + raise KeyError(f"{self._element_class().__name__} ID {id_} not found") + + @classmethod + @abstractmethod + def from_df(cls, df: pd.DataFrame, **kwargs) -> BaseTable[T]: + """Create a table from a DataFrame.""" + pass + + @abstractmethod + def to_df(self) -> pd.DataFrame: + """Convert the table to a DataFrame.""" + pass + + @classmethod + def from_tsv( + cls, file_path: str | Path, base_path: str | Path | None = None + ) -> BaseTable[T]: + """Create table from a TSV file.""" + df = pd.read_csv(_generate_path(file_path, base_path), sep="\t") + return cls.from_df(df, rel_path=file_path, base_path=base_path) + + def to_tsv(self, file_path: str | Path = None) -> None: + """Write the table to a TSV file.""" + df = self.to_df() + df.to_csv( + file_path or _generate_path(self.rel_path, self.base_path), + sep="\t", + index=not isinstance(df.index, pd.RangeIndex), + ) + + @classmethod + def _element_class(cls) -> type[T]: + """Get the class of the elements in the table.""" + return get_args(cls.model_fields["elements"].annotation)[0] + + def __add__(self, other: T) -> BaseTable[T]: + """Add an item to the table.""" + if not isinstance(other, self._element_class()): + raise TypeError( + f"Can only add {self._element_class().__name__} " + f"to {self.__class__.__name__}" + ) + return self.__class__(elements=self.elements + [other]) + + def __iadd__(self, other: T) -> BaseTable[T]: + """Add an item to the table in place.""" + if not isinstance(other, self._element_class()): + raise TypeError( + f"Can only add {self._element_class().__name__} " + f"to {self.__class__.__name__}" + ) + self.elements.append(other) + return self + + +class Observable(BaseModel): + """Observable definition.""" + + #: Observable ID.
+ id: Annotated[str, AfterValidator(_valid_petab_id)] = Field( + alias=C.OBSERVABLE_ID + ) + #: Observable name. + name: str | None = Field(alias=C.OBSERVABLE_NAME, default=None) + #: Observable formula. + formula: sp.Basic | None = Field(alias=C.OBSERVABLE_FORMULA, default=None) + #: Noise formula. + noise_formula: sp.Basic | None = Field(alias=C.NOISE_FORMULA, default=None) + #: Noise distribution. + noise_distribution: NoiseDistribution = Field( + alias=C.NOISE_DISTRIBUTION, default=NoiseDistribution.NORMAL + ) + #: Placeholder symbols for the observable formula. + observable_placeholders: list[sp.Symbol] = Field( + alias=C.OBSERVABLE_PLACEHOLDERS, default=[] + ) + #: Placeholder symbols for the noise formula. + noise_placeholders: list[sp.Symbol] = Field( + alias=C.NOISE_PLACEHOLDERS, default=[] + ) + + #: :meta private: + model_config = ConfigDict( + arbitrary_types_allowed=True, + populate_by_name=True, + extra="allow", + validate_assignment=True, + ) + + @field_validator( + "name", + "formula", + "noise_formula", + "noise_distribution", + mode="before", + ) + @classmethod + def _convert_nan_to_default(cls, v, info: ValidationInfo): + if isinstance(v, float) and np.isnan(v): + return cls.model_fields[info.field_name].default + return v + + @field_validator("formula", "noise_formula", mode="before") + @classmethod + def _sympify(cls, v): + if v is None or isinstance(v, sp.Basic): + return v + if isinstance(v, float) and np.isnan(v): + return None + + return sympify_petab(v) + + @field_validator( + "observable_placeholders", "noise_placeholders", mode="before" + ) + @classmethod + def _sympify_id_list(cls, v): + if v is None: + return [] + + if isinstance(v, float) and np.isnan(v): + return [] + + if isinstance(v, str): + v = v.split(C.PARAMETER_SEPARATOR) + elif not isinstance(v, Sequence): + v = [v] + + v = [pid.strip() for pid in v] + return [sympify_petab(_valid_petab_id(pid)) for pid in v if pid] + + +class ObservableTable(BaseTable[Observable]): + 
"""PEtab observable table.""" + + @property + def observables(self) -> list[Observable]: + """List of observables.""" + return self.elements + + @classmethod + def from_df(cls, df: pd.DataFrame, **kwargs) -> ObservableTable: + """Create an ObservableTable from a DataFrame.""" + if df is None: + return cls(**kwargs) + + df = get_observable_df(df) + observables = [ + Observable(**row.to_dict()) + for _, row in df.reset_index().iterrows() + ] + return cls(observables, **kwargs) + + def to_df(self) -> pd.DataFrame: + """Convert the ObservableTable to a DataFrame.""" + records = self.model_dump(by_alias=True)["elements"] + for record in records: + obs = record[C.OBSERVABLE_FORMULA] + noise = record[C.NOISE_FORMULA] + record[C.OBSERVABLE_FORMULA] = petab_math_str(obs) + record[C.NOISE_FORMULA] = petab_math_str(noise) + record[C.OBSERVABLE_PLACEHOLDERS] = C.PARAMETER_SEPARATOR.join( + map(str, record[C.OBSERVABLE_PLACEHOLDERS]) + ) + record[C.NOISE_PLACEHOLDERS] = C.PARAMETER_SEPARATOR.join( + map(str, record[C.NOISE_PLACEHOLDERS]) + ) + return pd.DataFrame(records).set_index([C.OBSERVABLE_ID]) + + +class Change(BaseModel): + """A change to the model or model state. + + A change to the model or model state, corresponding to an individual + row of the PEtab condition table. + + >>> Change( + ... target_id="k1", + ... target_value="10", + ... ) # doctest: +NORMALIZE_WHITESPACE + Change(target_id='k1', target_value=10.0000000000000) + """ + + #: The ID of the target entity to change. + target_id: Annotated[str, AfterValidator(_valid_petab_id)] = Field( + alias=C.TARGET_ID + ) + #: The value to set the target entity to. 
+ target_value: sp.Basic = Field(alias=C.TARGET_VALUE) + + #: :meta private: + model_config = ConfigDict( + arbitrary_types_allowed=True, + populate_by_name=True, + use_enum_values=True, + extra="allow", + validate_assignment=True, + ) + + @field_validator("target_value", mode="before") + @classmethod + def _sympify(cls, v): + if v is None or isinstance(v, sp.Basic): + return v + if isinstance(v, float) and np.isnan(v): + return None + + return sympify_petab(v) + + +class Condition(BaseModel): + """A set of changes to the model or model state. + + A set of simultaneously occurring changes to the model or model state, + corresponding to a perturbation of the underlying system. This corresponds + to all rows of the PEtab condition table with the same condition ID. + + >>> Condition( + ... id="condition1", + ... changes=[ + ... Change( + ... target_id="k1", + ... target_value="10", + ... ) + ... ], + ... ) # doctest: +NORMALIZE_WHITESPACE + Condition(id='condition1', + changes=[Change(target_id='k1', target_value=10.0000000000000)]) + """ + + #: The condition ID. + id: Annotated[str, AfterValidator(_valid_petab_id)] = Field( + alias=C.CONDITION_ID + ) + #: The changes associated with this condition. 
+ changes: list[Change] + + #: :meta private: + model_config = ConfigDict( + populate_by_name=True, extra="allow", validate_assignment=True + ) + + def __add__(self, other: Change) -> Condition: + """Add a change to the set.""" + if not isinstance(other, Change): + raise TypeError("Can only add Change to Condition") + return Condition(id=self.id, changes=self.changes + [other]) + + def __iadd__(self, other: Change) -> Condition: + """Add a change to the set in place.""" + if not isinstance(other, Change): + raise TypeError("Can only add Change to Condition") + self.changes.append(other) + return self + + +class ConditionTable(BaseTable[Condition]): + """PEtab condition table.""" + + @property + def conditions(self) -> list[Condition]: + """List of conditions.""" + return self.elements + + @classmethod + def from_df(cls, df: pd.DataFrame, **kwargs) -> ConditionTable: + """Create a ConditionTable from a DataFrame.""" + if df is None or df.empty: + return cls(**kwargs) + + conditions = [] + for condition_id, sub_df in df.groupby(C.CONDITION_ID): + changes = [Change(**row) for row in sub_df.to_dict("records")] + conditions.append(Condition(id=condition_id, changes=changes)) + + return cls(conditions, **kwargs) + + def to_df(self) -> pd.DataFrame: + """Convert the ConditionTable to a DataFrame.""" + records = [ + {C.CONDITION_ID: condition.id, **change.model_dump(by_alias=True)} + for condition in self.conditions + for change in condition.changes + ] + for record in records: + record[C.TARGET_VALUE] = ( + float(record[C.TARGET_VALUE]) + if record[C.TARGET_VALUE].is_number + else str(record[C.TARGET_VALUE]) + ) + return ( + pd.DataFrame(records) + if records + else pd.DataFrame(columns=C.CONDITION_DF_REQUIRED_COLS) + ) + + @property + def free_symbols(self) -> set[sp.Symbol]: + """Get all free symbols in the condition table. 
+ + This includes all free symbols in the target values of the changes, + independently of whether it is referenced by any experiment, or + (indirectly) by any measurement. + """ + return set( + chain.from_iterable( + change.target_value.free_symbols + for condition in self.conditions + for change in condition.changes + if change.target_value is not None + ) + ) + + +class ExperimentPeriod(BaseModel): + """A period of a timecourse or experiment defined by a start time + and a list of condition IDs. + + This corresponds to a row of the PEtab experiment table. + """ + + #: The start time of the period in time units as defined in the model. + time: Annotated[float, AfterValidator(_is_finite_or_neg_inf)] = Field( + alias=C.TIME + ) + #: The IDs of the conditions to be applied at the start time. + condition_ids: list[str] = Field(default_factory=list) + + #: :meta private: + model_config = ConfigDict( + populate_by_name=True, extra="allow", validate_assignment=True + ) + + @field_validator("condition_ids", mode="before") + @classmethod + def _validate_ids(cls, condition_ids): + if condition_ids in [None, "", [], [""]]: + # unspecified, or "use-model-as-is" + return [] + + for condition_id in condition_ids: + # The empty condition ID for "use-model-as-is" has been handled + # above. Having a combination of empty and non-empty IDs is an + # error, since the targets of conditions to be combined must be + # disjoint. + if not is_valid_identifier(condition_id): + raise ValueError(f"Invalid {C.CONDITION_ID}: `{condition_id}'") + return condition_ids + + @property + def is_preequilibration(self) -> bool: + """Check if this period is a preequilibration period.""" + return self.time == C.TIME_PREEQUILIBRATION + + +class Experiment(BaseModel): + """An experiment or a timecourse defined by an ID and a set of different + periods. + + Corresponds to a group of rows of the PEtab experiment table with the same + experiment ID. + """ + + #: The experiment ID. 
+ id: Annotated[str, AfterValidator(_valid_petab_id)] = Field( + alias=C.EXPERIMENT_ID + ) + #: The periods of the experiment. + periods: list[ExperimentPeriod] = [] + + #: :meta private: + model_config = ConfigDict( + arbitrary_types_allowed=True, + populate_by_name=True, + extra="allow", + validate_assignment=True, + ) + + def __add__(self, other: ExperimentPeriod) -> Experiment: + """Add a period to the experiment.""" + if not isinstance(other, ExperimentPeriod): + raise TypeError("Can only add ExperimentPeriod to Experiment") + return Experiment(id=self.id, periods=self.periods + [other]) + + def __iadd__(self, other: ExperimentPeriod) -> Experiment: + """Add a period to the experiment in place.""" + if not isinstance(other, ExperimentPeriod): + raise TypeError("Can only add ExperimentPeriod to Experiment") + self.periods.append(other) + return self + + @property + def has_preequilibration(self) -> bool: + """Check if the experiment has preequilibration enabled.""" + return any(period.is_preequilibration for period in self.periods) + + @property + def sorted_periods(self) -> list[ExperimentPeriod]: + """Get the periods of the experiment sorted by time.""" + return sorted(self.periods, key=lambda period: period.time) + + def sort_periods(self) -> None: + """Sort the periods of the experiment by time.""" + self.periods.sort(key=lambda period: period.time) + + +class ExperimentTable(BaseTable[Experiment]): + """PEtab experiment table.""" + + @property + def experiments(self) -> list[Experiment]: + """List of experiments.""" + return self.elements + + @classmethod + def from_df(cls, df: pd.DataFrame, **kwargs) -> ExperimentTable: + """Create an ExperimentTable from a DataFrame.""" + if df is None or df.empty: + return cls(**kwargs) + + experiments = [] + for experiment_id, cur_exp_df in df.groupby(C.EXPERIMENT_ID): + periods = [] + for timepoint in cur_exp_df[C.TIME].unique(): + condition_ids = [ + cid + for cid in cur_exp_df.loc[ + cur_exp_df[C.TIME] == timepoint,
C.CONDITION_ID + ] + if not pd.isna(cid) + ] + periods.append( + ExperimentPeriod( + time=timepoint, + condition_ids=condition_ids, + ) + ) + experiments.append(Experiment(id=experiment_id, periods=periods)) + + return cls(experiments, **kwargs) + + def to_df(self) -> pd.DataFrame: + """Convert the ExperimentTable to a DataFrame.""" + records = [ + { + C.EXPERIMENT_ID: experiment.id, + C.TIME: period.time, + C.CONDITION_ID: condition_id, + } + for experiment in self.experiments + for period in experiment.periods + for condition_id in period.condition_ids or [""] + ] + return ( + pd.DataFrame(records) + if records + else pd.DataFrame(columns=C.EXPERIMENT_DF_REQUIRED_COLS) + ) + + +class Measurement(BaseModel): + """A measurement. + + A measurement of an observable at a specific time point in a specific + experiment. + """ + + #: The model ID. + model_id: Annotated[ + str | None, BeforeValidator(_valid_petab_id_or_none) + ] = Field(alias=C.MODEL_ID, default=None) + #: The observable ID. + observable_id: Annotated[str, BeforeValidator(_valid_petab_id)] = Field( + alias=C.OBSERVABLE_ID + ) + #: The experiment ID. + experiment_id: Annotated[ + str | None, BeforeValidator(_valid_petab_id_or_none) + ] = Field(alias=C.EXPERIMENT_ID, default=None) + #: The time point of the measurement in time units as defined in the model. + time: Annotated[float, AfterValidator(_is_finite_or_pos_inf)] = Field( + alias=C.TIME + ) + #: The measurement value. + measurement: Annotated[float, AfterValidator(_not_nan)] = Field( + alias=C.MEASUREMENT + ) + #: Values for placeholder parameters in the observable formula. + observable_parameters: list[sp.Basic] = Field( + alias=C.OBSERVABLE_PARAMETERS, default_factory=list + ) + #: Values for placeholder parameters in the noise formula. 
+ noise_parameters: list[sp.Basic] = Field( + alias=C.NOISE_PARAMETERS, default_factory=list + ) + + #: :meta private: + model_config = ConfigDict( + arbitrary_types_allowed=True, + populate_by_name=True, + extra="allow", + validate_assignment=True, + ) + + @field_validator( + "experiment_id", + "observable_parameters", + "noise_parameters", + mode="before", + ) + @classmethod + def convert_nan_to_none(cls, v, info: ValidationInfo): + if isinstance(v, float) and np.isnan(v): + return cls.model_fields[info.field_name].default + return v + + @field_validator( + "observable_parameters", "noise_parameters", mode="before" + ) + @classmethod + def _sympify_list(cls, v): + if v is None: + return [] + + if isinstance(v, float) and np.isnan(v): + return [] + + if isinstance(v, str): + v = v.split(C.PARAMETER_SEPARATOR) + elif not isinstance(v, Sequence): + v = [v] + + return [sympify_petab(x) for x in v] + + +class MeasurementTable(BaseTable[Measurement]): + """PEtab measurement table.""" + + @property + def measurements(self) -> list[Measurement]: + """List of measurements.""" + return self.elements + + @classmethod + def from_df(cls, df: pd.DataFrame, **kwargs) -> MeasurementTable: + """Create a MeasurementTable from a DataFrame.""" + if df is None: + return cls(**kwargs) + + if C.MODEL_ID in df.columns: + df[C.MODEL_ID] = df[C.MODEL_ID].apply(_convert_nan_to_none) + + measurements = [ + Measurement( + **row.to_dict(), + ) + for _, row in df.reset_index().iterrows() + ] + + return cls(measurements, **kwargs) + + def to_df(self) -> pd.DataFrame: + """Convert the MeasurementTable to a DataFrame.""" + records = self.model_dump(by_alias=True)["elements"] + for record in records: + record[C.OBSERVABLE_PARAMETERS] = C.PARAMETER_SEPARATOR.join( + map(str, record[C.OBSERVABLE_PARAMETERS]) + ) + record[C.NOISE_PARAMETERS] = C.PARAMETER_SEPARATOR.join( + map(str, record[C.NOISE_PARAMETERS]) + ) + + return pd.DataFrame(records) + + +class Mapping(BaseModel): + """Mapping PEtab 
entities to model entities.""" + + #: PEtab entity ID. + petab_id: Annotated[str, AfterValidator(_valid_petab_id)] = Field( + alias=C.PETAB_ENTITY_ID + ) + #: Model entity ID. + model_id: Annotated[str | None, BeforeValidator(_convert_nan_to_none)] = ( + Field(alias=C.MODEL_ENTITY_ID, default=None) + ) + #: Arbitrary name + name: Annotated[str | None, BeforeValidator(_convert_nan_to_none)] = Field( + alias=C.NAME, default=None + ) + + #: :meta private: + model_config = ConfigDict( + populate_by_name=True, extra="allow", validate_assignment=True + ) + + @model_validator(mode="after") + def _validate(self) -> Self: + if ( + self.model_id + and self.model_id != self.petab_id + and is_valid_identifier(self.model_id) + ): + raise ValueError( + "Aliasing of entities that already have a valid identifier " + "is not allowed. Simplify your PEtab problem by removing the " + f"mapping entry for `{self.petab_id} -> {self.model_id}`, " + f"and replacing all occurrences of `{self.petab_id}` with " + f"`{self.model_id}`." 
+ ) + return self + + +class MappingTable(BaseTable[Mapping]): + """PEtab mapping table.""" + + @property + def mappings(self) -> list[Mapping]: + """List of mappings.""" + return self.elements + + @classmethod + def from_df(cls, df: pd.DataFrame, **kwargs) -> MappingTable: + """Create a MappingTable from a DataFrame.""" + if df is None: + return cls(**kwargs) + + mappings = [ + Mapping(**row.to_dict()) for _, row in df.reset_index().iterrows() + ] + return cls(mappings, **kwargs) + + def to_df(self) -> pd.DataFrame: + """Convert the MappingTable to a DataFrame.""" + res = ( + pd.DataFrame(self.model_dump(by_alias=True)["elements"]) + if self.mappings + else pd.DataFrame(columns=C.MAPPING_DF_REQUIRED_COLS) + ) + return res.set_index([C.PETAB_ENTITY_ID]) + + def __getitem__(self, petab_id: str) -> Mapping: + """Get a mapping by PEtab ID.""" + for mapping in self.mappings: + if mapping.petab_id == petab_id: + return mapping + raise KeyError(f"PEtab ID {petab_id} not found") + + def get(self, petab_id, default=None): + """Get a mapping by PEtab ID or return a default value.""" + try: + return self[petab_id] + except KeyError: + return default + + +class Parameter(BaseModel): + """Parameter definition.""" + + #: Parameter ID. + id: Annotated[str, BeforeValidator(_valid_petab_id)] = Field( + alias=C.PARAMETER_ID + ) + #: Lower bound. + lb: Annotated[float | None, BeforeValidator(_convert_nan_to_none)] = Field( + alias=C.LOWER_BOUND, default=None + ) + #: Upper bound. + ub: Annotated[float | None, BeforeValidator(_convert_nan_to_none)] = Field( + alias=C.UPPER_BOUND, default=None + ) + #: Nominal value. + nominal_value: Annotated[ + float | None, BeforeValidator(_convert_nan_to_none) + ] = Field(alias=C.NOMINAL_VALUE, default=None) + #: Is the parameter to be estimated? + estimate: bool = Field(alias=C.ESTIMATE, default=True) + #: Type of parameter prior distribution. 
+ prior_distribution: Annotated[ + PriorDistribution | None, BeforeValidator(_convert_nan_to_none) + ] = Field(alias=C.PRIOR_DISTRIBUTION, default=None) + #: Prior distribution parameters. + prior_parameters: list[float] = Field( + alias=C.PRIOR_PARAMETERS, default_factory=list + ) + + #: :meta private: + model_config = ConfigDict( + arbitrary_types_allowed=True, + populate_by_name=True, + use_enum_values=True, + extra="allow", + validate_assignment=True, + ) + + @field_validator("prior_parameters", mode="before") + @classmethod + def _validate_prior_parameters( + cls, v: str | list[str] | float | None | np.ndarray + ): + if v is None: + return [] + + if isinstance(v, float) and np.isnan(v): + return [] + + if isinstance(v, str): + if v == "": + return [] + v = v.split(C.PARAMETER_SEPARATOR) + elif not isinstance(v, Sequence): + v = [v] + + return [float(x) for x in v] + + @field_validator("estimate", mode="before") + @classmethod + def _validate_estimate_before(cls, v: bool | str): + if isinstance(v, bool): + return v + + if isinstance(v, str): + v = v.strip().lower() + if v == "true": + return True + if v == "false": + return False + + raise ValueError( + f"Invalid value for estimate: {v}. Must be `true` or `false`." 
+ ) + + @field_serializer("estimate") + def _serialize_estimate(self, estimate: bool, _info): + return str(estimate).lower() + + @field_serializer("prior_distribution") + def _serialize_prior_distribution( + self, prior_distribution: PriorDistribution | None, _info + ): + if prior_distribution is None: + return "" + return str(prior_distribution) + + @field_serializer("prior_parameters") + def _serialize_prior_parameters( + self, prior_parameters: list[float], _info + ) -> str: + return C.PARAMETER_SEPARATOR.join(map(str, prior_parameters)) + + @model_validator(mode="after") + def _validate(self) -> Self: + if not self.estimate and self.nominal_value is None: + raise ValueError( + "Non-estimated parameter must have a nominal value" + ) + + if self.estimate and (self.lb is None or self.ub is None): + raise ValueError( + "Estimated parameter must have lower and upper bounds set" + ) + + if self.lb is not None and self.ub is not None and self.lb > self.ub: + raise ValueError( + "Lower bound must be less than or equal to upper bound." + ) + + # NOTE: priorType and priorParameters are currently checked in + # `CheckPriorDistribution` + + return self + + @property + def prior_dist(self) -> Distribution | None: + """Get the prior distribution of the parameter. + + :return: The prior distribution of the parameter, or None if no prior + distribution is set. + """ + if not self.estimate: + raise ValueError(f"Parameter `{self.id}' is not estimated.") + + if self.prior_distribution is None: + return None + + if not (cls := _prior_to_cls.get(self.prior_distribution)): + raise ValueError( + f"Prior distribution `{self.prior_distribution}' not " + "supported." 
+ ) + + if str(self.prior_distribution).startswith("log-"): + log = True + elif str(self.prior_distribution).startswith("log10-"): + log = 10 + else: + log = False + + if cls == Exponential: + # `Exponential.__init__` does not accept the `log` parameter + if log is not False: + raise ValueError( + "Exponential distribution does not support log " + "transformation." + ) + return cls(*self.prior_parameters, trunc=[self.lb, self.ub]) + + if cls == Uniform: + # `Uniform.__init__` does not accept the `trunc` parameter + low = max(self.prior_parameters[0], self.lb) + high = min(self.prior_parameters[1], self.ub) + return cls(low, high) + + if cls == LogUniform: + # Mind the different interpretation of distribution parameters for + # Uniform(..., log=True) and LogUniform!! + return cls(*self.prior_parameters, trunc=[self.lb, self.ub]) + + return cls(*self.prior_parameters, log=log, trunc=[self.lb, self.ub]) + + +class ParameterTable(BaseTable[Parameter]): + """PEtab parameter table.""" + + @property + def parameters(self) -> list[Parameter]: + """List of parameters.""" + return self.elements + + @classmethod + def from_df(cls, df: pd.DataFrame, **kwargs) -> ParameterTable: + """Create a ParameterTable from a DataFrame.""" + if df is None: + return cls(**kwargs) + + parameters = [ + Parameter(**row.to_dict()) + for _, row in df.reset_index().iterrows() + ] + + return cls(parameters, **kwargs) + + def to_df(self) -> pd.DataFrame: + """Convert the ParameterTable to a DataFrame.""" + return pd.DataFrame( + self.model_dump(by_alias=True)["elements"] + ).set_index([C.PARAMETER_ID]) + + @property + def n_estimated(self) -> int: + """Number of estimated parameters.""" + return sum(p.estimate for p in self.parameters) + + +class Problem: + """ + PEtab parameter estimation problem + + A PEtab parameter estimation problem as defined by + + - models + - condition tables + - experiment tables + - measurement tables + - parameter tables + - observable tables + - mapping tables + + See 
def __str__(self):
    """Return a one-line, human-readable summary of the problem.

    The summary contains the problem ID (if any), the list of models, and
    the number of conditions, experiments, observables, measurements, and
    estimated parameters.
    """
    pid = repr(self.id) if self.id else "without ID"

    # Test `self.models` instead of the `self.model` convenience property:
    # that property raises ValueError as soon as the problem holds more than
    # one model, which made `str(problem)` fail for multi-model problems.
    model = f"with models {self.models}" if self.models else "without model"

    ne = len(self.experiments)
    experiments = f"{ne} experiments"

    nc = len(self.conditions)
    conditions = f"{nc} conditions"

    no = len(self.observables)
    observables = f"{no} observables"

    nm = len(self.measurements)
    measurements = f"{nm} measurements"

    # estimated parameters are counted across all parameter tables
    nest = sum(pt.n_estimated for pt in self.parameter_tables)
    parameters = f"{nest} estimated parameters"

    return (
        f"PEtab Problem {pid} {model}, {conditions}, {experiments}, "
        f"{observables}, {measurements}, {parameters}"
    )
+ + This allows accessing PEtab entities such as conditions, experiments, + observables, and parameters by their ID. + + Accessing model entities is not currently not supported. + """ + for table_list in ( + self.condition_tables, + self.experiment_tables, + self.observable_tables, + self.measurement_tables, + self.parameter_tables, + self.mapping_tables, + ): + for table in table_list: + try: + return table[key] + except (KeyError, NotImplementedError): + pass + + raise KeyError( + f"Entity with ID '{key}' not found in the PEtab problem" + ) + + @staticmethod + def from_yaml( + yaml_config: dict | Path | str, base_path: str | Path = None + ) -> Problem: + """ + Factory method to load model and tables as specified by YAML file. + + Arguments: + yaml_config: PEtab configuration as dictionary or YAML file name + base_path: Base directory or URL to resolve relative paths + """ + if isinstance(yaml_config, Path): + yaml_config = str(yaml_config) + + if isinstance(yaml_config, str): + yaml_file = yaml_config + if base_path is None: + base_path = get_path_prefix(yaml_file) + yaml_config = yaml.load_yaml(yaml_file) + else: + yaml_file = None + + validate_yaml_syntax(yaml_config) + + if (format_version := parse_version(yaml_config[C.FORMAT_VERSION]))[ + 0 + ] != 2: + # If we got a path to a v1 yaml file, try to auto-upgrade + from tempfile import TemporaryDirectory + + from .petab1to2 import petab1to2 + + if format_version[0] == 1 and yaml_file: + logger.debug( + "Auto-upgrading problem from PEtab 1.0 to PEtab 2.0" + ) + with TemporaryDirectory() as tmpdirname: + try: + petab1to2(yaml_file, output_dir=tmpdirname) + except Exception as e: + raise ValueError( + "Failed to auto-upgrade PEtab 1.0 problem to " + "PEtab 2.0" + ) from e + return Problem.from_yaml( + Path(tmpdirname) / Path(yaml_file).name + ) + raise ValueError( + "Provided PEtab files are of unsupported version " + f"{yaml_config[C.FORMAT_VERSION]}." 
@staticmethod
def from_combine(filename: Path | str) -> Problem:
    """Read PEtab COMBINE archive (http://co.mbine.org/documents/archive).

    See also :py:func:`petab.v2.create_combine_archive`.

    Arguments:
        filename: Path to the PEtab-COMBINE archive

    Returns:
        A :py:class:`petab.v2.Problem` instance.

    Raises:
        ImportError: If libcombine is not installed.
        ValueError: If the archive cannot be loaded.
    """
    # function-level import, because module-level import interfered with
    # other SWIG interfaces
    try:
        import libcombine
    except ImportError as e:
        raise ImportError(
            "To use PEtab's COMBINE functionality, libcombine "
            "(python-libcombine) must be installed."
        ) from e

    archive = libcombine.CombineArchive()
    # Treat any falsy return value as failure. The previous `is None` check
    # can never fire if the call reports failure as `False`.
    # NOTE(review): libcombine is expected to return a bool here - confirm
    # against the installed version.
    if not archive.initializeFromArchive(str(filename)):
        raise ValueError(f"Invalid Combine Archive: {filename}")

    try:
        with tempfile.TemporaryDirectory() as tmpdirname:
            archive.extractTo(tmpdirname)
            problem = Problem.from_yaml(
                os.path.join(
                    tmpdirname, archive.getMasterFile().getLocation()
                )
            )
    finally:
        # release the archive even if extraction or loading failed
        archive.cleanUp()

    return problem
@property
def model(self) -> Model | None:
    """The single model of the problem.

    Convenience accessor for problems that contain at most one model.

    :return:
        The only model, or ``None`` if the problem has no model.
    :raises:
        ValueError: If the problem has more than one model defined.
    """
    n_models = len(self.models)
    if n_models > 1:
        raise ValueError(
            "Problem contains more than one model. "
            "Use `Problem.models` to access all models."
        )
    return self.models[0] if n_models == 1 else None
@property
def condition_df(self) -> pd.DataFrame | None:
    """All conditions of all condition tables as one DataFrame.

    :return: The combined DataFrame, or ``None`` if there are no conditions.
    """
    conditions = self.conditions
    if not conditions:
        return None
    return ConditionTable(conditions).to_df()


@condition_df.setter
def condition_df(self, value: pd.DataFrame):
    """Replace all condition tables by a single table built from ``value``."""
    table = ConditionTable.from_df(value)
    self.condition_tables = [table]
"""Combined mapping tables as DataFrame.""" + return ( + MappingTable(mappings).to_df() + if (mappings := self.mappings) + else None + ) + + @mapping_df.setter + def mapping_df(self, value: pd.DataFrame): + self.mapping_tables = [MappingTable.from_df(value)] + + @property + def conditions(self) -> list[Condition]: + """List of conditions in the condition table(s).""" + return list( + chain.from_iterable(ct.conditions for ct in self.condition_tables) + ) + + @property + def experiments(self) -> list[Experiment]: + """List of experiments in the experiment table(s).""" + return list( + chain.from_iterable( + et.experiments for et in self.experiment_tables + ) + ) + + @property + def observables(self) -> list[Observable]: + """List of observables in the observable table(s).""" + return list( + chain.from_iterable( + ot.observables for ot in self.observable_tables + ) + ) + + @property + def measurements(self) -> list[Measurement]: + """List of measurements in the measurement table(s).""" + return list( + chain.from_iterable( + mt.measurements for mt in self.measurement_tables + ) + ) + + @property + def parameters(self) -> list[Parameter]: + """List of parameters in the parameter table(s).""" + return list( + chain.from_iterable(pt.parameters for pt in self.parameter_tables) + ) + + @property + def mappings(self) -> list[Mapping]: + """List of mappings in the mapping table(s).""" + return list( + chain.from_iterable(mt.mappings for mt in self.mapping_tables) + ) + + @property + def id(self) -> str | None: + """The ID of the PEtab problem if set, ``None`` otherwise.""" + return self.config.id if self.config else None + + @id.setter + def id(self, value: str): + """Set the ID of the PEtab problem.""" + if self.config is None: + self.config = ProblemConfig(format_version="2.0.0") + self.config.id = value + + def get_optimization_parameters(self) -> list[str]: + """ + Get the list of optimization parameter IDs from parameter table. 
def get_observable_ids(self) -> list[str]:
    """Return the IDs of all observables of the problem.

    :returns: A list of observable IDs, in the order of ``self.observables``.
    """
    observable_ids = []
    for observable in self.observables:
        observable_ids.append(observable.id)
    return observable_ids
+ fixed: + Whether to return fixed parameters, i.e. parameters not to + estimate. + + Returns + ------- + The parameter nominal values. + """ + v = [ + p.nominal_value if p.nominal_value is not None else nan + for p in self.parameters + ] + + return self._apply_mask(v, free=free, fixed=fixed) + + def get_x_nominal_dict( + self, free: bool = True, fixed: bool = True + ) -> dict[str, float]: + """Get parameter nominal values as dict. + + :param free: + Whether to return free parameters, i.e. parameters to estimate. + :param fixed: + Whether to return fixed parameters, i.e. parameters not to + estimate. + :returns: + A dictionary mapping parameter IDs to their nominal values. + """ + return dict( + zip( + self.get_x_ids(free=free, fixed=fixed), + self.get_x_nominal(free=free, fixed=fixed), + strict=True, + ) + ) + + @property + def x_nominal(self) -> list: + """Parameter table nominal values""" + return self.get_x_nominal() + + @property + def x_nominal_free(self) -> list: + """Parameter table nominal values, for free parameters.""" + return self.get_x_nominal(fixed=False) + + @property + def x_nominal_fixed(self) -> list: + """Parameter table nominal values, for fixed parameters.""" + return self.get_x_nominal(free=False) + + def get_lb(self, free: bool = True, fixed: bool = True): + """Generic function to get lower parameter bounds. + + Parameters + ---------- + free: + Whether to return free parameters, i.e. parameters to estimate. + fixed: + Whether to return fixed parameters, i.e. parameters not to + estimate. + + Returns + ------- + The lower parameter bounds. + """ + v = [p.lb if p.lb is not None else nan for p in self.parameters] + return self._apply_mask(v, free=free, fixed=fixed) + + @property + def lb(self) -> list: + """Parameter table lower bounds.""" + return self.get_lb() + + def get_ub(self, free: bool = True, fixed: bool = True): + """Generic function to get upper parameter bounds. 
@property
def has_map_objective(self) -> bool:
    """Whether this problem encodes a maximum a posteriori (MAP) objective.

    The objective is considered MAP as soon as at least one estimated
    parameter has a prior distribution specified.

    :returns: ``True`` if MAP objective, ``False`` otherwise.
    """
    for parameter in self.parameters:
        # priors of non-estimated parameters do not count
        if parameter.estimate and parameter.prior_distribution is not None:
            return True
    return False
def sample_parameter_startpoints(self, n_starts: int = 100, **kwargs):
    """Create 2D array with starting points for optimization.

    Startpoints are sampled from the distributions returned by
    :meth:`get_startpoint_distributions`.

    :param n_starts: Number of startpoints to sample.
    :param kwargs: Unused; accepted for backward compatibility.
    :returns:
        Array with one row per startpoint and one column per distribution,
        assuming each ``sample(n_starts)`` call returns a 1D array of
        length ``n_starts``.
    """
    # Do not sample from `get_priors()`: it returns an empty dict whenever
    # no estimated parameter has an explicit prior, which left nothing to
    # stack and made `np.vstack` raise. For problems that do have priors,
    # both methods build the same mapping.
    distributions = self.get_startpoint_distributions()
    if not distributions:
        # `np.vstack` rejects an empty sequence; no estimated parameters
        # means zero columns.
        return np.empty((n_starts, 0))

    return np.vstack(
        [dist.sample(n_starts) for dist in distributions.values()]
    ).T
def assert_valid(self, **kwargs) -> None:
    """Raise if validation reports any issue of severity ERROR or higher.

    :param kwargs: Additional arguments passed to :meth:`Problem.validate`.

    :raises AssertionError: If the PEtab problem is not valid. The message
        lists the messages of all offending validation results.
    """
    from ..v2.lint import ValidationIssueSeverity

    results = self.validate(**kwargs)
    failures = list(
        filter(
            lambda result: result.level >= ValidationIssueSeverity.ERROR,
            results,
        )
    )
    if not failures:
        return

    details = "\n".join(failure.message for failure in failures)
    raise AssertionError("PEtab problem is not valid:\n" + details)
def add_parameter(
    self,
    id_: str,
    estimate: bool | str = True,
    nominal_value: Number | None = None,
    lb: Number = None,
    ub: Number = None,
    prior_dist: str = None,
    prior_pars: str | Sequence = None,
    **kwargs,
):
    """Add a parameter to the problem.

    The parameter is appended to the last parameter table; a new table is
    created if the problem has none.

    Arguments:
        id_: The parameter id
        estimate: Whether the parameter is estimated
        nominal_value: The nominal value of the parameter
        lb: The lower bound of the parameter
        ub: The upper bound of the parameter
        prior_dist: The type of the prior distribution
        prior_pars: The parameters of the prior distribution, either as
            a string or as a sequence that is joined with
            ``C.PARAMETER_SEPARATOR``
        kwargs: additional columns/values to add to the parameter table
    """
    # non-string sequences are serialized to a single string
    if (
        prior_pars is not None
        and isinstance(prior_pars, Sequence)
        and not isinstance(prior_pars, str)
    ):
        prior_pars = C.PARAMETER_SEPARATOR.join(str(par) for par in prior_pars)

    optional_fields = (
        (C.ESTIMATE, estimate),
        (C.NOMINAL_VALUE, nominal_value),
        (C.LOWER_BOUND, lb),
        (C.UPPER_BOUND, ub),
        (C.PRIOR_DISTRIBUTION, prior_dist),
        (C.PRIOR_PARAMETERS, prior_pars),
    )

    record = {C.PARAMETER_ID: id_}
    # fields left at None are omitted
    record.update(
        (key, value) for key, value in optional_fields if value is not None
    )
    # explicit keyword columns override the named arguments
    record.update(kwargs)

    tables = self.parameter_tables
    if not tables:
        tables.append(ParameterTable())

    tables[-1] += Parameter(**record)
def add_mapping(self, petab_id: str, model_id: str = None, name: str = None):
    """Add a mapping table entry to the problem.

    The entry is appended to the last mapping table; a new table is created
    if the problem has none.

    Arguments:
        petab_id: The new PEtab-compatible ID mapping to `model_id`
        model_id: The ID of some entity in the model
        name: A name (any string) for the entity referenced by `petab_id`.
    """
    tables = self.mapping_tables
    if not tables:
        tables.append(MappingTable())

    entry = Mapping(petab_id=petab_id, model_id=model_id, name=name)
    tables[-1].mappings.append(entry)
def __iadd__(self, other):
    """Add Observable, Parameter, Measurement, Condition, or Experiment.

    The object is added to the last table of the matching kind; a new table
    is created if there is none.

    :raises ValueError: If ``other`` is of none of the supported types.
    """
    from .core import (
        Condition,
        Experiment,
        Measurement,
        Observable,
        Parameter,
    )

    # (entity type, list of tables it belongs to, table type to create)
    dispatch = (
        (Observable, self.observable_tables, ObservableTable),
        (Parameter, self.parameter_tables, ParameterTable),
        (Measurement, self.measurement_tables, MeasurementTable),
        (Condition, self.condition_tables, ConditionTable),
        (Experiment, self.experiment_tables, ExperimentTable),
    )

    for entity_type, tables, table_type in dispatch:
        if isinstance(other, entity_type):
            if not tables:
                tables.append(table_type())
            tables[-1] += other
            return self

    raise ValueError(f"Cannot add object of type {type(other)} to Problem.")
def get_changes_for_period(self, period: ExperimentPeriod) -> list[Change]:
    """Collect the changes of all conditions of an experiment period.

    :param period: The experiment period to get the changes for.
    :return: The changes of the period's conditions, concatenated in the
        order of ``period.condition_ids``.
    """
    changes = []
    for condition_id in period.condition_ids:
        # conditions are looked up by ID via `self[...]`
        changes.extend(self[condition_id].changes)
    return changes
+ :return: A list of measurements for the given experiment. + """ + return [ + measurement + for measurement in self.measurements + if measurement.experiment_id == experiment.id + ] + + def get_output_parameters( + self, observable: bool = True, noise: bool = True + ) -> list[str]: + """Get output parameters. + + Returns IDs of symbols used in observable and noise formulas that are + not observables and that are not defined in the model. + + :param observable: + Include parameters from observableFormulas + :param noise: + Include parameters from noiseFormulas + :returns: + List of output parameter IDs, including any placeholder parameters. + """ + # collect free symbols from observable and noise formulas, + # skipping observable IDs + candidates = set() + if observable: + candidates |= { + str_sym + for o in self.observables + if o.formula is not None + for sym in o.formula.free_symbols + if (str_sym := str(sym)) != o.id + } + if noise: + candidates |= { + str_sym + for o in self.observables + if o.noise_formula is not None + for sym in o.noise_formula.free_symbols + if (str_sym := str(sym)) != o.id + } + + output_parameters = [] + + # filter out symbols that are defined in the model or mapped to + # such symbols + for candidate in sorted(candidates): + if self.model and self.model.symbol_allowed_in_observable_formula( + candidate + ): + continue + + # does it map to a model entity? 
+ for mapping in self.mappings: + if ( + mapping.petab_id == candidate + and mapping.model_id is not None + ): + if ( + self.model + and self.model.symbol_allowed_in_observable_formula( + mapping.model_id + ) + ): + break + else: + # no mapping to a model entity, so it is an output parameter + output_parameters.append(candidate) + + return output_parameters + + +class ModelFile(BaseModel): + """A file in the PEtab problem configuration.""" + + location: AnyUrl | Path + language: str + + model_config = ConfigDict( + validate_assignment=True, + ) + + +class ExtensionConfig(BaseModel): + """The configuration of a PEtab extension.""" + + version: str + config: dict + + +class ProblemConfig(BaseModel): + """The PEtab problem configuration.""" + + #: The path to the PEtab problem configuration. + filepath: AnyUrl | Path | None = Field( + None, + description="The path to the PEtab problem configuration.", + exclude=True, + ) + #: The base path to resolve relative paths. + base_path: AnyUrl | Path | None = Field( + None, + description="The base path to resolve relative paths.", + exclude=True, + ) + #: The PEtab format version. + format_version: str = "2.0.0" + + #: The problem ID. + id: str | None = None + + #: The paths to the parameter tables. + # Absolute or relative to `base_path`. + parameter_files: list[AnyUrl | Path] = [] + #: The model IDs and files used by the problem (`id->ModelFile`). + model_files: dict[str, ModelFile] | None = {} + #: The paths to the measurement tables. + # Absolute or relative to `base_path`. + measurement_files: list[AnyUrl | Path] = [] + #: The paths to the condition tables. + # Absolute or relative to `base_path`. + condition_files: list[AnyUrl | Path] = [] + #: The paths to the experiment tables. + # Absolute or relative to `base_path`. + experiment_files: list[AnyUrl | Path] = [] + #: The paths to the observable tables. + # Absolute or relative to `base_path`. 
+ observable_files: list[AnyUrl | Path] = [] + #: The paths to the mapping tables. + # Absolute or relative to `base_path`. + mapping_files: list[AnyUrl | Path] = [] + + #: Extensions used by the problem. + extensions: list[ExtensionConfig] | dict = {} + + model_config = ConfigDict( + validate_assignment=True, + ) + + # convert parameter_file to list + @field_validator( + "parameter_files", + mode="before", + ) + def _convert_parameter_file(cls, v): + """Convert parameter_file to a list.""" + if isinstance(v, str): + return [v] + if isinstance(v, list): + return v + raise ValueError( + "parameter_files must be a string or a list of strings." + ) + + def to_yaml(self, filename: str | Path): + """Write the configuration to a YAML file. + + :param filename: Destination file name. The parent directory will be + created if necessary. + """ + from ..v1.yaml import write_yaml + + data = self.model_dump(by_alias=True) + # convert Paths to strings for YAML serialization + for key in ( + "measurement_files", + "condition_files", + "experiment_files", + "observable_files", + "mapping_files", + "parameter_files", + ): + data[key] = list(map(str, data[key])) + + for model_id in data.get("model_files", {}): + data["model_files"][model_id][C.MODEL_LOCATION] = str( + data["model_files"][model_id]["location"] + ) + if data["id"] is None: + # The schema requires a valid id or no id field at all. 
+ del data["id"] + + write_yaml(data, filename) + + @property + def format_version_tuple(self) -> tuple[int, int, int, str]: + """The format version as a tuple of major/minor/patch `int`s and a + suffix.""" + return parse_version(self.format_version) diff --git a/petab/v2/experiments.py b/petab/v2/experiments.py new file mode 100644 index 00000000..3a06ea76 --- /dev/null +++ b/petab/v2/experiments.py @@ -0,0 +1,41 @@ +"""Functions operating on the PEtab experiments table.""" + +from pathlib import Path + +import pandas as pd + +__all__ = ["get_experiment_df", "write_experiment_df"] + + +def get_experiment_df( + experiments_file: str | pd.DataFrame | Path | None, +) -> pd.DataFrame | None: + """ + Read the provided experiments file into a ``pandas.Dataframe``. + + Arguments: + experiments_file: Name of the file to read from or pandas.Dataframe. + + Returns: + Experiments DataFrame + """ + + if isinstance(experiments_file, str | Path): + experiments_file = pd.read_csv( + experiments_file, sep="\t", float_precision="round_trip" + ) + + return experiments_file + + +def write_experiment_df(df: pd.DataFrame, filename: str | Path) -> None: + """Write PEtab experiments table + + Arguments: + df: PEtab experiments table + filename: Destination file name. The parent directory will be created + if necessary. 
+ """ + df = get_experiment_df(df) + Path(filename).parent.mkdir(parents=True, exist_ok=True) + df.to_csv(filename, sep="\t", index=False) diff --git a/petab/v2/lint.py b/petab/v2/lint.py index 87554e64..687d58f2 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -1,55 +1,21 @@ """Validation of PEtab problems""" + from __future__ import annotations import logging from abc import ABC, abstractmethod +from collections import Counter, OrderedDict +from collections.abc import Set from dataclasses import dataclass, field from enum import IntEnum +from itertools import chain from pathlib import Path -import numpy as np import pandas as pd +import sympy as sp -from petab.v1 import ( - assert_model_parameters_in_condition_or_parameter_table, -) -from petab.v1.C import ( - ESTIMATE, - MODEL_ENTITY_ID, - NOISE_PARAMETERS, - NOMINAL_VALUE, - OBSERVABLE_PARAMETERS, - PARAMETER_DF_REQUIRED_COLS, - PARAMETER_ID, -) -from petab.v1.conditions import get_parametric_overrides -from petab.v1.lint import ( - _check_df, - assert_no_leading_trailing_whitespace, - assert_parameter_bounds_are_numeric, - assert_parameter_estimate_is_boolean, - assert_parameter_id_is_string, - assert_parameter_prior_parameters_are_valid, - assert_parameter_prior_type_is_valid, - assert_parameter_scale_is_valid, - assert_unique_parameter_ids, - check_ids, - check_parameter_bounds, -) -from petab.v1.measurements import split_parameter_replacement_list -from petab.v1.observables import get_output_parameters, get_placeholders -from petab.v1.parameters import ( - get_valid_parameters_for_parameter_table, -) -from petab.v1.visualize.lint import validate_visualization_df - -from ..v1 import ( - assert_measurement_conditions_present_in_condition_table, - check_condition_df, - check_measurement_df, - check_observable_df, -) -from .problem import Problem +from ..v2.C import * +from .core import PriorDistribution, Problem logger = logging.getLogger(__name__) @@ -60,14 +26,24 @@ "ValidationError", 
"ValidationTask", "CheckModel", - "CheckTableExists", - "CheckMeasurementTable", - "CheckConditionTable", - "CheckObservableTable", - "CheckParameterTable", + "CheckProblemConfig", + "CheckMeasuredObservablesDefined", + "CheckOverridesMatchPlaceholders", + "CheckMeasuredExperimentsDefined", + "CheckMeasurementModelId", + "CheckPosLogMeasurements", + "CheckValidConditionTargets", + "CheckUniquePrimaryKeys", + "CheckExperimentTable", + "CheckExperimentConditionsExist", "CheckAllParametersPresentInParameterTable", "CheckValidParameterInConditionOrParameterTable", - "CheckVisualizationTable", + "CheckUnusedExperiments", + "CheckObservablesDoNotShadowModelEntities", + "CheckUnusedConditions", + "CheckPriorDistribution", + "CheckUndefinedExperiments", + "CheckInitialChangeSymbols", "lint_problem", "default_validation_tasks", ] @@ -97,6 +73,7 @@ class ValidationIssue: level: ValidationIssueSeverity message: str + task: str | None = None def __post_init__(self): if not isinstance(self.level, ValidationIssueSeverity): @@ -107,6 +84,23 @@ def __post_init__(self): def __str__(self): return f"{self.level.name}: {self.message}" + @staticmethod + def _get_task_name() -> str | None: + """Get the name of the ValidationTask that raised this error. + + Expected to be called from below a `ValidationTask.run`. 
+ """ + import inspect + + # walk up the stack until we find the ValidationTask.run method + for frame_info in inspect.stack(): + frame = frame_info.frame + if "self" in frame.f_locals: + task = frame.f_locals["self"] + if isinstance(task, ValidationTask): + return task.__class__.__name__ + return None + @dataclass class ValidationError(ValidationIssue): @@ -116,6 +110,23 @@ class ValidationError(ValidationIssue): default=ValidationIssueSeverity.ERROR, init=False ) + def __post_init__(self): + if self.task is None: + self.task = self._get_task_name() + + +@dataclass +class ValidationWarning(ValidationIssue): + """A validation result with level WARNING.""" + + level: ValidationIssueSeverity = field( + default=ValidationIssueSeverity.WARNING, init=False + ) + + def __post_init__(self): + if self.task is None: + self.task = self._get_task_name() + class ValidationResultList(list[ValidationIssue]): """A list of validation results. @@ -128,17 +139,25 @@ def log( *, logger: logging.Logger = logger, min_level: ValidationIssueSeverity = ValidationIssueSeverity.INFO, + max_level: ValidationIssueSeverity = ValidationIssueSeverity.CRITICAL, ): - """Log the validation results.""" + """Log the validation results. + + :param logger: The logger to use for logging. + Defaults to the module logger. + :param min_level: The minimum severity level to log. + :param max_level: The maximum severity level to log. 
+ """ for result in self: - if result.level < min_level: + if result.level < min_level or result.level > max_level: continue + msg = f"{result.level.name}: {result.message} [{result.task}]" if result.level == ValidationIssueSeverity.INFO: - logger.info(result.message) + logger.info(msg) elif result.level == ValidationIssueSeverity.WARNING: - logger.warning(result.message) + logger.warning(msg) elif result.level >= ValidationIssueSeverity.ERROR: - logger.error(result.message) + logger.error(msg) if not self: logger.info("PEtab format check completed successfully.") @@ -156,7 +175,7 @@ def lint_problem(problem: Problem | str | Path) -> ValidationResultList: Arguments: problem: PEtab problem to check. Instance of :class:`Problem` or path - to a PEtab problem yaml file. + to a PEtab problem YAML file. Returns: A list of validation results. Empty if no issues were found. """ @@ -184,6 +203,40 @@ def __call__(self, *args, **kwargs): return self.run(*args, **kwargs) +class CheckProblemConfig(ValidationTask): + """A task to validate the configuration of a PEtab problem. + + This corresponds to checking the problem YAML file semantics. 
+ """ + + def run(self, problem: Problem) -> ValidationIssue | None: + if (config := problem.config) is None or config.base_path is None: + # This is allowed, so we can validate in-memory problems + # that don't have the list of files populated + return None + # TODO: decide when this should be emitted + # return ValidationWarning("Problem configuration is missing.") + + # TODO: we need some option for validating partial vs full problems + # check for unset but required files + missing_files = [] + if not config.parameter_files: + missing_files.append("parameters") + + if not config.measurement_files: + missing_files.append("measurements") + + if not config.observable_files: + missing_files.append("observables") + + if missing_files: + return ValidationError( + f"Missing files: {', '.join(missing_files)}" + ) + + return None + + class CheckModel(ValidationTask): """A task to validate the model of a PEtab problem.""" @@ -195,165 +248,305 @@ def run(self, problem: Problem) -> ValidationIssue | None: # TODO get actual model validation messages return ValidationError("Model is invalid.") + return None -class CheckTableExists(ValidationTask): - """A task to check if a table exists in the PEtab problem.""" - def __init__(self, table_name: str): - if table_name not in ["measurement", "observable", "parameter"]: - # all others are optional - raise ValueError( - f"Table name {table_name} is not supported. " - "Supported table names are 'measurement', 'observable', " - "'parameter'." 
- ) - self.table_name = table_name +class CheckMeasuredObservablesDefined(ValidationTask): + """A task to check that all observables referenced by the measurements + are defined.""" def run(self, problem: Problem) -> ValidationIssue | None: - if getattr(problem, f"{self.table_name}_df") is None: - return ValidationError(f"{self.table_name} table is missing.") + used_observables = {m.observable_id for m in problem.measurements} + defined_observables = {o.id for o in problem.observables} + if undefined_observables := (used_observables - defined_observables): + return ValidationError( + f"Observable(s) {undefined_observables} are used in the " + "measurement table but are not defined in the observable " + "table." + ) + + return None -class CheckMeasurementTable(ValidationTask): - """A task to validate the measurement table of a PEtab problem.""" +class CheckOverridesMatchPlaceholders(ValidationTask): + """A task to check that the number of observable/noise parameters + in the measurements matches the number of placeholders in the observables. + """ def run(self, problem: Problem) -> ValidationIssue | None: - if problem.measurement_df is None: - return + observable_parameters_count = { + o.id: len(o.observable_placeholders) for o in problem.observables + } + noise_parameters_count = { + o.id: len(o.noise_placeholders) for o in problem.observables + } + messages = [] + observables = {o.id: o for o in problem.observables} + for m in problem.measurements: + # check observable parameters + try: + expected = observable_parameters_count[m.observable_id] + except KeyError: + messages.append( + f"Observable {m.observable_id} is used in the measurement " + f"table but is not defined in the observable table." 
+ ) + continue - try: - check_measurement_df(problem.measurement_df, problem.observable_df) + actual = len(m.observable_parameters) - if problem.condition_df is not None: - # TODO: handle missing condition_df - assert_measurement_conditions_present_in_condition_table( - problem.measurement_df, problem.condition_df + if actual != expected: + formula = observables[m.observable_id].formula + messages.append( + f"Mismatch of observable parameter overrides for " + f"{m.observable_id} ({formula})" + f"in:\n{m}\n" + f"Expected {expected} but got {actual}" ) - except AssertionError as e: - return ValidationError(str(e)) + + # check noise parameters + expected = noise_parameters_count[m.observable_id] + actual = len(m.noise_parameters) + if actual != expected: + # no overrides defined, but a numerical sigma can be provided + # anyway + if len(m.noise_parameters) != 1 or ( + len(m.noise_parameters) == 1 + and m.noise_parameters[0].is_number + ): + messages.append( + "No placeholders have been specified in the " + f"noise model for observable {m.observable_id}, " + "but a parameter ID " + "or multiple overrides were specified in the " + "noiseParameters column." 
+ ) + else: + formula = observables[m.observable_id].noise_formula + messages.append( + f"Mismatch of noise parameter overrides for " + f"{m.observable_id} ({formula})" + f"in:\n{m}\n" + f"Expected {expected} but got {actual}" + ) + + if messages: + return ValidationError("\n".join(messages)) + + return None + + +class CheckPosLogMeasurements(ValidationTask): + """Check that measurements for observables with + log-transformation are positive.""" + + def run(self, problem: Problem) -> ValidationIssue | None: + from .core import NoiseDistribution as ND # noqa: N813 + + log_observables = { + o.id + for o in problem.observables + if o.noise_distribution in [ND.LOG_NORMAL, ND.LOG_LAPLACE] + } + if log_observables: + for m in problem.measurements: + if m.measurement <= 0 and m.observable_id in log_observables: + return ValidationError( + "Measurements with observable " + f"log transformation must be " + f"positive, but {m.measurement} <= 0 for {m}" + ) + + return None + + +class CheckMeasuredExperimentsDefined(ValidationTask): + """A task to check that all experiments referenced by measurements + are defined.""" + + def run(self, problem: Problem) -> ValidationIssue | None: + # TODO: introduce some option for validation of partial vs full + # problem. 
if this is supposed to be a complete problem, a missing + # condition table should be an error if the measurement table refers + # to conditions, otherwise it should maximally be a warning + used_experiments = { + m.experiment_id + for m in problem.measurements + if m.experiment_id is not None + } + + # check that measured experiments exist + available_experiments = {e.id for e in problem.experiments} + if missing_experiments := (used_experiments - available_experiments): + return ValidationError( + "Measurement table references experiments that " + "are not specified in the experiments table: " + + str(missing_experiments) + ) + + return None -class CheckConditionTable(ValidationTask): - """A task to validate the condition table of a PEtab problem.""" +class CheckValidConditionTargets(ValidationTask): + """Check that all condition table targets are valid.""" def run(self, problem: Problem) -> ValidationIssue | None: - if problem.condition_df is None: - return - - try: - check_condition_df( - problem.condition_df, - model=problem.model, - observable_df=problem.observable_df, - mapping_df=problem.mapping_df, + allowed_targets = ( + set(problem.model.get_valid_ids_for_condition_table()) + if problem.model + else set() + ) + allowed_targets |= set(problem.get_output_parameters()) + allowed_targets |= { + m.petab_id for m in problem.mappings if m.model_id is not None + } + + used_targets = { + change.target_id + for cond in problem.conditions + for change in cond.changes + } + + if invalid := (used_targets - allowed_targets): + return ValidationError( + f"Condition table contains invalid targets: {invalid}" ) - except AssertionError as e: - return ValidationError(str(e)) + # Check that changes of simultaneously applied conditions don't + # intersect + for experiment in problem.experiments: + for period in experiment.periods: + if not period.condition_ids: + continue + period_targets = set() + for condition_id in period.condition_ids: + condition_targets = { + 
change.target_id + for cond in problem.conditions + if cond.id == condition_id + for change in cond.changes + } + if invalid := (period_targets & condition_targets): + return ValidationError( + "Simultaneously applied conditions for experiment " + f"{experiment.id} have overlapping targets " + f"{invalid} at time {period.time}." + ) + period_targets |= condition_targets + return None + + +class CheckUniquePrimaryKeys(ValidationTask): + """Check that all primary keys are unique.""" -class CheckObservableTable(ValidationTask): - """A task to validate the observable table of a PEtab problem.""" + def run(self, problem: Problem) -> ValidationIssue | None: + # TODO: check that IDs are globally unique + # -- replaces CheckObservablesDoNotShadowModelEntities - def run(self, problem: Problem): - if problem.observable_df is None: - return + # check for uniqueness of all primary keys + counter = Counter(c.id for c in problem.conditions) + duplicates = {id_ for id_, count in counter.items() if count > 1} - try: - check_observable_df( - problem.observable_df, + if duplicates: + return ValidationError( + f"Condition table contains duplicate IDs: {duplicates}" ) - except AssertionError as e: - return ValidationIssue( - level=ValidationIssueSeverity.ERROR, message=str(e) + + counter = Counter(o.id for o in problem.observables) + duplicates = {id_ for id_, count in counter.items() if count > 1} + + if duplicates: + return ValidationError( + f"Observable table contains duplicate IDs: {duplicates}" ) + counter = Counter(e.id for e in problem.experiments) + duplicates = {id_ for id_, count in counter.items() if count > 1} + + if duplicates: + return ValidationError( + f"Experiment table contains duplicate IDs: {duplicates}" + ) + + counter = Counter(p.id for p in problem.parameters) + duplicates = {id_ for id_, count in counter.items() if count > 1} + + if duplicates: + return ValidationError( + f"Parameter table contains duplicate IDs: {duplicates}" + ) + + return None + class 
CheckObservablesDoNotShadowModelEntities(ValidationTask): """A task to check that observable IDs do not shadow model entities.""" + # TODO: all PEtab entity IDs must be disjoint from the model entity IDs def run(self, problem: Problem) -> ValidationIssue | None: - if problem.observable_df is None or problem.model is None: - return + if not problem.observables or problem.model is None: + return None shadowed_entities = [ - obs_id - for obs_id in problem.observable_df.index - if problem.model.has_entity_with_id(obs_id) + o.id + for o in problem.observables + if problem.model.has_entity_with_id(o.id) ] if shadowed_entities: return ValidationError( f"Observable IDs {shadowed_entities} shadow model entities." ) + return None + -class CheckParameterTable(ValidationTask): - """A task to validate the parameter table of a PEtab problem.""" +class CheckExperimentTable(ValidationTask): + """A task to validate the experiment table of a PEtab problem.""" def run(self, problem: Problem) -> ValidationIssue | None: - if problem.parameter_df is None: - return + messages = [] + for experiment in problem.experiments: + # Check that there are no duplicate timepoints + counter = Counter(period.time for period in experiment.periods) + duplicates = {time for time, count in counter.items() if count > 1} + if duplicates: + messages.append( + f"Experiment {experiment.id} contains duplicate " + f"timepoints: {duplicates}" + ) - try: - df = problem.parameter_df - _check_df(df, PARAMETER_DF_REQUIRED_COLS[1:], "parameter") + if messages: + return ValidationError("\n".join(messages)) - if df.index.name != PARAMETER_ID: - return ValidationError( - f"Parameter table has wrong index {df.index.name}." 
- f" Expected {PARAMETER_ID}.", - ) + return None - check_ids(df.index.values, kind="parameter") - for column_name in PARAMETER_DF_REQUIRED_COLS[ - 1: - ]: # 0 is PARAMETER_ID - if not np.issubdtype(df[column_name].dtype, np.number): - assert_no_leading_trailing_whitespace( - df[column_name].values, column_name - ) +class CheckExperimentConditionsExist(ValidationTask): + """A task to validate that all conditions in the experiment table exist + in the condition table.""" - # nominal value is required for non-estimated parameters - non_estimated_par_ids = list( - df.index[ - (df[ESTIMATE] != 1) - | ( - pd.api.types.is_string_dtype(df[ESTIMATE]) - and df[ESTIMATE] != "1" + def run(self, problem: Problem) -> ValidationIssue | None: + messages = [] + available_conditions = {c.id for c in problem.conditions} + for experiment in problem.experiments: + missing_conditions = ( + set( + chain.from_iterable( + period.condition_ids for period in experiment.periods ) - ] + ) + - available_conditions ) - # TODO implement as validators - # `assert_has_fixed_parameter_nominal_values` - # and `assert_correct_table_dtypes` - if non_estimated_par_ids: - if NOMINAL_VALUE not in df: - return ValidationError( - "Parameter table contains parameters " - f"{non_estimated_par_ids} that are not " - "specified to be estimated, " - f"but column {NOMINAL_VALUE} is missing." - ) - try: - df.loc[non_estimated_par_ids, NOMINAL_VALUE].apply(float) - except ValueError: - return ValidationError( - f"Expected numeric values for `{NOMINAL_VALUE}` " - "in parameter table " - "for all non-estimated parameters." 
- ) + if missing_conditions: + messages.append( + f"Experiment {experiment.id} requires conditions that are " + f"not present in the condition table: {missing_conditions}" + ) - assert_parameter_id_is_string(df) - assert_parameter_scale_is_valid(df) - assert_parameter_bounds_are_numeric(df) - assert_parameter_estimate_is_boolean(df) - assert_unique_parameter_ids(df) - check_parameter_bounds(df) - assert_parameter_prior_type_is_valid(df) - assert_parameter_prior_parameters_are_valid(df) + if messages: + return ValidationError("\n".join(messages)) - except AssertionError as e: - return ValidationError(str(e)) + return None class CheckAllParametersPresentInParameterTable(ValidationTask): @@ -361,41 +554,25 @@ class CheckAllParametersPresentInParameterTable(ValidationTask): with no additional ones.""" def run(self, problem: Problem) -> ValidationIssue | None: - if ( - problem.model is None - or problem.parameter_df is None - or problem.observable_df is None - or problem.measurement_df is None - ): - return + if problem.model is None: + return None required = get_required_parameters_for_parameter_table(problem) + allowed = get_valid_parameters_for_parameter_table(problem) - allowed = get_valid_parameters_for_parameter_table( - model=problem.model, - condition_df=problem.condition_df, - observable_df=problem.observable_df, - measurement_df=problem.measurement_df, - mapping_df=problem.mapping_df, - ) - - actual = set(problem.parameter_df.index) + actual = {p.id for p in problem.parameters} missing = required - actual extraneous = actual - allowed # missing parameters might be present under a different name based on # the mapping table - if missing and problem.mapping_df is not None: + if missing: model_to_petab_mapping = {} - for map_from, map_to in zip( - problem.mapping_df.index.values, - problem.mapping_df[MODEL_ENTITY_ID], - strict=True, - ): - if map_to in model_to_petab_mapping: - model_to_petab_mapping[map_to].append(map_from) + for m in problem.mappings: + if 
m.model_id in model_to_petab_mapping: + model_to_petab_mapping[m.model_id].append(m.petab_id) else: - model_to_petab_mapping[map_to] = [map_from] + model_to_petab_mapping[m.model_id] = [m.petab_id] missing = { missing_id for missing_id in missing @@ -418,51 +595,379 @@ def run(self, problem: Problem) -> ValidationIssue | None: + str(extraneous) ) + return None + class CheckValidParameterInConditionOrParameterTable(ValidationTask): """A task to check that all required and only allowed model parameters are present in the condition or parameter table.""" def run(self, problem: Problem) -> ValidationIssue | None: - if ( - problem.model is None - or problem.condition_df is None - or problem.parameter_df is None - ): - return - - try: - assert_model_parameters_in_condition_or_parameter_table( - problem.model, - problem.condition_df, - problem.parameter_df, - problem.mapping_df, + if problem.model is None: + return None + + allowed_in_condition_cols = set( + problem.model.get_valid_ids_for_condition_table() + ) + allowed_in_condition_cols |= { + m.petab_id + for m in problem.mappings + if not pd.isna(m.model_id) + and ( + # mapping table entities mapping to already allowed parameters + m.model_id in allowed_in_condition_cols + # mapping table entities mapping to species + or problem.model.is_state_variable(m.model_id) + ) + } + + allowed_in_parameter_table = get_valid_parameters_for_parameter_table( + problem + ) + + entities_in_condition_table = { + change.target_id + for cond in problem.conditions + for change in cond.changes + } + entities_in_parameter_table = {p.id for p in problem.parameters} + + disallowed_in_condition = { + x + for x in (entities_in_condition_table - allowed_in_condition_cols) + # we only check model entities here, not output parameters + if problem.model.has_entity_with_id(x) + } + if disallowed_in_condition: + is_or_are = "is" if len(disallowed_in_condition) == 1 else "are" + return ValidationError( + f"{disallowed_in_condition} {is_or_are} not " 
+ "allowed to occur in condition table " + "columns." + ) + + disallowed_in_parameters = { + x + for x in (entities_in_parameter_table - allowed_in_parameter_table) + # we only check model entities here, not output parameters + if problem.model.has_entity_with_id(x) + } + + if disallowed_in_parameters: + is_or_are = "is" if len(disallowed_in_parameters) == 1 else "are" + return ValidationError( + f"{disallowed_in_parameters} {is_or_are} not " + "allowed to occur in the parameters table." + ) + + in_both = entities_in_condition_table & entities_in_parameter_table + if in_both: + is_or_are = "is" if len(in_both) == 1 else "are" + return ValidationError( + f"{in_both} {is_or_are} present in both " + "the condition table and the parameter table." + ) + + return None + + +class CheckUnusedExperiments(ValidationTask): + """A task to check for experiments that are not used in the measurement + table.""" + + def run(self, problem: Problem) -> ValidationIssue | None: + used_experiments = { + m.experiment_id + for m in problem.measurements + if m.experiment_id is not None + } + available_experiments = {e.id for e in problem.experiments} + + unused_experiments = available_experiments - used_experiments + if unused_experiments: + return ValidationWarning( + f"Experiments {unused_experiments} are not used in the " + "measurements table." 
) - except AssertionError as e: - return ValidationIssue( - level=ValidationIssueSeverity.ERROR, message=str(e) + + return None + + +class CheckUndefinedExperiments(ValidationTask): + """A task to check for experiments that are used in the measurement + table but not defined in the experiment table.""" + + def run(self, problem: Problem) -> ValidationIssue | None: + used_experiments = { + m.experiment_id + for m in problem.measurements + if m.experiment_id is not None + } + available_experiments = {e.id for e in problem.experiments} + + if undefined_experiments := used_experiments - available_experiments: + return ValidationWarning( + f"Experiments {undefined_experiments} are used in the " + "measurements table but are not defined in the experiments " + "table." ) + return None + -class CheckVisualizationTable(ValidationTask): - """A task to validate the visualization table of a PEtab problem.""" +class CheckUnusedConditions(ValidationTask): + """A task to check for conditions that are not used in the experiment + table.""" def run(self, problem: Problem) -> ValidationIssue | None: - if problem.visualization_df is None: - return + used_conditions = set( + chain.from_iterable( + p.condition_ids for e in problem.experiments for p in e.periods + ) + ) + available_conditions = {c.id for c in problem.conditions} - if validate_visualization_df(problem): - return ValidationIssue( - level=ValidationIssueSeverity.ERROR, - message="Visualization table is invalid.", + unused_conditions = available_conditions - used_conditions + if unused_conditions: + return ValidationWarning( + f"Conditions {unused_conditions} are not used in the " + "experiments table." ) + return None -def get_required_parameters_for_parameter_table( + +class CheckInitialChangeSymbols(ValidationTask): + """ + Check that changes of any first period of any experiment only refers to + allowed symbols. + + The only allowed symbols are those that are present in the parameter table. 
+ """ + + def run(self, problem: Problem) -> ValidationIssue | None: + if not problem.experiments: + return None + + if not problem.conditions: + return None + + allowed_symbols = {p.id for p in problem.parameters} + allowed_symbols.add(TIME_SYMBOL) + # IDs of conditions that have already been checked + valid_conditions = set() + id_to_condition = {c.id: c for c in problem.conditions} + + messages = [] + for experiment in problem.experiments: + if not experiment.periods: + continue + + first_period = experiment.sorted_periods[0] + for condition_id in first_period.condition_ids: + if condition_id in valid_conditions: + continue + + try: + condition = id_to_condition[condition_id] + except KeyError: + messages.append( + f"Unable to validate changes for condition " + f"{condition_id} applied at the start of " + f"experiment {experiment.id}, as the condition " + "does not exist." + ) + + used_symbols = { + str(sym) + for change in condition.changes + for sym in change.target_value.free_symbols + } + invalid_symbols = used_symbols - allowed_symbols + if invalid_symbols: + messages.append( + f"Condition {condition.id} is applied at the start of " + f"experiment {experiment.id}, and thus, its " + f"target value expressions must only contain " + f"symbols from the parameter table, or `time`. " + "However, it contains additional symbols: " + f"{invalid_symbols}. 
" + ) + + if messages: + return ValidationError("\n".join(messages)) + + return None + + +class CheckPriorDistribution(ValidationTask): + """A task to validate the prior distribution of a PEtab problem.""" + + _num_pars = { + PriorDistribution.CAUCHY: 2, + PriorDistribution.CHI_SQUARED: 1, + PriorDistribution.EXPONENTIAL: 1, + PriorDistribution.GAMMA: 2, + PriorDistribution.LAPLACE: 2, + PriorDistribution.LOG_LAPLACE: 2, + PriorDistribution.LOG_NORMAL: 2, + PriorDistribution.LOG_UNIFORM: 2, + PriorDistribution.NORMAL: 2, + PriorDistribution.RAYLEIGH: 1, + PriorDistribution.UNIFORM: 2, + } + + def run(self, problem: Problem) -> ValidationIssue | None: + messages = [] + for parameter in problem.parameters: + if parameter.prior_distribution is None: + continue + + if parameter.prior_distribution not in PRIOR_DISTRIBUTIONS: + messages.append( + f"Prior distribution `{parameter.prior_distribution}' " + f"for parameter `{parameter.id}' is not valid." + ) + continue + + if ( + exp_num_par := self._num_pars[parameter.prior_distribution] + ) != len(parameter.prior_parameters): + messages.append( + f"Prior distribution `{parameter.prior_distribution}' " + f"for parameter `{parameter.id}' requires " + f"{exp_num_par} parameters, but got " + f"{len(parameter.prior_parameters)} " + f"({parameter.prior_parameters})." + ) + + # TODO: check distribution parameter domains more specifically + try: + if parameter.estimate and parameter.prior_dist is not None: + # .prior_dist fails for non-estimated parameters + _ = parameter.prior_dist.sample(1) + except Exception as e: + messages.append( + f"Prior parameters `{parameter.prior_parameters}' " + f"for parameter `{parameter.id}' are invalid " + f"(hint: {e})." 
+ ) + + if messages: + return ValidationError("\n".join(messages)) + + return None + + +class CheckMeasurementModelId(ValidationTask): + """Validate model IDs of measurements.""" + + def run(self, problem: Problem) -> ValidationIssue | None: + messages = [] + available_models = {m.model_id for m in problem.models} + + for measurement in problem.measurements: + if not measurement.model_id: + if len(available_models) < 2: + # If there is only one model, it is not required to specify + # the model ID in the measurement table. + continue + + messages.append( + f"Measurement `{measurement}' does not have a model ID, " + "but there are multiple models available. " + "Please specify the model ID in the measurement table." + ) + continue + + if measurement.model_id not in available_models: + messages.append( + f"Measurement `{measurement}' has model ID " + f"`{measurement.model_id}' which does not match " + "any of the available models: " + f"{available_models}." + ) + + if messages: + return ValidationError("\n".join(messages)) + + return None + + +def get_valid_parameters_for_parameter_table( problem: Problem, ) -> set[str]: """ - Get set of parameters which need to go into the parameter table + Get the set of parameters which may be present inside the parameter table + + :param problem: The PEtab problem + + Returns: + Set of parameter IDs which PEtab allows to be present in the + parameter table. 
+ """ + # - grab all allowed model parameters + # - grab corresponding names from mapping table + # - grab all output parameters defined in {observable,noise}Formula + # - grab all parameters from measurement table + # - grab all parametric overrides from condition table + # - remove parameters for which condition table columns exist + # - remove placeholder parameters + # (only partial overrides are not supported) + + # must not go into parameter table + invalid = set(get_placeholders(problem)) + + # condition table targets + invalid |= { + change.target_id + for cond in problem.conditions + for change in cond.changes + } + + # don't use sets here, to have deterministic ordering, + # e.g., for creating parameter tables + parameter_ids = OrderedDict.fromkeys( + p + for p in problem.model.get_valid_parameters_for_parameter_table() + if p not in invalid + ) + + for mapping in problem.mappings: + if mapping.model_id and mapping.model_id in parameter_ids.keys(): + parameter_ids[mapping.petab_id] = None + + # add output parameters from observable table + output_parameters = problem.get_output_parameters() + for p in output_parameters: + if p not in invalid: + parameter_ids[p] = None + + # Append parameters from measurement table, unless they occur as condition + # table columns + def append_overrides(overrides): + for p in overrides: + if isinstance(p, sp.Symbol) and (str_p := str(p)) not in invalid: + parameter_ids[str_p] = None + + for measurement in problem.measurements: + # we trust that the number of overrides matches + append_overrides(measurement.observable_parameters) + append_overrides(measurement.noise_parameters) + + # Append parameter overrides from condition table + for ct in problem.condition_tables: + for p in ct.free_symbols: + parameter_ids[str(p)] = None + + return set(parameter_ids.keys()) + + +def get_required_parameters_for_parameter_table( + problem: Problem, +) -> Set[str]: + """ + Get the set of parameters that need to go into the parameter table 
Arguments: problem: The PEtab problem @@ -473,95 +978,117 @@ def get_required_parameters_for_parameter_table( that are not defined in the model. """ parameter_ids = set() + condition_targets = { + change.target_id + for cond in problem.conditions + for change in cond.changes + } # Add parameters from measurement table, unless they are fixed parameters def append_overrides(overrides): parameter_ids.update( - p + str_p for p in overrides - if isinstance(p, str) and p not in problem.condition_df.columns + if isinstance(p, sp.Symbol) + and (str_p := str(p)) not in condition_targets ) - for _, row in problem.measurement_df.iterrows(): + for m in problem.measurements: # we trust that the number of overrides matches - append_overrides( - split_parameter_replacement_list( - row.get(OBSERVABLE_PARAMETERS, None) - ) - ) - append_overrides( - split_parameter_replacement_list(row.get(NOISE_PARAMETERS, None)) - ) - - # remove `observable_ids` when - # `get_output_parameters` is updated for PEtab v2/v1.1, where - # observable IDs are allowed in observable formulae - observable_ids = set(problem.observable_df.index) + append_overrides(m.observable_parameters) + append_overrides(m.noise_parameters) # Add output parameters except for placeholders for formula_type, placeholder_sources in ( ( # Observable formulae - {"observables": True, "noise": False}, + {"observable": True, "noise": False}, # can only contain observable placeholders - {"noise": False, "observables": True}, + {"noise": False, "observable": True}, ), ( # Noise formulae - {"observables": False, "noise": True}, + {"observable": False, "noise": True}, # can contain noise and observable placeholders - {"noise": True, "observables": True}, + {"noise": True, "observable": True}, ), ): - output_parameters = get_output_parameters( - problem.observable_df, - problem.model, - mapping_df=problem.mapping_df, + output_parameters = problem.get_output_parameters( **formula_type, ) placeholders = get_placeholders( - 
problem.observable_df, + problem, **placeholder_sources, ) parameter_ids.update( - p - for p in output_parameters - if p not in placeholders and p not in observable_ids + p for p in output_parameters if p not in placeholders ) # Add condition table parametric overrides unless already defined in the # model parameter_ids.update( - p - for p in get_parametric_overrides(problem.condition_df) - if not problem.model.has_entity_with_id(p) + str(p) + for ct in problem.condition_tables + for p in ct.free_symbols + if not problem.model.has_entity_with_id(str(p)) ) - # remove parameters that occur in the condition table and are overridden - # for ALL conditions - for p in problem.condition_df.columns[ - ~problem.condition_df.isnull().any() - ]: - try: - parameter_ids.remove(p) - except KeyError: - pass + # parameters that are overridden via the condition table are not allowed + parameter_ids -= condition_targets return parameter_ids +def get_placeholders( + problem: Problem, + observable: bool = True, + noise: bool = True, +) -> list[str]: + """Get all placeholder parameters from observable table observableFormulas + and noiseFormulas. + + Arguments: + problem: The PEtab problem + observable: Include parameters from observableFormulas + noise: Include parameters from noiseFormulas + + Returns: + List of placeholder parameters from observable table observableFormulas + and noiseFormulas. 
+ """ + # collect placeholder parameters overwritten by + # {observable,noise}Parameters + placeholders = [] + for o in problem.observables: + if observable: + placeholders.extend(map(str, o.observable_placeholders)) + if noise: + placeholders.extend(map(str, o.noise_placeholders)) + + from ..v1.core import unique_preserve_order + + return unique_preserve_order(placeholders) + + #: Validation tasks that should be run on any PEtab problem default_validation_tasks = [ - CheckTableExists("measurement"), - CheckTableExists("observable"), - CheckTableExists("parameter"), + CheckProblemConfig(), CheckModel(), - CheckMeasurementTable(), - CheckConditionTable(), - CheckObservableTable(), + CheckUniquePrimaryKeys(), + CheckMeasurementModelId(), + CheckMeasuredObservablesDefined(), + CheckPosLogMeasurements(), + CheckOverridesMatchPlaceholders(), + CheckValidConditionTargets(), + CheckExperimentTable(), + CheckExperimentConditionsExist(), + CheckUndefinedExperiments(), CheckObservablesDoNotShadowModelEntities(), - CheckParameterTable(), CheckAllParametersPresentInParameterTable(), - CheckVisualizationTable(), CheckValidParameterInConditionOrParameterTable(), + CheckUnusedExperiments(), + CheckUnusedConditions(), + CheckPriorDistribution(), + CheckInitialChangeSymbols(), + # TODO validate mapping table ] diff --git a/petab/v2/math/__init__.py b/petab/v2/math/__init__.py new file mode 100644 index 00000000..8a5a5559 --- /dev/null +++ b/petab/v2/math/__init__.py @@ -0,0 +1,3 @@ +"""Functions for parsing and evaluating mathematical expressions.""" + +from petab.v1.math import * # noqa: F401 diff --git a/petab/v2/models/__init__.py b/petab/v2/models/__init__.py index a387c27b..79ec7639 100644 --- a/petab/v2/models/__init__.py +++ b/petab/v2/models/__init__.py @@ -1,2 +1,3 @@ """Handling of different model types supported by PEtab.""" + from ...v1.models import * # noqa: F401, F403 diff --git a/petab/v2/models/_sbml_utils.py b/petab/v2/models/_sbml_utils.py new file mode 100644 
index 00000000..cbccde2b --- /dev/null +++ b/petab/v2/models/_sbml_utils.py @@ -0,0 +1,51 @@ +"""Private utility functions for SBML handling.""" + +import libsbml + +retval_to_str = { + getattr(libsbml, attr): attr + for attr in ( + "LIBSBML_DUPLICATE_OBJECT_ID", + "LIBSBML_INDEX_EXCEEDS_SIZE", + "LIBSBML_INVALID_ATTRIBUTE_VALUE", + "LIBSBML_INVALID_OBJECT", + "LIBSBML_INVALID_XML_OPERATION", + "LIBSBML_LEVEL_MISMATCH", + "LIBSBML_NAMESPACES_MISMATCH", + "LIBSBML_OPERATION_FAILED", + "LIBSBML_UNEXPECTED_ATTRIBUTE", + "LIBSBML_PKG_UNKNOWN", + "LIBSBML_PKG_VERSION_MISMATCH", + "LIBSBML_PKG_CONFLICTED_VERSION", + ) +} + + +def check(res: int): + """Check the return value of a libsbml function that returns a status code. + + :param res: The return value to check. + :raises RuntimeError: If the return value indicates an error. + """ + if res != libsbml.LIBSBML_OPERATION_SUCCESS: + raise RuntimeError(f"libsbml error: {retval_to_str.get(res, res)}") + + +def add_sbml_parameter( + model: libsbml.Model, + id_: str, + value: float = None, + constant: bool = None, +) -> libsbml.Parameter: + """Add a parameter to the SBML model.""" + param = model.createParameter() + + check(param.setId(id_)) + + if value is not None: + check(param.setValue(value)) + + if constant is not None: + check(param.setConstant(constant)) + + return param diff --git a/petab/v2/models/model.py b/petab/v2/models/model.py index 403a03e2..345247eb 100644 --- a/petab/v2/models/model.py +++ b/petab/v2/models/model.py @@ -1,2 +1,3 @@ """PEtab model abstraction""" + from ...v1.models.model import * # noqa: F401, F403 diff --git a/petab/v2/models/pysb_model.py b/petab/v2/models/pysb_model.py index 111c9864..4da866e7 100644 --- a/petab/v2/models/pysb_model.py +++ b/petab/v2/models/pysb_model.py @@ -1,2 +1,3 @@ """Functions for handling PySB models""" + from ...v1.models.pysb_model import * # noqa: F401, F403 diff --git a/petab/v2/models/sbml_model.py b/petab/v2/models/sbml_model.py index 2a0eadc7..b696ce31 
100644 --- a/petab/v2/models/sbml_model.py +++ b/petab/v2/models/sbml_model.py @@ -1,2 +1,3 @@ """Functions for handling SBML models""" + from ...v1.models.sbml_model import * # noqa: F401, F403 diff --git a/petab/v2/petab1to2.py b/petab/v2/petab1to2.py index 86cbe49c..de809acf 100644 --- a/petab/v2/petab1to2.py +++ b/petab/v2/petab1to2.py @@ -1,48 +1,77 @@ """Convert PEtab version 1 problems to version 2.""" + +from __future__ import annotations + +import re import shutil -from itertools import chain +import warnings +from contextlib import suppress from pathlib import Path +from tempfile import TemporaryDirectory +from urllib.parse import urlparse +from uuid import uuid4 +import pandas as pd from pandas.io.common import get_handle, is_url -import petab.v1.C as C -from petab.models import MODEL_TYPE_SBML -from petab.v1 import Problem as ProblemV1 -from petab.v2.lint import lint_problem as lint_v2_problem -from petab.yaml import get_path_prefix - -from ..v1 import lint_problem as lint_v1_problem -from ..v1.yaml import load_yaml, validate, write_yaml +from .. import v1, v2 +from ..v1.math import sympify_petab +from ..v1.yaml import get_path_prefix, load_yaml, validate from ..versions import get_major_version +from .models import MODEL_TYPE_SBML __all__ = ["petab1to2"] -def petab1to2(yaml_config: Path | str, output_dir: Path | str = None): +def petab1to2( + yaml_config: Path | str, output_dir: Path | str = None +) -> v2.Problem | None: """Convert from PEtab 1.0 to PEtab 2.0 format. Convert a PEtab problem from PEtab 1.0 to PEtab 2.0 format. - Parameters - ---------- - yaml_config: dict | Path | str + .. note:: + + Some aspects of PEtab v1 were not well-defined. For example, model + initialization order (e.g., applying initial assignments before or + after condition table overrides) and the impact of compartment size + changes were not specified. 
In such cases, we made assumptions that are + consistent with the clarified PEtab v2 specifications, + the PEtab test suite, or common practice. + Therefore, it is recommended to carefully review the generated PEtab v2 + problem to ensure it aligns with the expected behavior. + + :param yaml_config: The PEtab problem as dictionary or YAML file name. - output_dir: Path | str + :param output_dir: The output directory to save the converted PEtab problem, or ``None``, to return a :class:`petab.v2.Problem` instance. - Raises - ------ - ValueError + :raises ValueError: If the input is invalid or does not pass linting or if the generated files do not pass linting. """ - if output_dir is None: - # TODO requires petab.v2.Problem - raise NotImplementedError("Not implemented yet.") - elif isinstance(yaml_config, dict): - raise ValueError("If output_dir is given, yaml_config must be a file.") + if output_dir is not None: + return petab_files_1to2(yaml_config, output_dir) + + with TemporaryDirectory() as tmp_dir: + petab_files_1to2(yaml_config, tmp_dir) + return v2.Problem.from_yaml(Path(tmp_dir, Path(yaml_config).name)) + +def petab_files_1to2(yaml_config: Path | str | dict, output_dir: Path | str): + """Convert PEtab files from PEtab 1.0 to PEtab 2.0. + + + :param yaml_config: + The PEtab problem as dictionary or YAML file name. + :param output_dir: + The output directory to save the converted PEtab problem. + + :raises ValueError: + If the input is invalid or does not pass linting or if the generated + files do not pass linting. 
+ """ if isinstance(yaml_config, Path | str): yaml_file = str(yaml_config) path_prefix = get_path_prefix(yaml_file) @@ -55,54 +84,192 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None): get_dest_path = lambda filename: f"{output_dir}/{filename}" # noqa: E731 - # Validate original PEtab problem + # Validate the original PEtab problem validate(yaml_config, path_prefix=path_prefix) if get_major_version(yaml_config) != 1: raise ValueError("PEtab problem is not version 1.") - petab_problem = ProblemV1.from_yaml(yaml_file or yaml_config) - if lint_v1_problem(petab_problem): - raise ValueError("PEtab problem does not pass linting.") + petab_problem = v1.Problem.from_yaml(yaml_file or yaml_config) + # TODO: move to mapping table + # get rid of conditionName column if present (unsupported in v2) + petab_problem.condition_df = petab_problem.condition_df.drop( + columns=[v1.C.CONDITION_NAME], errors="ignore" + ) + if v1.lint_problem(petab_problem): + raise ValueError("Provided PEtab problem does not pass linting.") + + output_dir = Path(output_dir) # Update YAML file new_yaml_config = _update_yaml(yaml_config) - - # Write new YAML file - output_dir = Path(output_dir) - output_dir.mkdir(parents=True, exist_ok=True) - new_yaml_file = output_dir / Path(yaml_file).name - write_yaml(new_yaml_config, new_yaml_file) + new_yaml_config = v2.ProblemConfig(**new_yaml_config) # Update tables - # condition tables, observable tables, SBML files, parameter table: - # no changes - just copy - file = yaml_config[C.PARAMETER_FILE] - _copy_file(get_src_path(file), get_dest_path(file)) - - for problem_config in yaml_config[C.PROBLEMS]: - for file in chain( - problem_config.get(C.CONDITION_FILES, []), - problem_config.get(C.OBSERVABLE_FILES, []), - ( - model[C.MODEL_LOCATION] - for model in problem_config.get(C.MODEL_FILES, {}).values() - ), - problem_config.get(C.MEASUREMENT_FILES, []), - problem_config.get(C.VISUALIZATION_FILES, []), + + # parameter table + parameter_df = 
v1v2_parameter_df(petab_problem.parameter_df.copy()) + v2.write_parameter_df( + parameter_df, get_dest_path(new_yaml_config.parameter_files[0]) + ) + + # copy files that don't need conversion: models + for file in ( + model.location for model in new_yaml_config.model_files.values() + ): + _copy_file(get_src_path(file), Path(get_dest_path(file))) + + # Update observable table + for observable_file in new_yaml_config.observable_files: + observable_df = v1.get_observable_df(get_src_path(observable_file)) + observable_df = v1v2_observable_df( + observable_df, + ) + v2.write_observable_df(observable_df, get_dest_path(observable_file)) + + # Update condition table + for condition_file in new_yaml_config.condition_files: + condition_df = v1.get_condition_df(get_src_path(condition_file)) + condition_df = v1v2_condition_df(condition_df, petab_problem.model) + v2.write_condition_df(condition_df, get_dest_path(condition_file)) + + # records for the experiment table to be created + experiments = [] + + def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str: + if not sim_cond_id and not preeq_cond_id: + return "" + # check whether the conditions will exist in the v2 condition table + sim_cond_exists = ( + petab_problem.condition_df.loc[sim_cond_id].notna().any() + ) + preeq_cond_exists = ( + preeq_cond_id + and petab_problem.condition_df.loc[preeq_cond_id].notna().any() + ) + if not sim_cond_exists and not preeq_cond_exists: + # if we have only all-NaN conditions, we don't create a new + # experiment + return "" + + if preeq_cond_id: + preeq_cond_id = f"{preeq_cond_id}_" + exp_id = f"experiment__{preeq_cond_id}__{sim_cond_id}" + if exp_id in experiments: # noqa: B023 + i = 1 + while f"{exp_id}_{i}" in experiments: # noqa: B023 + i += 1 + exp_id = f"{exp_id}_{i}" + return exp_id + + measured_experiments = ( + petab_problem.get_simulation_conditions_from_measurement_df() + ) + for ( + _, + row, + ) in measured_experiments.iterrows(): + # generate a new experiment 
for each simulation / pre-eq condition + # combination + sim_cond_id = row[v1.C.SIMULATION_CONDITION_ID] + preeq_cond_id = row.get(v1.C.PREEQUILIBRATION_CONDITION_ID, "") + exp_id = create_experiment_id(sim_cond_id, preeq_cond_id) + if not exp_id: + continue + if preeq_cond_id: + experiments.append( + { + v2.C.EXPERIMENT_ID: exp_id, + v2.C.TIME: v2.C.TIME_PREEQUILIBRATION, + v2.C.CONDITION_ID: preeq_cond_id, + } + ) + experiments.append( + { + v2.C.EXPERIMENT_ID: exp_id, + v2.C.TIME: 0, + v2.C.CONDITION_ID: sim_cond_id, + } + ) + if experiments: + exp_table_path = output_dir / "experiments.tsv" + if exp_table_path.exists(): + raise ValueError( + f"Experiment table file {exp_table_path} already exists." + ) + new_yaml_config.experiment_files.append("experiments.tsv") + v2.write_experiment_df( + v2.get_experiment_df(pd.DataFrame(experiments)), exp_table_path + ) + + for measurement_file in new_yaml_config.measurement_files: + measurement_df = v1.get_measurement_df(get_src_path(measurement_file)) + # if there is already an experiment ID column, we rename it + if v2.C.EXPERIMENT_ID in measurement_df.columns: + measurement_df.rename( + columns={v2.C.EXPERIMENT_ID: f"experiment_id_{uuid4()}"}, + inplace=True, + ) + # add pre-eq condition id if not present or convert to string + # for simplicity + if v1.C.PREEQUILIBRATION_CONDITION_ID in measurement_df.columns: + measurement_df.fillna( + {v1.C.PREEQUILIBRATION_CONDITION_ID: ""}, inplace=True + ) + else: + measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID] = "" + + if ( + petab_problem.condition_df is not None + and len( + set(petab_problem.condition_df.columns) - {v1.C.CONDITION_NAME} + ) + == 0 ): - _copy_file(get_src_path(file), get_dest_path(file)) + # we can't have "empty" conditions with no overrides in v2, + # therefore, we drop the respective condition ID completely + # TODO: or can we? 
+ # TODO: this needs to be checked condition-wise, not globally + measurement_df[v1.C.SIMULATION_CONDITION_ID] = "" + if v1.C.PREEQUILIBRATION_CONDITION_ID in measurement_df.columns: + measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID] = "" + # condition IDs to experiment IDs + measurement_df.insert( + 0, + v2.C.EXPERIMENT_ID, + measurement_df.apply( + lambda row: create_experiment_id( + row[v1.C.SIMULATION_CONDITION_ID], + row.get(v1.C.PREEQUILIBRATION_CONDITION_ID, ""), + ), + axis=1, + ), + ) + del measurement_df[v1.C.SIMULATION_CONDITION_ID] + del measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID] + v2.write_measurement_df( + measurement_df, get_dest_path(measurement_file) + ) - # TODO: Measurements: preequilibration to experiments/timecourses once - # finalized - ... + # Write the new YAML file + new_yaml_file = output_dir / Path(yaml_file).name + new_yaml_config.to_yaml(new_yaml_file) # validate updated Problem - validation_issues = lint_v2_problem(new_yaml_file) + validation_issues = v2.lint_problem(new_yaml_file) if validation_issues: - raise ValueError( - "Generated PEtab v2 problem did not pass linting: " - f"{validation_issues}" + sev = v2.lint.ValidationIssueSeverity + validation_issues.log(max_level=sev.WARNING) + errors = "\n".join( + map( + str, + (i for i in validation_issues if i.level > sev.WARNING), + ) ) + if errors: + raise ValueError( + "The generated PEtab v2 problem did not pass linting: " + f"{errors}" + ) def _update_yaml(yaml_config: dict) -> dict: @@ -110,31 +277,52 @@ def _update_yaml(yaml_config: dict) -> dict: yaml_config = yaml_config.copy() # Update format_version - yaml_config[C.FORMAT_VERSION] = "2.0.0" + yaml_config[v2.C.FORMAT_VERSION] = "2.0.0" # Add extensions - yaml_config[C.EXTENSIONS] = [] + yaml_config[v2.C.EXTENSIONS] = {} # Move models and set IDs (filename for now) - for problem in yaml_config[C.PROBLEMS]: - problem[C.MODEL_FILES] = {} - models = problem[C.MODEL_FILES] - for sbml_file in problem[C.SBML_FILES]: + 
yaml_config[v2.C.MODEL_FILES] = {} + for problem in yaml_config[v1.C.PROBLEMS]: + models = {} + for sbml_file in problem[v1.C.SBML_FILES]: model_id = sbml_file.split("/")[-1].split(".")[0] models[model_id] = { - C.MODEL_LANGUAGE: MODEL_TYPE_SBML, - C.MODEL_LOCATION: sbml_file, + v2.C.MODEL_LANGUAGE: MODEL_TYPE_SBML, + v2.C.MODEL_LOCATION: sbml_file, } - problem[C.MODEL_FILES] = problem.get(C.MODEL_FILES, {}) - del problem[C.SBML_FILES] + yaml_config[v2.C.MODEL_FILES] |= models + del problem[v1.C.SBML_FILES] + + for file_type in ( + v1.C.CONDITION_FILES, + v1.C.MEASUREMENT_FILES, + v1.C.OBSERVABLE_FILES, + ): + if file_type in problem: + yaml_config[file_type] = problem[file_type] + del problem[file_type] + del yaml_config[v1.C.PROBLEMS] + + # parameter_file -> parameter_files + if not isinstance( + (par_files := yaml_config.pop(v1.C.PARAMETER_FILE, [])), list + ): + par_files = [par_files] + yaml_config[v2.C.PARAMETER_FILES] = par_files return yaml_config -def _copy_file(src: Path | str, dest: Path | str): +def _copy_file(src: Path | str, dest: Path): """Copy file.""" - src = str(src) - dest = str(dest) + # src might be a URL - convert to Path if local + src_url = urlparse(src) + if not src_url.scheme: + src = Path(src) + elif src_url.scheme == "file" and not src_url.netloc: + src = Path(src.removeprefix("file:/")) if is_url(src): with get_handle(src, mode="r") as src_handle: @@ -142,4 +330,237 @@ def _copy_file(src: Path | str, dest: Path | str): dest_handle.write(src_handle.handle.read()) return - shutil.copy(str(src), str(dest)) + try: + if dest.samefile(src): + return + except FileNotFoundError: + shutil.copy(str(src), str(dest)) + + +def v1v2_condition_df( + condition_df: pd.DataFrame, model: v1.Model +) -> pd.DataFrame: + """Convert condition table from petab v1 to v2.""" + condition_df = condition_df.copy().reset_index() + with suppress(KeyError): + # conditionName was dropped in PEtab v2 + condition_df.drop(columns=[v1.C.CONDITION_NAME], inplace=True) + + 
condition_df = condition_df.melt( + id_vars=[v1.C.CONDITION_ID], + var_name=v2.C.TARGET_ID, + value_name=v2.C.TARGET_VALUE, + ).dropna(subset=[v2.C.TARGET_VALUE]) + + if condition_df.empty: + # This happens if there weren't any condition-specific changes + return pd.DataFrame( + columns=[ + v2.C.CONDITION_ID, + v2.C.TARGET_ID, + v2.C.TARGET_VALUE, + ] + ) + + return condition_df + + +def v1v2_observable_df(observable_df: pd.DataFrame) -> pd.DataFrame: + """Convert observable table from petab v1 to v2. + + Perform all updates that can be done solely on the observable table: + * drop observableTransformation, update noiseDistribution + * update placeholder parameters + """ + df = observable_df.copy().reset_index() + + # drop observableTransformation, update noiseDistribution + # if there is no observableTransformation, no need to update + if v1.C.OBSERVABLE_TRANSFORMATION in df.columns: + df[v1.C.OBSERVABLE_TRANSFORMATION] = df[ + v1.C.OBSERVABLE_TRANSFORMATION + ].fillna(v1.C.LIN) + + if v1.C.NOISE_DISTRIBUTION in df: + df[v1.C.NOISE_DISTRIBUTION] = df[v1.C.NOISE_DISTRIBUTION].fillna( + v1.C.NORMAL + ) + else: + df[v1.C.NOISE_DISTRIBUTION] = v1.C.NORMAL + + # merge observableTransformation into noiseDistribution + def update_noise_dist(row): + dist = row.get(v1.C.NOISE_DISTRIBUTION) + trans = row.get(v1.C.OBSERVABLE_TRANSFORMATION) + + if trans == v1.C.LIN: + new_dist = dist + else: + new_dist = f"{trans}-{dist}" + + if new_dist == "log10-normal": + warnings.warn( + f"Noise distribution `{new_dist}' for " + f"observable `{row[v1.C.OBSERVABLE_ID]}'" + f" is not supported in PEtab v2. " + "Using `log-normal` instead.", + # call to `petab1to2` + stacklevel=9, + ) + new_dist = v2.C.LOG_NORMAL + + if new_dist not in v2.C.NOISE_DISTRIBUTIONS: + raise NotImplementedError( + f"Noise distribution `{new_dist}' for " + f"observable `{row[v1.C.OBSERVABLE_ID]}'" + f" is not supported in PEtab v2." 
+ ) + + df[v2.C.NOISE_DISTRIBUTION] = df.apply(update_noise_dist, axis=1) + df.drop(columns=[v1.C.OBSERVABLE_TRANSFORMATION], inplace=True) + + def extract_placeholders(row: pd.Series, type_: str) -> str: + """Extract placeholders from observable formula.""" + if type_ == "observable": + formula = row[v1.C.OBSERVABLE_FORMULA] + elif type_ == "noise": + formula = row[v1.C.NOISE_FORMULA] + else: + raise ValueError(f"Unknown placeholder type: {type_}") + + if pd.isna(formula): + return "" + + t = f"{re.escape(type_)}Parameter" + o = re.escape(row[v1.C.OBSERVABLE_ID]) + + pattern = re.compile(rf"(?:^|\W)({t}\d+_{o})(?=\W|$)") + + expr = sympify_petab(formula) + # for 10+ placeholders, the current lexicographical sorting will result + # in incorrect ordering of the placeholder IDs, so that they don't + # align with the overrides in the measurement table, but who does + # that anyway? + return v2.C.PARAMETER_SEPARATOR.join( + sorted( + str(sym) + for sym in expr.free_symbols + if sym.is_Symbol and pattern.match(str(sym)) + ) + ) + + df[v2.C.OBSERVABLE_PLACEHOLDERS] = df.apply( + extract_placeholders, args=("observable",), axis=1 + ) + df[v2.C.NOISE_PLACEHOLDERS] = df.apply( + extract_placeholders, args=("noise",), axis=1 + ) + + return df + + +def v1v2_parameter_df( + parameter_df: pd.DataFrame, +) -> pd.DataFrame: + """Convert parameter table from petab v1 to v2. + + Do all the necessary conversions to the parameter table that can + be done with the parameter table alone. 
+ """ + df = parameter_df.copy().reset_index() + + # parameter.estimate: int -> bool + df[v2.C.ESTIMATE] = df[v1.C.ESTIMATE].apply( + lambda x: str(bool(int(x))).lower() + ) + + def update_prior(row): + """Convert prior to v2 format.""" + prior_type = row.get(v1.C.OBJECTIVE_PRIOR_TYPE) + if pd.isna(prior_type): + prior_type = v1.C.UNIFORM + + pscale = row.get(v1.C.PARAMETER_SCALE) + if pd.isna(pscale): + pscale = v1.C.LIN + + if prior_type not in v1.C.PARAMETER_SCALE_PRIOR_TYPES: + return prior_type + + new_prior_type = prior_type.removeprefix("parameterScale").lower() + if pscale != v1.C.LIN: + new_prior_type = f"{pscale}-{new_prior_type}" + + if new_prior_type == "log10-normal": + warnings.warn( + f"Prior distribution `{new_prior_type}' for parameter " + f"`{row[v1.C.PARAMETER_ID]}' is not supported in PEtab v2. " + "Using `log-normal` instead.", + # call to `petab1to2` + stacklevel=9, + ) + new_prior_type = v2.C.LOG_NORMAL + + if new_prior_type not in v2.C.PRIOR_DISTRIBUTIONS: + raise NotImplementedError( + f"PEtab v2 does not support prior type `{new_prior_type}' " + f"required for parameter `{row[v1.C.PARAMETER_ID]}'." 
+ ) + + return new_prior_type + + # update parameterScale*-priors + if v1.C.OBJECTIVE_PRIOR_TYPE in df.columns: + df[v1.C.OBJECTIVE_PRIOR_TYPE] = df.apply(update_prior, axis=1) + + # rename objectivePrior* to prior* + df.rename( + columns={ + v1.C.OBJECTIVE_PRIOR_TYPE: v2.C.PRIOR_DISTRIBUTION, + v1.C.OBJECTIVE_PRIOR_PARAMETERS: v2.C.PRIOR_PARAMETERS, + }, + inplace=True, + errors="ignore", + ) + # some columns were dropped in PEtab v2 + if v1.C.INITIALIZATION_PRIOR_TYPE in df and ( + df[v1.C.INITIALIZATION_PRIOR_TYPE].notna().any() + ): + warnings.warn( + "Initialisation priors in parameter table are not supported " + "in PEtab v2.", + stacklevel=9, + ) + if not (df[v1.C.PARAMETER_SCALE] == v1.C.LIN).all(): + warnings.warn( + "Parameter scales are not supported in PEtab v2.", + stacklevel=9, + ) + df.drop( + columns=[ + v1.C.INITIALIZATION_PRIOR_TYPE, + v1.C.INITIALIZATION_PRIOR_PARAMETERS, + v1.C.PARAMETER_SCALE, + ], + inplace=True, + errors="ignore", + ) + + # if uniform, we need to explicitly set the parameters + def update_prior_pars(row): + prior_type = row.get(v2.C.PRIOR_DISTRIBUTION) + prior_pars = row.get(v2.C.PRIOR_PARAMETERS) + + if prior_type in (v2.C.UNIFORM, v2.C.LOG_UNIFORM) and pd.isna( + prior_pars + ): + return ( + f"{row[v2.C.LOWER_BOUND]}{v2.C.PARAMETER_SEPARATOR}" + f"{row[v2.C.UPPER_BOUND]}" + ) + + return prior_pars + + df[v2.C.PRIOR_PARAMETERS] = df.apply(update_prior_pars, axis=1) + + return df diff --git a/petab/v2/problem.py b/petab/v2/problem.py deleted file mode 100644 index 612f2571..00000000 --- a/petab/v2/problem.py +++ /dev/null @@ -1,719 +0,0 @@ -"""PEtab v2 problems.""" -from __future__ import annotations - -import logging -import os -import tempfile -from math import nan -from pathlib import Path -from typing import TYPE_CHECKING - -import pandas as pd - -from ..v1 import ( - conditions, - core, - mapping, - measurements, - observables, - parameter_mapping, - parameters, - sampling, - yaml, -) -from ..v1.C import * # noqa: F403 
-from ..v1.models.model import Model, model_factory -from ..v1.yaml import get_path_prefix - -if TYPE_CHECKING: - from ..v2.lint import ValidationIssue, ValidationResultList, ValidationTask - - -__all__ = ["Problem"] - - -class Problem: - """ - PEtab parameter estimation problem as defined by - - - model - - condition table - - measurement table - - parameter table - - observables table - - mapping table - - Optionally it may contain visualization tables. - - Parameters: - condition_df: PEtab condition table - measurement_df: PEtab measurement table - parameter_df: PEtab parameter table - observable_df: PEtab observable table - visualization_df: PEtab visualization table - mapping_df: PEtab mapping table - model: The underlying model - extensions_config: Information on the extensions used - """ - - def __init__( - self, - model: Model = None, - condition_df: pd.DataFrame = None, - measurement_df: pd.DataFrame = None, - parameter_df: pd.DataFrame = None, - visualization_df: pd.DataFrame = None, - observable_df: pd.DataFrame = None, - mapping_df: pd.DataFrame = None, - extensions_config: dict = None, - ): - from ..v2.lint import default_validation_tasks - - self.condition_df: pd.DataFrame | None = condition_df - self.measurement_df: pd.DataFrame | None = measurement_df - self.parameter_df: pd.DataFrame | None = parameter_df - self.visualization_df: pd.DataFrame | None = visualization_df - self.observable_df: pd.DataFrame | None = observable_df - self.mapping_df: pd.DataFrame | None = mapping_df - self.model: Model | None = model - self.extensions_config = extensions_config or {} - self.validation_tasks: list[ - ValidationTask - ] = default_validation_tasks.copy() - - def __str__(self): - model = f"with model ({self.model})" if self.model else "without model" - conditions = ( - f"{self.condition_df.shape[0]} conditions" - if self.condition_df is not None - else "without conditions table" - ) - - observables = ( - f"{self.observable_df.shape[0]} observables" - if 
self.observable_df is not None - else "without observables table" - ) - - measurements = ( - f"{self.measurement_df.shape[0]} measurements" - if self.measurement_df is not None - else "without measurements table" - ) - - if self.parameter_df is not None: - num_estimated_parameters = ( - sum(self.parameter_df[ESTIMATE] == 1) - if ESTIMATE in self.parameter_df - else self.parameter_df.shape[0] - ) - parameters = f"{num_estimated_parameters} estimated parameters" - else: - parameters = "without parameter_df table" - - return ( - f"PEtab Problem {model}, {conditions}, {observables}, " - f"{measurements}, {parameters}" - ) - - @staticmethod - def from_yaml(yaml_config: dict | Path | str) -> Problem: - """ - Factory method to load model and tables as specified by YAML file. - - Arguments: - yaml_config: PEtab configuration as dictionary or YAML file name - """ - if isinstance(yaml_config, Path): - yaml_config = str(yaml_config) - - if isinstance(yaml_config, str): - yaml_file = yaml_config - path_prefix = get_path_prefix(yaml_file) - yaml_config = yaml.load_yaml(yaml_config) - get_path = lambda filename: f"{path_prefix}/{filename}" # noqa: E731 - else: - yaml_file = None - get_path = lambda filename: filename # noqa: E731 - - if yaml_config[FORMAT_VERSION] not in {"2.0.0"}: - # If we got a path to a v1 yaml file, try to auto-upgrade - from tempfile import TemporaryDirectory - - from ..versions import get_major_version - from .petab1to2 import petab1to2 - - if get_major_version(yaml_config) == 1 and yaml_file: - logging.debug( - "Auto-upgrading problem from PEtab 1.0 to PEtab 2.0" - ) - with TemporaryDirectory() as tmpdirname: - try: - petab1to2(yaml_file, output_dir=tmpdirname) - except Exception as e: - raise ValueError( - "Failed to auto-upgrade PEtab 1.0 problem to " - "PEtab 2.0" - ) from e - return Problem.from_yaml( - Path(tmpdirname) / Path(yaml_file).name - ) - raise ValueError( - "Provided PEtab files are of unsupported version " - 
f"{yaml_config[FORMAT_VERSION]}. Expected 2.0.0." - ) - - if yaml.is_composite_problem(yaml_config): - raise ValueError( - "petab.Problem.from_yaml() can only be used for " - "yaml files comprising a single model. " - "Consider using " - "petab.CompositeProblem.from_yaml() instead." - ) - - problem0 = yaml_config["problems"][0] - - if isinstance(yaml_config[PARAMETER_FILE], list): - parameter_df = parameters.get_parameter_df( - [get_path(f) for f in yaml_config[PARAMETER_FILE]] - ) - else: - parameter_df = ( - parameters.get_parameter_df( - get_path(yaml_config[PARAMETER_FILE]) - ) - if yaml_config[PARAMETER_FILE] - else None - ) - - if len(problem0[MODEL_FILES]) > 1: - # TODO https://github.com/PEtab-dev/libpetab-python/issues/6 - raise NotImplementedError( - "Support for multiple models is not yet implemented." - ) - if not problem0[MODEL_FILES]: - model = None - else: - model_id, model_info = next(iter(problem0[MODEL_FILES].items())) - model = model_factory( - get_path(model_info[MODEL_LOCATION]), - model_info[MODEL_LANGUAGE], - model_id=model_id, - ) - - measurement_files = [ - get_path(f) for f in problem0.get(MEASUREMENT_FILES, []) - ] - # If there are multiple tables, we will merge them - measurement_df = ( - core.concat_tables( - measurement_files, measurements.get_measurement_df - ) - if measurement_files - else None - ) - - condition_files = [ - get_path(f) for f in problem0.get(CONDITION_FILES, []) - ] - # If there are multiple tables, we will merge them - condition_df = ( - core.concat_tables(condition_files, conditions.get_condition_df) - if condition_files - else None - ) - - visualization_files = [ - get_path(f) for f in problem0.get(VISUALIZATION_FILES, []) - ] - # If there are multiple tables, we will merge them - visualization_df = ( - core.concat_tables(visualization_files, core.get_visualization_df) - if visualization_files - else None - ) - - observable_files = [ - get_path(f) for f in problem0.get(OBSERVABLE_FILES, []) - ] - # If there are 
multiple tables, we will merge them - observable_df = ( - core.concat_tables(observable_files, observables.get_observable_df) - if observable_files - else None - ) - - mapping_files = [get_path(f) for f in problem0.get(MAPPING_FILES, [])] - # If there are multiple tables, we will merge them - mapping_df = ( - core.concat_tables(mapping_files, mapping.get_mapping_df) - if mapping_files - else None - ) - - return Problem( - condition_df=condition_df, - measurement_df=measurement_df, - parameter_df=parameter_df, - observable_df=observable_df, - model=model, - visualization_df=visualization_df, - mapping_df=mapping_df, - extensions_config=yaml_config.get(EXTENSIONS, {}), - ) - - @staticmethod - def from_combine(filename: Path | str) -> Problem: - """Read PEtab COMBINE archive (http://co.mbine.org/documents/archive). - - See also :py:func:`petab.create_combine_archive`. - - Arguments: - filename: Path to the PEtab-COMBINE archive - - Returns: - A :py:class:`petab.Problem` instance. - """ - # function-level import, because module-level import interfered with - # other SWIG interfaces - try: - import libcombine - except ImportError as e: - raise ImportError( - "To use PEtab's COMBINE functionality, libcombine " - "(python-libcombine) must be installed." - ) from e - - archive = libcombine.CombineArchive() - if archive.initializeFromArchive(str(filename)) is None: - raise ValueError(f"Invalid Combine Archive: {filename}") - - with tempfile.TemporaryDirectory() as tmpdirname: - archive.extractTo(tmpdirname) - problem = Problem.from_yaml( - os.path.join(tmpdirname, archive.getMasterFile().getLocation()) - ) - archive.cleanUp() - - return problem - - @staticmethod - def get_problem(problem: str | Path | Problem) -> Problem: - """Get a PEtab problem from a file or a problem object. - - Arguments: - problem: Path to a PEtab problem file or a PEtab problem object. - - Returns: - A PEtab problem object. 
- """ - if isinstance(problem, Problem): - return problem - - if isinstance(problem, str | Path): - return Problem.from_yaml(problem) - - raise TypeError( - "The argument `problem` must be a path to a PEtab problem file " - "or a PEtab problem object." - ) - - def get_optimization_parameters(self) -> list[str]: - """ - Return list of optimization parameter IDs. - - See :py:func:`petab.parameters.get_optimization_parameters`. - """ - return parameters.get_optimization_parameters(self.parameter_df) - - def get_optimization_parameter_scales(self) -> dict[str, str]: - """ - Return list of optimization parameter scaling strings. - - See :py:func:`petab.parameters.get_optimization_parameters`. - """ - return parameters.get_optimization_parameter_scaling(self.parameter_df) - - def get_observable_ids(self) -> list[str]: - """ - Returns dictionary of observable ids. - """ - return list(self.observable_df.index) - - def _apply_mask(self, v: list, free: bool = True, fixed: bool = True): - """Apply mask of only free or only fixed values. - - Parameters - ---------- - v: - The full vector the mask is to be applied to. - free: - Whether to return free parameters, i.e. parameters to estimate. - fixed: - Whether to return fixed parameters, i.e. parameters not to - estimate. - - Returns - ------- - The reduced vector with applied mask. - """ - if not free and not fixed: - return [] - if not free: - return [v[ix] for ix in self.x_fixed_indices] - if not fixed: - return [v[ix] for ix in self.x_free_indices] - return v - - def get_x_ids(self, free: bool = True, fixed: bool = True): - """Generic function to get parameter ids. - - Parameters - ---------- - free: - Whether to return free parameters, i.e. parameters to estimate. - fixed: - Whether to return fixed parameters, i.e. parameters not to - estimate. - - Returns - ------- - The parameter IDs. 
- """ - v = list(self.parameter_df.index.values) - return self._apply_mask(v, free=free, fixed=fixed) - - @property - def x_ids(self) -> list[str]: - """Parameter table parameter IDs""" - return self.get_x_ids() - - @property - def x_free_ids(self) -> list[str]: - """Parameter table parameter IDs, for free parameters.""" - return self.get_x_ids(fixed=False) - - @property - def x_fixed_ids(self) -> list[str]: - """Parameter table parameter IDs, for fixed parameters.""" - return self.get_x_ids(free=False) - - def get_x_nominal( - self, free: bool = True, fixed: bool = True, scaled: bool = False - ): - """Generic function to get parameter nominal values. - - Parameters - ---------- - free: - Whether to return free parameters, i.e. parameters to estimate. - fixed: - Whether to return fixed parameters, i.e. parameters not to - estimate. - scaled: - Whether to scale the values according to the parameter scale, - or return them on linear scale. - - Returns - ------- - The parameter nominal values. 
- """ - if NOMINAL_VALUE in self.parameter_df: - v = list(self.parameter_df[NOMINAL_VALUE]) - else: - v = [nan] * len(self.parameter_df) - - if scaled: - v = list( - parameters.map_scale(v, self.parameter_df[PARAMETER_SCALE]) - ) - return self._apply_mask(v, free=free, fixed=fixed) - - @property - def x_nominal(self) -> list: - """Parameter table nominal values""" - return self.get_x_nominal() - - @property - def x_nominal_free(self) -> list: - """Parameter table nominal values, for free parameters.""" - return self.get_x_nominal(fixed=False) - - @property - def x_nominal_fixed(self) -> list: - """Parameter table nominal values, for fixed parameters.""" - return self.get_x_nominal(free=False) - - @property - def x_nominal_scaled(self) -> list: - """Parameter table nominal values with applied parameter scaling""" - return self.get_x_nominal(scaled=True) - - @property - def x_nominal_free_scaled(self) -> list: - """Parameter table nominal values with applied parameter scaling, - for free parameters. - """ - return self.get_x_nominal(fixed=False, scaled=True) - - @property - def x_nominal_fixed_scaled(self) -> list: - """Parameter table nominal values with applied parameter scaling, - for fixed parameters. - """ - return self.get_x_nominal(free=False, scaled=True) - - def get_lb( - self, free: bool = True, fixed: bool = True, scaled: bool = False - ): - """Generic function to get lower parameter bounds. - - Parameters - ---------- - free: - Whether to return free parameters, i.e. parameters to estimate. - fixed: - Whether to return fixed parameters, i.e. parameters not to - estimate. - scaled: - Whether to scale the values according to the parameter scale, - or return them on linear scale. - - Returns - ------- - The lower parameter bounds. 
- """ - v = list(self.parameter_df[LOWER_BOUND]) - if scaled: - v = list( - parameters.map_scale(v, self.parameter_df[PARAMETER_SCALE]) - ) - return self._apply_mask(v, free=free, fixed=fixed) - - @property - def lb(self) -> list: - """Parameter table lower bounds.""" - return self.get_lb() - - @property - def lb_scaled(self) -> list: - """Parameter table lower bounds with applied parameter scaling""" - return self.get_lb(scaled=True) - - def get_ub( - self, free: bool = True, fixed: bool = True, scaled: bool = False - ): - """Generic function to get upper parameter bounds. - - Parameters - ---------- - free: - Whether to return free parameters, i.e. parameters to estimate. - fixed: - Whether to return fixed parameters, i.e. parameters not to - estimate. - scaled: - Whether to scale the values according to the parameter scale, - or return them on linear scale. - - Returns - ------- - The upper parameter bounds. - """ - v = list(self.parameter_df[UPPER_BOUND]) - if scaled: - v = list( - parameters.map_scale(v, self.parameter_df[PARAMETER_SCALE]) - ) - return self._apply_mask(v, free=free, fixed=fixed) - - @property - def ub(self) -> list: - """Parameter table upper bounds""" - return self.get_ub() - - @property - def ub_scaled(self) -> list: - """Parameter table upper bounds with applied parameter scaling""" - return self.get_ub(scaled=True) - - @property - def x_free_indices(self) -> list[int]: - """Parameter table estimated parameter indices.""" - estimated = list(self.parameter_df[ESTIMATE]) - return [j for j, val in enumerate(estimated) if val != 0] - - @property - def x_fixed_indices(self) -> list[int]: - """Parameter table non-estimated parameter indices.""" - estimated = list(self.parameter_df[ESTIMATE]) - return [j for j, val in enumerate(estimated) if val == 0] - - def get_simulation_conditions_from_measurement_df(self) -> pd.DataFrame: - """See :func:`petab.get_simulation_conditions`.""" - return measurements.get_simulation_conditions(self.measurement_df) 
- - def get_optimization_to_simulation_parameter_mapping(self, **kwargs): - """ - See - :py:func:`petab.parameter_mapping.get_optimization_to_simulation_parameter_mapping`, - to which all keyword arguments are forwarded. - """ - return ( - parameter_mapping.get_optimization_to_simulation_parameter_mapping( - condition_df=self.condition_df, - measurement_df=self.measurement_df, - parameter_df=self.parameter_df, - observable_df=self.observable_df, - model=self.model, - **kwargs, - ) - ) - - def create_parameter_df(self, **kwargs) -> pd.DataFrame: - """Create a new PEtab parameter table - - See :py:func:`create_parameter_df`. - """ - return parameters.create_parameter_df( - model=self.model, - condition_df=self.condition_df, - observable_df=self.observable_df, - measurement_df=self.measurement_df, - mapping_df=self.mapping_df, - **kwargs, - ) - - def sample_parameter_startpoints(self, n_starts: int = 100, **kwargs): - """Create 2D array with starting points for optimization - - See :py:func:`petab.sample_parameter_startpoints`. - """ - return sampling.sample_parameter_startpoints( - self.parameter_df, n_starts=n_starts, **kwargs - ) - - def sample_parameter_startpoints_dict( - self, n_starts: int = 100 - ) -> list[dict[str, float]]: - """Create dictionaries with starting points for optimization - - See also :py:func:`petab.sample_parameter_startpoints`. - - Returns: - A list of dictionaries with parameter IDs mapping to samples - parameter values. - """ - return [ - dict(zip(self.x_free_ids, parameter_values, strict=True)) - for parameter_values in self.sample_parameter_startpoints( - n_starts=n_starts - ) - ] - - def unscale_parameters( - self, - x_dict: dict[str, float], - ) -> dict[str, float]: - """Unscale parameter values. - - Parameters - ---------- - x_dict: - Keys are parameter IDs in the PEtab problem, values are scaled - parameter values. - - Returns - ------- - The unscaled parameter values. 
- """ - return { - parameter_id: parameters.unscale( - parameter_value, - self.parameter_df[PARAMETER_SCALE][parameter_id], - ) - for parameter_id, parameter_value in x_dict.items() - } - - def scale_parameters( - self, - x_dict: dict[str, float], - ) -> dict[str, float]: - """Scale parameter values. - - Parameters - ---------- - x_dict: - Keys are parameter IDs in the PEtab problem, values are unscaled - parameter values. - - Returns - ------- - The scaled parameter values. - """ - return { - parameter_id: parameters.scale( - parameter_value, - self.parameter_df[PARAMETER_SCALE][parameter_id], - ) - for parameter_id, parameter_value in x_dict.items() - } - - @property - def n_estimated(self) -> int: - """The number of estimated parameters.""" - return len(self.x_free_indices) - - @property - def n_measurements(self) -> int: - """Number of measurements.""" - return self.measurement_df[MEASUREMENT].notna().sum() - - @property - def n_priors(self) -> int: - """Number of priors.""" - if OBJECTIVE_PRIOR_PARAMETERS not in self.parameter_df: - return 0 - - return self.parameter_df[OBJECTIVE_PRIOR_PARAMETERS].notna().sum() - - def validate( - self, validation_tasks: list[ValidationTask] = None - ) -> ValidationResultList: - """Validate the PEtab problem. - - Arguments: - validation_tasks: List of validation tasks to run. If ``None`` - or empty, :attr:`Problem.validation_tasks` are used. - Returns: - A list of validation results. 
- """ - from ..v2.lint import ValidationIssueSeverity, ValidationResultList - - validation_results = ValidationResultList() - if self.extensions_config: - validation_results.append( - ValidationIssue( - ValidationIssueSeverity.WARNING, - "Validation of PEtab extensions is not yet implemented, " - "but the given problem uses the following extensions: " - f"{'', ''.join(self.extensions_config.keys())}", - ) - ) - - for task in validation_tasks or self.validation_tasks: - try: - cur_result = task.run(self) - except Exception as e: - cur_result = ValidationIssue( - ValidationIssueSeverity.CRITICAL, - f"Validation task {task} failed with exception: {e}", - ) - - if cur_result: - validation_results.append(cur_result) - - if cur_result.level == ValidationIssueSeverity.CRITICAL: - break - - return validation_results diff --git a/petab/version.py b/petab/version.py index c59cab99..ab7ae256 100644 --- a/petab/version.py +++ b/petab/version.py @@ -1,2 +1,3 @@ """PEtab library version""" -__version__ = "0.5.0" + +__version__ = "0.8.2" diff --git a/petab/versions.py b/petab/versions.py index 2b263aff..b1fdecf4 100644 --- a/petab/versions.py +++ b/petab/versions.py @@ -1,35 +1,68 @@ """Handling of PEtab version numbers.""" + from __future__ import annotations +import re from pathlib import Path -from petab.v1 import Problem as V1Problem -from petab.v1.C import FORMAT_VERSION -from petab.v1.yaml import load_yaml -from petab.v2 import Problem as V2Problem +import petab __all__ = [ "get_major_version", + "parse_version", ] +from . import v1 + +# version regex pattern +_version_pattern = ( + r"(?P<major>\d+)(?:\.(?P<minor>\d+))?" + r"(?:\.(?P<patch>\d+))?(?P<suffix>[\w.]+)?"
+) +_version_re = re.compile(_version_pattern) + + +def parse_version(version: str | int) -> tuple[int, int, int, str]: + """Parse a version string into a tuple of integers and suffix.""" + if isinstance(version, int): + return version, 0, 0, "" + + version = str(version) + match = _version_re.match(version) + if match is None: + raise ValueError(f"Invalid version string: {version}") + + major = int(match.group("major")) + minor = int(match.group("minor") or 0) + patch = int(match.group("patch") or 0) + suffix = match.group("suffix") or "" + + return major, minor, patch, suffix def get_major_version( - problem: str | dict | Path | V1Problem | V2Problem, + problem: str | dict | Path | petab.v1.Problem | petab.v2.Problem, ) -> int: """Get the major version number of the given problem.""" - if isinstance(problem, V1Problem): - return 1 - - if isinstance(problem, V2Problem): - return 2 + version = None if isinstance(problem, str | Path): + from petab.v1.yaml import load_yaml + yaml_config = load_yaml(problem) - version = yaml_config.get(FORMAT_VERSION) + version = yaml_config.get(v1.C.FORMAT_VERSION) elif isinstance(problem, dict): - version = problem.get(FORMAT_VERSION) - else: - raise ValueError(f"Unsupported argument type: {type(problem)}") + version = problem.get(v1.C.FORMAT_VERSION) - version = str(version) - return int(version.split(".")[0]) + if version is not None: + version = str(version) + return int(version.split(".")[0]) + + if isinstance(problem, petab.v1.Problem): + return 1 + + from . 
import v2 + + if isinstance(problem, v2.Problem): + return 2 + + raise ValueError(f"Unsupported argument type: {type(problem)}") diff --git a/pyproject.toml b/pyproject.toml index 1758476a..0295cfa6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [build-system] requires = [ - "setuptools>=62", + "setuptools>=77", "wheel", ] build-backend = "setuptools.build_meta" @@ -9,21 +9,19 @@ build-backend = "setuptools.build_meta" name = "petab" dynamic = ["version", "readme"] description = "Parameter estimation tabular data" -requires-python = ">=3.10" +requires-python = ">=3.12" dependencies = [ - "numpy>=1.15.1", + "numpy>=2.1", "pandas>=1.2.0", - # remove when pandas >= 3, see also - # https://github.com/pandas-dev/pandas/issues/54466 - "pyarrow", "python-libsbml>=5.17.0", "sympy", "colorama", "pyyaml", "jsonschema", "antlr4-python3-runtime==4.13.1", + "pydantic>=2.10", ] -license = {text = "MIT License"} +license = "MIT" authors = [ {name = "The PEtab developers"}, ] @@ -35,18 +33,22 @@ maintainers = [ [project.optional-dependencies] tests = [ + "antimony>=3.1.0", + "copasi-basico>=0.85", + "pysb", "pytest", "pytest-cov", - "simplesbml", "scipy", - "pysb", ] quality = [ "pre-commit", ] reports = [ # https://github.com/spatialaudio/nbsphinx/issues/641 - "Jinja2==3.0.3", + "Jinja2==3.1.6", +] +antimony = [ + "antimony>=2.14.0", ] combine = [ "python-libcombine>=0.2.6", @@ -61,6 +63,8 @@ doc = [ # https://github.com/spatialaudio/nbsphinx/issues/687#issuecomment-1339271312 "ipython>=7.21.0, !=8.7.0", "pysb", + "antimony>=2.14.0", + "sbmlmath>=0.4.0", ] vis = [ "matplotlib>=3.6.0", @@ -109,6 +113,7 @@ convention = "pep257" "tests/*" = ["T201"] [tool.ruff.format] +docstring-code-format = true exclude = [ "petab/math/_generated/*", # auto-generated ] diff --git a/pytest.ini b/pytest.ini index 11b8918a..721d63d9 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,9 +1,11 @@ [pytest] +addopts = --doctest-modules --durations=0 --durations-min=10 +testpaths = + petab + 
tests filterwarnings = error # TODO: until tests are reorganized for petab.v1 ignore::DeprecationWarning - ignore:Support for PEtab2.0 and all of petab.v2 is experimental:UserWarning - ignore:Support for PEtab2.0 is experimental:UserWarning ignore:.*inspect.getargspec\(\) is deprecated.*:DeprecationWarning ignore:.*Passing unrecognized arguments to super\(PyDevIPCompleter6\).*:DeprecationWarning diff --git a/tests/test_version.py b/tests/test_version.py new file mode 100644 index 00000000..757a9b50 --- /dev/null +++ b/tests/test_version.py @@ -0,0 +1,13 @@ +"""Tests related to petab.versions""" + +from petab.versions import * + + +def test_parse_version(): + assert parse_version("1.2.3") == (1, 2, 3, "") + assert parse_version("1.2.3a") == (1, 2, 3, "a") + assert parse_version("1.2") == (1, 2, 0, "") + assert parse_version("1") == (1, 0, 0, "") + assert parse_version(1) == (1, 0, 0, "") + assert parse_version("1.2.3.a") == (1, 2, 3, ".a") + assert parse_version("1.2.3.4") == (1, 2, 3, ".4") diff --git a/tests/v1/math/test_math.py b/tests/v1/math/test_math.py index 4b350d4e..03bd0cbf 100644 --- a/tests/v1/math/test_math.py +++ b/tests/v1/math/test_math.py @@ -6,9 +6,9 @@ import sympy as sp import yaml from sympy.abc import _clash -from sympy.logic.boolalg import Boolean +from sympy.logic.boolalg import Boolean, BooleanFalse, BooleanTrue -from petab.math import sympify_petab +from petab.v1.math import petab_math_str, sympify_petab def test_sympify_numpy(): @@ -24,6 +24,31 @@ def test_parse_simple(): assert float(sympify_petab("1 + 2 * (3 + 4) / 2")) == 8 +def test_evaluate(): + act = sympify_petab("piecewise(1, 1 > 2, 0)", evaluate=False) + assert str(act) == "Piecewise((1.0, 1.0 > 2.0), (0.0, True))" + + +def test_assumptions(): + # in PEtab, all symbols are expected to be real-valued + assert sympify_petab("x").is_real + + # non-real symbols are changed to real + assert sympify_petab(sp.Symbol("x", real=False)).is_real + + +def test_printer(): + a, b, c, d = 
sp.symbols("a b c d", real=True) + assert petab_math_str(None) == "" + assert petab_math_str(BooleanTrue()) == "true" + assert petab_math_str(BooleanFalse()) == "false" + assert petab_math_str((a + b) ** (c + d)) == "(a + b) ^ (c + d)" + # A non-integer rational exponent must be parenthesized, else "a ^ 1/2" + # re-parses as (a^1)/2 (i.e. sqrt(a) would round-trip to a/2). + assert petab_math_str(sp.sqrt(a)) == "a ^ (1/2)" + assert petab_math_str(a ** sp.Rational(2, 3)) == "a ^ (2/3)" + + def read_cases(): """Read test cases from YAML file in the petab_test_suite package.""" yaml_file = importlib.resources.files("petabtests.cases").joinpath( @@ -55,25 +80,36 @@ def read_cases(): @pytest.mark.parametrize("expr_str, expected", read_cases()) def test_parse_cases(expr_str, expected): """Test PEtab math expressions for the PEtab test suite.""" - result = sympify_petab(expr_str) - if isinstance(result, Boolean): - assert result == expected + sym_expr = sympify_petab(expr_str) + if isinstance(sym_expr, Boolean): + assert sym_expr == expected else: try: - result = float(result.evalf()) - assert np.isclose( - result, expected - ), f"{expr_str}: Expected {expected}, got {result}" + result = float(sym_expr.evalf()) + assert np.isclose(result, expected), ( + f"{expr_str}: Expected {expected}, got {result}" + ) except TypeError: - assert ( - result == expected - ), f"{expr_str}: Expected {expected}, got {result}" + assert sym_expr == expected, ( + f"{expr_str}: Expected {expected}, got {result}" + ) + + # test parsing, printing, and parsing again + resympified = sympify_petab(petab_math_str(sym_expr)) + if sym_expr.is_number: + assert np.isclose(float(resympified), float(sym_expr)) + else: + assert resympified.equals(sym_expr), (sym_expr, resympified) def test_ids(): """Test symbols in expressions.""" assert sympify_petab("bla * 2") == 2.0 * sp.Symbol("bla", real=True) + # test that sympy expressions that are invalid in PEtab raise an error + with pytest.raises(ValueError): + 
sympify_petab(sp.Symbol("föö")) + def test_syntax_error(): """Test exceptions upon syntax errors.""" diff --git a/tests/v1/test_calculate.py b/tests/v1/test_calculate.py index ca93c33a..c13105a8 100644 --- a/tests/v1/test_calculate.py +++ b/tests/v1/test_calculate.py @@ -4,18 +4,18 @@ import pandas as pd import pytest -import petab -from petab import ( +from petab.v1 import get_observable_df, get_parameter_df +from petab.v1.C import * +from petab.v1.calculate import ( calculate_chi2, calculate_llh, calculate_residuals, calculate_single_llh, ) -from petab.C import * def model_simple(): - "Simple model." "" + "Simple model." measurement_df = pd.DataFrame( data={ OBSERVABLE_ID: ["obs_a", "obs_a", "obs_b", "obs_b"], @@ -43,12 +43,12 @@ def model_simple(): simulation_df[SIMULATION] = [2, 2, 19, 20] expected_residuals = { - (2 - 0) / 2, - (2 - 1) / 2, - (19 - 20) / 3, - (20 - 22) / 3, + (0 - 2) / 2, + (1 - 2) / 2, + (20 - 19) / 3, + (22 - 20) / 3, } - expected_residuals_nonorm = {2 - 0, 2 - 1, 19 - 20, 20 - 22} + expected_residuals_nonorm = {0 - 2, 1 - 2, 20 - 19, 22 - 20} expected_llh = ( -0.5 * (np.array(list(expected_residuals)) ** 2).sum() - 0.5 * np.log(2 * np.pi * np.array([2, 2, 3, 3]) ** 2).sum() @@ -56,8 +56,8 @@ def model_simple(): return ( measurement_df, - petab.get_observable_df(observable_df), - petab.get_parameter_df(parameter_df), + get_observable_df(observable_df), + get_parameter_df(parameter_df), simulation_df, expected_residuals, expected_residuals_nonorm, @@ -93,8 +93,8 @@ def model_replicates(): ) simulation_df[SIMULATION] = [2, 2] - expected_residuals = {(2 - 0) / 2, (2 - 1) / 2} - expected_residuals_nonorm = {2 - 0, 2 - 1} + expected_residuals = {(0 - 2) / 2, (1 - 2) / 2} + expected_residuals_nonorm = {0 - 2, 1 - 2} expected_llh = ( -0.5 * (np.array(list(expected_residuals)) ** 2).sum() - 0.5 * np.log(2 * np.pi * np.array([2, 2]) ** 2).sum() @@ -141,12 +141,12 @@ def model_scalings(): simulation_df[SIMULATION] = [2, 3] expected_residuals = { - 
(np.log(2) - np.log(0.5)) / 2, - (np.log(3) - np.log(1)) / 2, + (np.log(0.5) - np.log(2)) / 2, + (np.log(1) - np.log(3)) / 2, } expected_residuals_nonorm = { - np.log(2) - np.log(0.5), - np.log(3) - np.log(1), + np.log(0.5) - np.log(2), + np.log(1) - np.log(3), } expected_llh = ( -0.5 * (np.array(list(expected_residuals)) ** 2).sum() @@ -201,12 +201,12 @@ def model_non_numeric_overrides(): simulation_df[SIMULATION] = [2, 3] expected_residuals = { - (np.log(2) - np.log(0.5)) / (2 * 7 + 8 + 4 + np.log(2)), - (np.log(3) - np.log(1)) / (2 * 2 + 3 + 4 + np.log(3)), + (np.log(0.5) - np.log(2)) / (2 * 7 + 8 + 4 + 2), + (np.log(1) - np.log(3)) / (2 * 2 + 3 + 4 + 3), } expected_residuals_nonorm = { - np.log(2) - np.log(0.5), - np.log(3) - np.log(1), + np.log(0.5) - np.log(2), + np.log(1) - np.log(3), } expected_llh = ( -0.5 * (np.array(list(expected_residuals)) ** 2).sum() @@ -214,8 +214,7 @@ def model_non_numeric_overrides(): * np.log( 2 * np.pi - * np.array([2 * 7 + 8 + 4 + np.log(2), 2 * 2 + 3 + 4 + np.log(3)]) - ** 2 + * np.array([2 * 7 + 8 + 4 + 2, 2 * 2 + 3 + 4 + 3]) ** 2 * np.array([0.5, 1]) ** 2 ).sum() ) @@ -261,8 +260,8 @@ def model_custom_likelihood(): ) simulation_df[SIMULATION] = [2, 3] - expected_residuals = {(np.log(2) - np.log(0.5)) / 2, (3 - 2) / 1.5} - expected_residuals_nonorm = {np.log(2) - np.log(0.5), 3 - 2} + expected_residuals = {(np.log(0.5) - np.log(2)) / 2, (2 - 3) / 1.5} + expected_residuals_nonorm = {np.log(0.5) - np.log(2), 2 - 3} expected_llh = ( -np.abs(list(expected_residuals)).sum() - np.log(2 * np.array([2, 1.5]) * np.array([0.5, 1])).sum() diff --git a/tests/v1/test_combine.py b/tests/v1/test_combine.py index 08ad5b77..e685bf2b 100644 --- a/tests/v1/test_combine.py +++ b/tests/v1/test_combine.py @@ -1,11 +1,13 @@ """Test COMBINE archive""" + import tempfile from pathlib import Path import pandas as pd -import petab +import petab.v1 as petab from petab.C import * +from petab.v1.models.sbml_model import SbmlModel # import fixtures 
pytest_plugins = [ @@ -16,10 +18,7 @@ def test_combine_archive(): """Test `create_combine_archive` and `Problem.from_combine`""" # Create test files - import simplesbml - - ss_model = simplesbml.SbmlModel() - + model = SbmlModel.from_antimony("") # Create tables with arbitrary content measurement_df = pd.DataFrame( data={ @@ -80,7 +79,7 @@ def test_combine_archive(): ) as tempdir: # Write test data outdir = Path(tempdir) - petab.write_sbml(ss_model.document, outdir / sbml_file_name) + model.to_file(outdir / sbml_file_name) petab.write_measurement_df( measurement_df, outdir / measurement_file_name ) diff --git a/tests/v1/test_conditions.py b/tests/v1/test_conditions.py index b240241d..45059ba1 100644 --- a/tests/v1/test_conditions.py +++ b/tests/v1/test_conditions.py @@ -1,4 +1,5 @@ """Tests related to petab.conditions""" + import os import tempfile from pathlib import Path diff --git a/tests/v1/test_deprecated.py b/tests/v1/test_deprecated.py index 4af41fa3..ef96f2e9 100644 --- a/tests/v1/test_deprecated.py +++ b/tests/v1/test_deprecated.py @@ -1,4 +1,5 @@ """Check that deprecated functionality raises but still works.""" + import tempfile from pathlib import Path @@ -14,7 +15,7 @@ def test_problem_with_sbml_model(): """Test that a problem can be correctly created from sbml model.""" # retrieve test data ( - ss_model, + model, condition_df, observable_df, measurement_df, @@ -23,7 +24,7 @@ def test_problem_with_sbml_model(): with pytest.deprecated_call(): petab_problem = petab.Problem( # noqa: F811 - sbml_model=ss_model.model, + model=model, condition_df=condition_df, measurement_df=measurement_df, parameter_df=parameter_df, diff --git a/tests/v1/test_distributions.py b/tests/v1/test_distributions.py new file mode 100644 index 00000000..f4b3e3fe --- /dev/null +++ b/tests/v1/test_distributions.py @@ -0,0 +1,135 @@ +import sys +from math import exp + +import numpy as np +import pytest +from numpy.testing import assert_allclose +from scipy.integrate import 
cumulative_trapezoid +from scipy.stats import ( + kstest, + laplace, + loglaplace, + lognorm, + loguniform, + norm, + uniform, +) + +from petab.v1.distributions import * +from petab.v2.C import * + + +@pytest.mark.parametrize( + "distribution", + [ + Normal(2, 1), + Normal(2, 1, log=True), + Normal(2, 1, log=10), + Uniform(2, 4), + Uniform(-2, 4, log=True), + Uniform(2, 4, log=10), + Laplace(1, 2), + Laplace(1, 0.5, log=True), + Normal(2, 1, trunc=(1, 2)), + Normal(2, 1, log=True, trunc=(0.5, 8)), + Normal(2, 1, log=10), + Laplace(1, 2, trunc=(1, 2)), + Laplace(1, 0.5, log=True, trunc=(0.5, 8)), + Cauchy(2, 1), + ChiSquare(4), + Exponential(1), + Gamma(3, 5), + Rayleigh(3), + ], +) +def test_sample_matches_pdf(distribution): + """Test that the sample matches the PDF.""" + np.random.seed(1) + N_SAMPLES = 10_000 + sample = distribution.sample(N_SAMPLES) + + def cdf(x): + # pdf -> cdf + return cumulative_trapezoid(distribution.pdf(x), x) + + # Kolmogorov-Smirnov test to check if the sample is drawn from the CDF + _, p = kstest(sample, cdf) + + # if p < 0.05: + # import matplotlib.pyplot as plt + # plt.hist(sample, bins=100, density=True) + # x = np.linspace(min(sample), max(sample), 100) + # plt.plot(x, distribution.pdf(x)) + # plt.show() + + assert p > 0.05, (p, distribution) + + # check min/max of CDF at the bounds + assert np.isclose( + distribution.cdf( + distribution.trunc_low + if not distribution.logbase + else max(sys.float_info.min, distribution.trunc_low) + ), + 0, + atol=1e-16, + rtol=0, + ) + assert np.isclose( + distribution.cdf(distribution.trunc_high), 1, atol=1e-14, rtol=0 + ) + + # Test samples match scipy CDFs + reference_pdf = None + if distribution._trunc is None and distribution.logbase is False: + if isinstance(distribution, Normal): + reference_pdf = norm.pdf( + sample, distribution.loc, distribution.scale + ) + elif isinstance(distribution, Uniform): + reference_pdf = uniform.pdf( + sample, + distribution._low, + distribution._high - 
distribution._low, + ) + elif isinstance(distribution, Laplace): + reference_pdf = laplace.pdf( + sample, distribution.loc, distribution.scale + ) + + if distribution._trunc is None and distribution.logbase == np.exp(1): + if isinstance(distribution, Normal): + reference_pdf = lognorm.pdf( + sample, scale=np.exp(distribution.loc), s=distribution.scale + ) + elif isinstance(distribution, Uniform): + reference_pdf = loguniform.pdf( + sample, np.exp(distribution._low), np.exp(distribution._high) + ) + elif isinstance(distribution, Laplace): + reference_pdf = loglaplace.pdf( + sample, + c=1 / distribution.scale, + scale=np.exp(distribution.loc), + ) + if reference_pdf is not None: + assert_allclose( + distribution.pdf(sample), reference_pdf, rtol=1e-10, atol=1e-14 + ) + + +def test_log_uniform(): + """Test Uniform(a, b, log=True) vs LogUniform(a, b).""" + # support between exp(1) and exp(2) + dist = Uniform(1, 2, log=True) + assert dist.pdf(exp(0)) == 0 + assert dist.pdf(exp(1)) > 0 + assert dist.pdf(exp(2)) > 0 + assert dist.pdf(exp(3)) == 0 + + # support between 1 and 2 + dist = LogUniform(1, 2) + assert dist.pdf(0) == 0 + assert dist.pdf(1) > 0 + assert dist.pdf(2) > 0 + assert dist.pdf(3) == 0 diff --git a/tests/v1/test_lint.py b/tests/v1/test_lint.py index b178a425..4ad2e9b1 100644 --- a/tests/v1/test_lint.py +++ b/tests/v1/test_lint.py @@ -18,7 +18,6 @@ def test_assert_measured_observables_present(): # create test model - measurement_df = pd.DataFrame( data={ OBSERVABLE_ID: ["non-existing1"], @@ -203,9 +202,9 @@ def test_assert_overrides_match_parameter_count(): # 3 observable parameters given, 2 expected measurement_df = measurement_df_orig.copy() - measurement_df.loc[ - 1, OBSERVABLE_PARAMETERS - ] = "override1;override2;oneTooMuch" + measurement_df.loc[1, OBSERVABLE_PARAMETERS] = ( + "override1;override2;oneTooMuch" + ) with pytest.raises(AssertionError): petab.assert_overrides_match_parameter_count( measurement_df, observable_df @@ -255,15 +254,15 @@ def 
test_assert_no_leading_trailing_whitespace(): def test_assert_model_parameters_in_condition_or_parameter_table(): - import simplesbml - from petab.models.sbml_model import SbmlModel - ss_model = simplesbml.SbmlModel() - ss_model.addParameter("parameter1", 0.0) - ss_model.addParameter("noiseParameter1_", 0.0) - ss_model.addParameter("observableParameter1_", 0.0) - sbml_model = SbmlModel(sbml_model=ss_model.model) + ant_model = """ + parameter1 = 0.0 + noiseParameter1_ = 0.0 + observableParameter1_ = 0.0 + """ + sbml_model = SbmlModel.from_antimony(ant_model) + assert sbml_model.is_valid() lint.assert_model_parameters_in_condition_or_parameter_table( sbml_model, pd.DataFrame(columns=["parameter1"]), pd.DataFrame() @@ -284,7 +283,10 @@ def test_assert_model_parameters_in_condition_or_parameter_table(): sbml_model, pd.DataFrame(), pd.DataFrame() ) - ss_model.addAssignmentRule("parameter1", "parameter2") + sbml_model = SbmlModel.from_antimony( + ant_model + "\nparameter2 = 0\nparameter1 := parameter2" + ) + assert sbml_model.is_valid() lint.assert_model_parameters_in_condition_or_parameter_table( sbml_model, pd.DataFrame(), pd.DataFrame() ) @@ -499,12 +501,11 @@ def test_assert_measurement_conditions_present_in_condition_table(): def test_check_condition_df(): """Check that we correctly detect errors in condition table""" - import simplesbml from petab.models.sbml_model import SbmlModel - ss_model = simplesbml.SbmlModel() - model = SbmlModel(sbml_model=ss_model.model) + model = SbmlModel.from_antimony("") + condition_df = pd.DataFrame( data={ CONDITION_ID: ["condition1"], @@ -527,7 +528,7 @@ def test_check_condition_df(): lint.check_condition_df(condition_df, model, observable_df) # fix by adding parameter - ss_model.addParameter("p1", 1.0) + model = SbmlModel.from_antimony("p1 = 1") lint.check_condition_df(condition_df, model) # species missing in model @@ -536,7 +537,7 @@ def test_check_condition_df(): lint.check_condition_df(condition_df, model) # fix: - 
ss_model.addSpecies("[s1]", 1.0) + model = SbmlModel.from_antimony("p1 = 1; species s1 = 1") lint.check_condition_df(condition_df, model) # compartment missing in model @@ -545,7 +546,9 @@ def test_check_condition_df(): lint.check_condition_df(condition_df, model) # fix: - ss_model.addCompartment(comp_id="c2", vol=1.0) + model = SbmlModel.from_antimony( + "p1 = 1; species s1 = 1; compartment c2 = 1" + ) lint.check_condition_df(condition_df, model) diff --git a/tests/v1/test_measurements.py b/tests/v1/test_measurements.py index ac3e59a3..10f5ba98 100644 --- a/tests/v1/test_measurements.py +++ b/tests/v1/test_measurements.py @@ -1,4 +1,5 @@ """Tests related to petab.measurements""" + import tempfile from pathlib import Path diff --git a/tests/v1/test_model_pysb.py b/tests/v1/test_model_pysb.py index 922dab2f..57371c79 100644 --- a/tests/v1/test_model_pysb.py +++ b/tests/v1/test_model_pysb.py @@ -1,4 +1,5 @@ """Test related to petab.models.model_pysb""" + import pysb import pytest diff --git a/tests/v1/test_observables.py b/tests/v1/test_observables.py index f9547fec..c9932b0d 100644 --- a/tests/v1/test_observables.py +++ b/tests/v1/test_observables.py @@ -1,4 +1,5 @@ """Tests for petab.observables""" + import tempfile from pathlib import Path @@ -69,14 +70,11 @@ def test_write_observable_df(): def test_get_output_parameters(): """Test measurements.get_output_parameters.""" - # sbml model - import simplesbml - from petab.models.sbml_model import SbmlModel - ss_model = simplesbml.SbmlModel() - ss_model.addParameter("fixedParameter1", 1.0) - ss_model.addParameter("observable_1", 1.0) + model = SbmlModel.from_antimony( + "fixedParameter1 = 1.0; observable_1 = 1.0" + ) # observable file observable_df = pd.DataFrame( @@ -88,9 +86,7 @@ def test_get_output_parameters(): } ).set_index(OBSERVABLE_ID) - output_parameters = petab.get_output_parameters( - observable_df, SbmlModel(sbml_model=ss_model.model) - ) + output_parameters = petab.get_output_parameters(observable_df, 
model) assert output_parameters == ["offset", "scaling"] @@ -105,9 +101,7 @@ def test_get_output_parameters(): } ).set_index(OBSERVABLE_ID) - output_parameters = petab.get_output_parameters( - observable_df, SbmlModel(sbml_model=ss_model.model) - ) + output_parameters = petab.get_output_parameters(observable_df, model) assert output_parameters == ["N", "beta"] diff --git a/tests/v1/test_parameter_mapping.py b/tests/v1/test_parameter_mapping.py index e499bd5c..4fe44aa5 100644 --- a/tests/v1/test_parameter_mapping.py +++ b/tests/v1/test_parameter_mapping.py @@ -32,16 +32,15 @@ def test_no_condition_specific(condition_df_2_conditions): } ) - import simplesbml - - ss_model = simplesbml.SbmlModel() - ss_model.addParameter("dynamicParameter1", 1.0) - ss_model.addParameter("dynamicParameter2", 2.0) - ss_model.addParameter("dynamicParameter3", 3.0) + model = SbmlModel.from_antimony( + "dynamicParameter1 = 1.0; " + "dynamicParameter2 = 2.0; " + "dynamicParameter3 = 3.0; " + # add species, which will have initial concentration in condition + # table but which should not show up in mapping + "species someSpecies = 1.0" + ) - # add species, which will have initial concentration in condition table - # but which should not show up in mapping - ss_model.addSpecies("[someSpecies]", 1.0) condition_df["someSpecies"] = [0.0, 0.0] # Test without parameter table @@ -80,7 +79,6 @@ def test_no_condition_specific(condition_df_2_conditions): ), ] - model = SbmlModel(sbml_model=ss_model.model) actual = petab.get_optimization_to_simulation_parameter_mapping( model=model, measurement_df=measurement_df, @@ -245,13 +243,9 @@ def test_no_condition_specific(condition_df_2_conditions): def test_all_override(condition_df_2_conditions): # Condition-specific parameters overriding original parameters condition_df = condition_df_2_conditions - - import simplesbml - - ss_model = simplesbml.SbmlModel() - ss_model.addParameter("dynamicParameter1", 0.0) - ss_model.addParameter("dynamicParameter2", 0.0) - 
model = SbmlModel(sbml_model=ss_model.model) + model = SbmlModel.from_antimony( + "dynamicParameter1 = 0.0; dynamicParameter2 = 0.0" + ) measurement_df = pd.DataFrame( data={ @@ -364,15 +358,16 @@ def test_partial_override(condition_df_2_conditions): ) condition_df.set_index("conditionId", inplace=True) - import simplesbml - - ss_model = simplesbml.SbmlModel() - ss_model.addParameter("fixedParameter1", 0.5) - ss_model.addParameter("fixedParameter2", 1.0) - ss_model.addParameter("dynamicParameter1", 0.0) - ss_model.addParameter("observableParameter1_obs1", 0.0) - ss_model.addParameter("observableParameter2_obs1", 0.0) - ss_model.addParameter("observableParameter1_obs2", 0.0) + model = SbmlModel.from_antimony( + """ + fixedParameter1 = 0.5 + fixedParameter2 = 1.0 + dynamicParameter1 = 0.0 + observableParameter1_obs1 = 0.0 + observableParameter2_obs1 = 0.0 + observableParameter1_obs2 = 0.0 + """ + ) measurement_df = pd.DataFrame( data={ @@ -454,7 +449,7 @@ def test_partial_override(condition_df_2_conditions): actual = petab.get_optimization_to_simulation_parameter_mapping( measurement_df=measurement_df, condition_df=condition_df, - model=petab.models.sbml_model.SbmlModel(ss_model.model), + model=model, parameter_df=parameter_df, ) @@ -504,12 +499,9 @@ def test_parameterized_condition_table(): ) parameter_df.set_index(PARAMETER_ID, inplace=True) - import simplesbml - - ss_model = simplesbml.SbmlModel() - ss_model.addParameter("dynamicParameter1", 1.0) + model = SbmlModel.from_antimony("dynamicParameter1 = 1.0") - assert petab.get_model_parameters(ss_model.model) == [ + assert petab.get_model_parameters(model.sbml_model) == [ "dynamicParameter1" ] @@ -517,7 +509,7 @@ def test_parameterized_condition_table(): measurement_df=measurement_df, condition_df=condition_df, parameter_df=parameter_df, - model=petab.models.sbml_model.SbmlModel(ss_model.model), + model=model, ) expected = [ @@ -550,13 +542,10 @@ def test_parameterized_condition_table_changed_scale(): overridee_id = 
"overridee" # set up model - import simplesbml - - ss_model = simplesbml.SbmlModel() - ss_model.addParameter(overridee_id, 2.0) - assert petab.get_model_parameters(ss_model.model) == [overridee_id] + model = SbmlModel.from_antimony(f"{overridee_id} = 2.0") + assert petab.get_model_parameters(model.sbml_model) == [overridee_id] assert petab.get_model_parameters( - ss_model.model, with_values=True + model.sbml_model, with_values=True ) == {overridee_id: 2.0} # set up condition table @@ -614,7 +603,7 @@ def test_parameterized_condition_table_changed_scale(): measurement_df=measurement_df, condition_df=condition_df, parameter_df=parameter_df, - model=petab.models.sbml_model.SbmlModel(ss_model.model), + model=model, ) expected = [ @@ -638,7 +627,7 @@ def test_parameterized_condition_table_changed_scale(): measurement_df=measurement_df, condition_df=condition_df, parameter_df=parameter_df, - model=petab.models.sbml_model.SbmlModel(ss_model.model), + model=model, scaled_parameters=True, ) @@ -669,7 +658,7 @@ def test_parameterized_condition_table_changed_scale(): measurement_df=measurement_df, condition_df=condition_df, parameter_df=parameter_df, - model=petab.models.sbml_model.SbmlModel(ss_model.model), + model=model, ) expected = [ diff --git a/tests/v1/test_parameters.py b/tests/v1/test_parameters.py index c28528fe..33e7c97d 100644 --- a/tests/v1/test_parameters.py +++ b/tests/v1/test_parameters.py @@ -1,4 +1,5 @@ """Tests for petab/parameters.py""" + import tempfile from pathlib import Path diff --git a/tests/v1/test_petab.py b/tests/v1/test_petab.py index 65700af5..7fe6cb9e 100644 --- a/tests/v1/test_petab.py +++ b/tests/v1/test_petab.py @@ -6,7 +6,6 @@ from math import nan from pathlib import Path -import libsbml import numpy as np import pandas as pd import pytest @@ -16,6 +15,7 @@ import petab.v1 from petab.C import * from petab.models.sbml_model import SbmlModel +from petab.v1 import Problem @pytest.fixture @@ -38,61 +38,64 @@ def condition_df_2_conditions(): def 
petab_problem(): """Test petab problem.""" # create test model - import simplesbml + ant_model = "fixedParameter1=0.0; observable_1=0.0" + model = SbmlModel.from_antimony(ant_model) - model = simplesbml.SbmlModel() - model.addParameter("fixedParameter1", 0.0) - model.addParameter("observable_1", 0.0) - - measurement_df = pd.DataFrame( - data={ - OBSERVABLE_ID: ["obs1", "obs2"], - MEASUREMENT: [0.1, 0.2], - OBSERVABLE_PARAMETERS: ["", "p1;p2"], - NOISE_PARAMETERS: ["p3;p4", "p5"], - } + petab_problem = petab.Problem() + petab_problem.add_measurement( + obs_id="obs1", + sim_cond_id="condition1", + time=1.0, + measurement=0.1, + noise_parameters=["p3", "p4"], + ) + petab_problem.add_measurement( + obs_id="obs2", + sim_cond_id="condition2", + time=1.0, + measurement=0.2, + observable_parameters=["p1", "p2"], + noise_parameters=["p5"], ) - condition_df = pd.DataFrame( - data={ - CONDITION_ID: ["condition1", "condition2"], - CONDITION_NAME: ["", "Condition 2"], - "fixedParameter1": [1.0, 2.0], - } - ).set_index(CONDITION_ID) + petab_problem.add_condition("condition1", fixedParameter1=1.0) + petab_problem.add_condition( + "condition2", fixedParameter1=2.0, name="Condition 2" + ) - parameter_df = pd.DataFrame( - data={ - PARAMETER_ID: ["dynamicParameter1", "dynamicParameter2"], - PARAMETER_NAME: ["", "..."], - ESTIMATE: [1, 0], - } - ).set_index(PARAMETER_ID) + petab_problem.add_parameter("dynamicParameter1", estimate=1) + petab_problem.add_parameter("dynamicParameter2", estimate=0, name="...") - observable_df = pd.DataFrame( - data={ - OBSERVABLE_ID: ["obs1"], - OBSERVABLE_NAME: ["julius"], - OBSERVABLE_FORMULA: ["observable_1 * observableParameter1_obs1"], - NOISE_FORMULA: ["0.1 * observable_1 * observableParameter1_obs1"], - } - ).set_index(OBSERVABLE_ID) + petab_problem.add_observable( + "obs1", + formula="observable_1 * observableParameter1_obs1", + noise_formula="0.1 * observable_1 * observableParameter1_obs1", + name="julius", + ) with tempfile.TemporaryDirectory() 
as temp_dir: sbml_file_name = Path(temp_dir, "model.xml") - libsbml.writeSBMLToFile(model.document, str(sbml_file_name)) + model.to_file(sbml_file_name) measurement_file_name = Path(temp_dir, "measurements.tsv") - petab.write_measurement_df(measurement_df, measurement_file_name) + petab.write_measurement_df( + petab_problem.measurement_df, measurement_file_name + ) condition_file_name = Path(temp_dir, "conditions.tsv") - petab.write_condition_df(condition_df, condition_file_name) + petab.write_condition_df( + petab_problem.condition_df, condition_file_name + ) parameter_file_name = Path(temp_dir, "parameters.tsv") - petab.write_parameter_df(parameter_df, parameter_file_name) + petab.write_parameter_df( + petab_problem.parameter_df, parameter_file_name + ) observable_file_name = Path(temp_dir, "observables.tsv") - petab.write_observable_df(observable_df, observable_file_name) + petab.write_observable_df( + petab_problem.observable_df, observable_file_name + ) with pytest.deprecated_call(): petab_problem = petab.Problem.from_files( @@ -278,13 +281,15 @@ def test_create_parameter_df( condition_df_2_conditions, ): # pylint: disable=W0621 """Test petab.create_parameter_df.""" - import simplesbml - - ss_model = simplesbml.SbmlModel() - ss_model.addSpecies("[x1]", 1.0) - ss_model.addParameter("fixedParameter1", 2.0) - ss_model.addParameter("p0", 3.0) - model = SbmlModel(sbml_model=ss_model.model) + ant_model = """ + species x1 = 1.0 + fixedParameter1 = 2.0 + p0 = 3.0 + # Add assignment rule target which should be ignored + assignment_target = 0.0 + assignment_target := 1.0 + """ + model = SbmlModel.from_antimony(ant_model) observable_df = pd.DataFrame( data={ @@ -293,10 +298,6 @@ def test_create_parameter_df( } ).set_index(OBSERVABLE_ID) - # Add assignment rule target which should be ignored - ss_model.addParameter("assignment_target", 0.0) - ss_model.addAssignmentRule("assignment_target", "1.0") - measurement_df = pd.DataFrame( data={ OBSERVABLE_ID: ["obs1", "obs2"], @@ 
-312,10 +313,10 @@ def test_create_parameter_df( with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") parameter_df = petab.v1.create_parameter_df( - ss_model.model, - condition_df_2_conditions, - observable_df, - measurement_df, + sbml_model=model.sbml_model, + condition_df=condition_df_2_conditions, + observable_df=observable_df, + measurement_df=measurement_df, ) assert len(w) == 1 assert issubclass(w[-1].category, DeprecationWarning) @@ -330,9 +331,9 @@ def test_create_parameter_df( assert parameter_df.index.values.tolist() == expected # test with condition parameter override: - condition_df_2_conditions.loc[ - "condition2", "fixedParameter1" - ] = "overrider" + condition_df_2_conditions.loc["condition2", "fixedParameter1"] = ( + "overrider" + ) expected = ["p3", "p4", "p1", "p2", "p5", "overrider"] parameter_df = petab.create_parameter_df( @@ -507,7 +508,7 @@ def test_flatten_timepoint_specific_output_overrides(): ) pd.testing.assert_frame_equal( - problem.observable_df, observable_df_expected + problem.observable_df, observable_df_expected, check_dtype=False ) pd.testing.assert_frame_equal( problem.measurement_df, measurement_df_expected @@ -822,51 +823,38 @@ def test_problem_from_yaml_v1_multiple_files(): observable_files: [observables1.tsv, observables2.tsv] sbml_files: [] """ - with tempfile.TemporaryDirectory() as tmpdir: yaml_path = Path(tmpdir, "problem.yaml") with open(yaml_path, "w") as f: f.write(yaml_config) for i in (1, 2): - condition_df = pd.DataFrame( - { - CONDITION_ID: [f"condition{i}"], - } - ) - condition_df.set_index([CONDITION_ID], inplace=True) + problem = Problem() + problem.add_condition(f"condition{i}") petab.write_condition_df( - condition_df, Path(tmpdir, f"conditions{i}.tsv") + problem.condition_df, Path(tmpdir, f"conditions{i}.tsv") ) - measurement_df = pd.DataFrame( - { - SIMULATION_CONDITION_ID: [f"condition{i}"], - OBSERVABLE_ID: [f"observable{i}"], - TIME: [i], - MEASUREMENT: [1], - } - ) + 
problem.add_measurement(f"observable{i}", f"condition{i}", 1, 1) petab.write_measurement_df( - measurement_df, Path(tmpdir, f"measurements{i}.tsv") + problem.measurement_df, Path(tmpdir, f"measurements{i}.tsv") ) - observables_df = pd.DataFrame( - { - OBSERVABLE_ID: [f"observable{i}"], - OBSERVABLE_FORMULA: [1], - NOISE_FORMULA: [1], - } - ) + problem.add_observable(f"observable{i}", 1, 1) petab.write_observable_df( - observables_df, Path(tmpdir, f"observables{i}.tsv") + problem.observable_df, Path(tmpdir, f"observables{i}.tsv") ) - petab_problem = petab.Problem.from_yaml(yaml_path) + petab_problem1 = petab.Problem.from_yaml(yaml_path) + + # test that we can load the problem from a dict with a custom base path + yaml_config = petab.v1.load_yaml(yaml_path) + petab_problem2 = petab.Problem.from_yaml(yaml_config, base_path=tmpdir) - assert petab_problem.measurement_df.shape[0] == 2 - assert petab_problem.observable_df.shape[0] == 2 - assert petab_problem.condition_df.shape[0] == 2 + for petab_problem in (petab_problem1, petab_problem2): + assert petab_problem.measurement_df.shape[0] == 2 + assert petab_problem.observable_df.shape[0] == 2 + assert petab_problem.condition_df.shape[0] == 2 def test_get_required_parameters_for_parameter_table(petab_problem): diff --git a/tests/v1/test_priors.py b/tests/v1/test_priors.py index ea47e54f..a0ad1116 100644 --- a/tests/v1/test_priors.py +++ b/tests/v1/test_priors.py @@ -1,15 +1,56 @@ from copy import deepcopy +from itertools import product from pathlib import Path import benchmark_models_petab import numpy as np import pandas as pd import pytest -from scipy.stats import norm +from scipy.integrate import cumulative_trapezoid, quad +from scipy.stats import kstest import petab.v1 -from petab.v1 import get_simulation_conditions -from petab.v1.priors import priors_to_measurements +from petab.v1 import ( + ESTIMATE, + MEASUREMENT, + OBJECTIVE_PRIOR_TYPE, + OBSERVABLE_ID, + SIMULATION, + C, + get_simulation_conditions, + 
get_simulation_df, +) +from petab.v1.calculate import calculate_single_llh +from petab.v1.priors import Prior, priors_to_measurements + + +def test_priors_to_measurements_simple(): + """Test the conversion of priors to measurements. + + Illustrates & tests the conversion of a prior to a measurement. + """ + # parameter value at which we evaluate the prior + par_value = 2.5 + # location and scale parameters of the prior + prior_loc = 3 + prior_scale = 3 + + for prior_type in [C.NORMAL, C.LAPLACE]: + # evaluate the original prior + prior = Prior( + prior_type, (prior_loc, prior_scale), transformation=C.LIN + ) + logprior = -prior.neglogprior(par_value, x_scaled=False) + + # evaluate the alternative implementation as a measurement + llh = calculate_single_llh( + measurement=prior_loc, + simulation=par_value, + scale=C.LIN, + noise_distribution=prior_type, + noise_value=prior_scale, + ) + assert np.isclose(llh, logprior, rtol=1e-12, atol=1e-16) @pytest.mark.parametrize( @@ -17,21 +58,44 @@ ) def test_priors_to_measurements(problem_id): """Test the conversion of priors to measurements.""" + # setup petab_problem_priors: petab.v1.Problem = ( benchmark_models_petab.get_problem(problem_id) ) petab_problem_priors.visualization_df = None assert petab.v1.lint_problem(petab_problem_priors) is False - if problem_id == "Isensee_JCB2018": # required to match the stored simulation results below petab.v1.flatten_timepoint_specific_output_overrides( petab_problem_priors ) assert petab.v1.lint_problem(petab_problem_priors) is False + original_problem = deepcopy(petab_problem_priors) + # All priors in this test case are defined on parameter scale, hence + # the dummy measurements will take the scaled nominal values. 
+ x_scaled_dict = dict( + zip( + original_problem.x_free_ids, + original_problem.x_nominal_free_scaled, + strict=True, + ) + ) + x_unscaled_dict = dict( + zip( + original_problem.x_free_ids, + original_problem.x_nominal_free, + strict=True, + ) + ) - petab_problem_measurements = priors_to_measurements(petab_problem_priors) + try: + # convert priors to measurements + petab_problem_measurements = priors_to_measurements( + petab_problem_priors + ) + except NotImplementedError as e: + pytest.skip(str(e)) # check that the original problem is not modified for attr in [ @@ -45,6 +109,7 @@ def test_priors_to_measurements(problem_id): getattr(original_problem, attr) ) ).empty, diff + # check that measurements and observables were added assert petab.v1.lint_problem(petab_problem_measurements) is False assert ( @@ -59,6 +124,7 @@ def test_priors_to_measurements(problem_id): petab_problem_measurements.measurement_df.shape[0] > petab_problem_priors.measurement_df.shape[0] ) + # ensure we didn't introduce any new conditions assert len( get_simulation_conditions(petab_problem_measurements.measurement_df) @@ -67,26 +133,47 @@ def test_priors_to_measurements(problem_id): # verify that the objective function value is the same # load/construct the simulation results - simulation_df_priors = petab.v1.get_simulation_df( + simulation_df_priors = get_simulation_df( Path( benchmark_models_petab.MODELS_DIR, problem_id, f"simulatedData_{problem_id}.tsv", ) ) - simulation_df_measurements = pd.concat( - [ - petab_problem_measurements.measurement_df.rename( - columns={petab.v1.MEASUREMENT: petab.v1.SIMULATION} - )[ - petab_problem_measurements.measurement_df[ - petab.v1.C.OBSERVABLE_ID - ].str.startswith("prior_") - ], - simulation_df_priors, + # for the prior observables, we need to "simulate" the model with the + # nominal parameter values + simulated_prior_observables = ( + petab_problem_measurements.measurement_df.rename( + columns={MEASUREMENT: SIMULATION} + )[ + 
petab_problem_measurements.measurement_df[ + OBSERVABLE_ID + ].str.startswith("prior_") ] ) + def apply_parameter_values(row): + # apply the parameter values to the observable formula for the prior + if row[OBSERVABLE_ID].startswith("prior_"): + parameter_id = row[OBSERVABLE_ID].removeprefix("prior_") + if ( + original_problem.parameter_df.loc[ + parameter_id, OBJECTIVE_PRIOR_TYPE + ] + in C.PARAMETER_SCALE_PRIOR_TYPES + ): + row[SIMULATION] = x_scaled_dict[parameter_id] + else: + row[SIMULATION] = x_unscaled_dict[parameter_id] + return row + + simulated_prior_observables = simulated_prior_observables.apply( + apply_parameter_values, axis=1 + ) + simulation_df_measurements = pd.concat( + [simulation_df_priors, simulated_prior_observables] + ) + llh_priors = petab.v1.calculate_llh_for_table( petab_problem_priors.measurement_df, simulation_df_priors, @@ -102,36 +189,93 @@ def test_priors_to_measurements(problem_id): # get prior objective function contribution parameter_ids = petab_problem_priors.parameter_df.index.values[ - (petab_problem_priors.parameter_df[petab.v1.ESTIMATE] == 1) - & petab_problem_priors.parameter_df[ - petab.v1.OBJECTIVE_PRIOR_TYPE - ].notna() + (petab_problem_priors.parameter_df[ESTIMATE] == 1) + & petab_problem_priors.parameter_df[OBJECTIVE_PRIOR_TYPE].notna() + ] + priors = [ + Prior.from_par_dict( + petab_problem_priors.parameter_df.loc[par_id], + type_="objective", + _bounds_truncate=False, + ) + for par_id in parameter_ids ] - priors = petab.v1.get_priors_from_df( - petab_problem_priors.parameter_df, - mode="objective", - parameter_ids=parameter_ids, - ) prior_contrib = 0 for parameter_id, prior in zip(parameter_ids, priors, strict=True): - prior_type, prior_pars, par_scale, par_bounds = prior - if prior_type == petab.v1.PARAMETER_SCALE_NORMAL: - prior_contrib += norm.logpdf( - petab_problem_priors.x_nominal_free_scaled[ - petab_problem_priors.x_free_ids.index(parameter_id) - ], - loc=prior_pars[0], - scale=prior_pars[1], - ) - else: - # 
enable other models, once libpetab has proper support for - # evaluating the prior contribution. until then, two test - # problems should suffice - assert problem_id == "Raimundez_PCB2020" - pytest.skip(f"Prior type {prior_type} not implemented") + prior_contrib -= prior.neglogprior( + x_scaled_dict[parameter_id], x_scaled=True + ) assert np.isclose( - llh_priors + prior_contrib, llh_measurements, rtol=1e-3, atol=1e-16 + llh_priors + prior_contrib, llh_measurements, rtol=1e-8, atol=1e-16 ), (llh_priors + prior_contrib, llh_measurements) # check that the tolerance is not too high - assert np.abs(prior_contrib) > 1e-3 * np.abs(llh_priors) + assert np.abs(prior_contrib) > 1e-8 * np.abs(llh_priors) + + +cases = list( + product( + [ + (C.NORMAL, (10, 1)), + (C.LOG_NORMAL, (2, 1)), + (C.UNIFORM, (1, 2)), + (C.LAPLACE, (20, 2)), + (C.LOG_LAPLACE, (1, 0.5)), + (C.PARAMETER_SCALE_NORMAL, (1, 1)), + (C.PARAMETER_SCALE_LAPLACE, (1, 2)), + (C.PARAMETER_SCALE_UNIFORM, (1, 2)), + ], + C.PARAMETER_SCALES, + ) +) +ids = [f"{prior_args[0]}_{transform}" for prior_args, transform in cases] + + +@pytest.mark.parametrize("prior_args, transform", cases, ids=ids) +def test_sample_matches_pdf(prior_args, transform): + """Test that the sample matches the PDF.""" + np.random.seed(1) + N_SAMPLES = 10_000 + + prior = Prior(*prior_args, transformation=transform) + + for x_scaled in [False, True]: + sample = prior.sample(N_SAMPLES, x_scaled=x_scaled) + + # pdf -> cdf + def cdf(x): + return cumulative_trapezoid( + prior.pdf( + x, + x_scaled=x_scaled, # noqa B208 + rescale=x_scaled, # noqa B208 + ), + x, + ) + + # Kolmogorov-Smirnov test to check if the sample is drawn from the CDF + _, p = kstest(sample, cdf) + + if p < 0.05: + import matplotlib.pyplot as plt + + plt.hist(sample, bins=100, density=True) + x = np.linspace(min(sample), max(sample), 100) + plt.plot(x, prior.pdf(x, x_scaled=x_scaled, rescale=x_scaled)) + plt.xlabel(("scaled" if x_scaled else "unscaled") + " x") + 
plt.ylabel(("rescaled " if x_scaled else "") + "density") + plt.title(str(prior)) + plt.show() + + assert p > 0.05, (p, prior) + + # check that the integral of the PDF is 1 for the unscaled parameters + integral, abserr = quad( + lambda x: prior.pdf(x, x_scaled=False), + -np.inf if prior.distribution.logbase is False else 0, + np.inf, + limit=100, + epsabs=1e-10, + epsrel=0, + ) + assert np.isclose(integral, 1, rtol=0, atol=10 * abserr) diff --git a/tests/v1/test_sbml.py b/tests/v1/test_sbml.py index 350a2f0d..c38f5ab5 100644 --- a/tests/v1/test_sbml.py +++ b/tests/v1/test_sbml.py @@ -13,17 +13,16 @@ def create_test_data(): # Create test model and data files - import simplesbml - - ss_model = simplesbml.SbmlModel() - ss_model.addCompartment(comp_id="compartment_1", vol=1) - for i in range(1, 4): - ss_model.addParameter(f"parameter_{i}", i) - - for i in range(1, 5): - ss_model.addSpecies(f"[species_{i}]", 10 * i) - - ss_model.addAssignmentRule("species_2", "25") + model = SbmlModel.from_antimony( + "\n".join( + [ + "compartment compartment_1 = 1", + *(f"species species_{i} = 10 * {i}" for i in range(1, 5)), + *(f"parameter_{i} = {i}" for i in range(1, 4)), + "species_2 := 25", + ] + ) + ) condition_df = pd.DataFrame( { @@ -68,7 +67,7 @@ def create_test_data(): ) parameter_df.set_index([petab.PARAMETER_ID], inplace=True) - return ss_model, condition_df, observable_df, measurement_df, parameter_df + return model, condition_df, observable_df, measurement_df, parameter_df def check_model(condition_model): @@ -86,9 +85,9 @@ def check_model(condition_model): condition_model.getSpecies("species_4").getInitialConcentration() == 3.25 ) - assert ( - len(condition_model.getListOfInitialAssignments()) == 0 - ), "InitialAssignment not removed" + assert len(condition_model.getListOfInitialAssignments()) == 0, ( + "InitialAssignment not removed" + ) assert condition_model.getCompartment("compartment_1").getSize() == 2.0 assert condition_model.getParameter("parameter_1").getValue() 
== 1.25 assert condition_model.getParameter("parameter_2").getValue() == 2.25 @@ -99,7 +98,7 @@ def test_get_condition_specific_models(): """Test for petab.sbml.get_condition_specific_models""" # retrieve test data ( - ss_model, + model, condition_df, observable_df, measurement_df, @@ -107,7 +106,7 @@ def test_get_condition_specific_models(): ) = create_test_data() petab_problem = petab.Problem( - model=petab.models.sbml_model.SbmlModel(ss_model.model), + model=model, condition_df=condition_df, observable_df=observable_df, measurement_df=measurement_df, @@ -133,3 +132,21 @@ def test_sbml_model_repr(): sbml_model.setId("test") petab_model = SbmlModel(sbml_model) assert repr(petab_model) == "" + + +def test_sbml_from_to_ant(): + ant_model = """ + model test + R1: S1 -> S2; k1*S1 + k1 = 1 + end + """ + petab_model = SbmlModel.from_antimony(ant_model) + assert petab_model.model_id == "test" + assert petab_model.get_parameter_value("k1") == 1.0 + assert set(petab_model.get_valid_parameters_for_parameter_table()) == { + "k1" + } + + # convert back to antimony + assert "R1: S1 -> S2; k1*S1" in petab_model.to_antimony() diff --git a/tests/v1/test_simplify.py b/tests/v1/test_simplify.py index 3d9a8909..1724f8bb 100644 --- a/tests/v1/test_simplify.py +++ b/tests/v1/test_simplify.py @@ -1,9 +1,9 @@ """Tests for petab.simplify.*""" + from math import nan import pandas as pd import pytest -import simplesbml from pandas.testing import * from petab import Problem @@ -14,9 +14,9 @@ @pytest.fixture def problem() -> Problem: - ss_model = simplesbml.SbmlModel() - ss_model.addParameter("some_parameter", val=1.0) - ss_model.addParameter("same_value_for_all_conditions", val=1.0) + model = SbmlModel.from_antimony( + "some_parameter = 1.0; same_value_for_all_conditions = 1.0" + ) observable_df = pd.DataFrame( { @@ -53,7 +53,7 @@ def problem() -> Problem: } ) yield Problem( - model=SbmlModel(sbml_model=ss_model.getModel()), + model=model, condition_df=conditions_df, 
observable_df=observable_df, measurement_df=measurement_df, diff --git a/tests/v1/test_simulate.py b/tests/v1/test_simulate.py index e23b63cb..7945b1bb 100644 --- a/tests/v1/test_simulate.py +++ b/tests/v1/test_simulate.py @@ -1,4 +1,5 @@ """Tests for petab/simulate.py.""" + import functools from collections.abc import Callable from pathlib import Path diff --git a/tests/v1/test_visualization.py b/tests/v1/test_visualization.py index 0edd4b78..3c5a3a65 100644 --- a/tests/v1/test_visualization.py +++ b/tests/v1/test_visualization.py @@ -8,14 +8,14 @@ import petab from petab.C import * -from petab.visualize import ( +from petab.v1.visualize import ( plot_goodness_of_fit, plot_residuals_vs_simulation, plot_with_vis_spec, plot_without_vis_spec, ) -from petab.visualize.lint import validate_visualization_df -from petab.visualize.plotting import VisSpecParser +from petab.v1.visualize.lint import validate_visualization_df +from petab.v1.visualize.plotting import VisSpecParser # Avoid errors when plotting without X server plt.switch_backend("agg") diff --git a/tests/v1/test_yaml.py b/tests/v1/test_yaml.py index 82ab242c..168d7697 100644 --- a/tests/v1/test_yaml.py +++ b/tests/v1/test_yaml.py @@ -1,4 +1,5 @@ """Test for petab.yaml""" + import tempfile from pathlib import Path diff --git a/tests/v2/test_calculate.py b/tests/v2/test_calculate.py new file mode 100644 index 00000000..cba929ae --- /dev/null +++ b/tests/v2/test_calculate.py @@ -0,0 +1,451 @@ +"""Tests related to petab.calculate.""" + +import numpy as np +import pandas as pd +import pytest + +from petab.v2 import get_observable_df, get_parameter_df +from petab.v2.C import * +from petab.v2.calculate import ( + calculate_chi2, + calculate_llh, + calculate_residuals, + calculate_single_llh, +) + + +def model_simple(): + "Simple model." 
+ measurement_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs_a", "obs_a", "obs_b", "obs_b"], + EXPERIMENT_ID: ["c0", "c1", "c0", "c1"], + TIME: [0, 10, 0, 10], + MEASUREMENT: [0, 1, 20, 22], + } + ) + + observable_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs_a", "obs_b"], + OBSERVABLE_FORMULA: ["A", "B"], + NOISE_FORMULA: [2, 3], + } + ).set_index([OBSERVABLE_ID]) + + parameter_df = pd.DataFrame( + data={PARAMETER_ID: ["par1", "par2"], NOMINAL_VALUE: [3, 4]} + ) + + simulation_df = measurement_df.copy(deep=True).rename( + columns={MEASUREMENT: SIMULATION} + ) + simulation_df[SIMULATION] = [2, 2, 19, 20] + + expected_residuals = { + (0 - 2) / 2, + (1 - 2) / 2, + (20 - 19) / 3, + (22 - 20) / 3, + } + expected_residuals_nonorm = {0 - 2, 1 - 2, 20 - 19, 22 - 20} + expected_llh = ( + -0.5 * (np.array(list(expected_residuals)) ** 2).sum() + - 0.5 * np.log(2 * np.pi * np.array([2, 2, 3, 3]) ** 2).sum() + ) + + return ( + measurement_df, + get_observable_df(observable_df), + get_parameter_df(parameter_df), + simulation_df, + expected_residuals, + expected_residuals_nonorm, + expected_llh, + ) + + +def model_replicates(): + """Model with replicates.""" + measurement_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs_a", "obs_a"], + EXPERIMENT_ID: ["c0", "c0"], + TIME: [10, 10], + MEASUREMENT: [0, 1], + } + ) + + observable_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs_a"], + OBSERVABLE_FORMULA: ["A"], + NOISE_FORMULA: [2], + } + ).set_index([OBSERVABLE_ID]) + + parameter_df = pd.DataFrame( + data={PARAMETER_ID: ["par1", "par2"], NOMINAL_VALUE: [3, 4]} + ).set_index([PARAMETER_ID]) + + simulation_df = measurement_df.copy(deep=True).rename( + columns={MEASUREMENT: SIMULATION} + ) + simulation_df[SIMULATION] = [2, 2] + + expected_residuals = {(0 - 2) / 2, (1 - 2) / 2} + expected_residuals_nonorm = {0 - 2, 1 - 2} + expected_llh = ( + -0.5 * (np.array(list(expected_residuals)) ** 2).sum() + - 0.5 * np.log(2 * np.pi * np.array([2, 2]) ** 2).sum() + ) + + return ( + 
measurement_df, + observable_df, + parameter_df, + simulation_df, + expected_residuals, + expected_residuals_nonorm, + expected_llh, + ) + + +def model_scalings(): + """Model with scalings.""" + measurement_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs_a", "obs_a"], + EXPERIMENT_ID: ["c0", "c0"], + TIME: [5, 10], + MEASUREMENT: [0.5, 1], + } + ) + + observable_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs_a"], + OBSERVABLE_FORMULA: ["A"], + NOISE_DISTRIBUTION: [LOG_NORMAL], + NOISE_FORMULA: [2], + } + ).set_index([OBSERVABLE_ID]) + + parameter_df = pd.DataFrame( + data={PARAMETER_ID: ["par1", "par2"], NOMINAL_VALUE: [3, 4]} + ).set_index([PARAMETER_ID]) + + simulation_df = measurement_df.copy(deep=True).rename( + columns={MEASUREMENT: SIMULATION} + ) + simulation_df[SIMULATION] = [2, 3] + + expected_residuals = { + (np.log(0.5) - np.log(2)) / 2, + (np.log(1) - np.log(3)) / 2, + } + expected_residuals_nonorm = { + np.log(0.5) - np.log(2), + np.log(1) - np.log(3), + } + expected_llh = ( + -0.5 * (np.array(list(expected_residuals)) ** 2).sum() + - 0.5 + * np.log( + 2 * np.pi * np.array([2, 2]) ** 2 * np.array([0.5, 1]) ** 2 + ).sum() + ) + + return ( + measurement_df, + observable_df, + parameter_df, + simulation_df, + expected_residuals, + expected_residuals_nonorm, + expected_llh, + ) + + +def model_non_numeric_overrides(): + """Model with non-numeric overrides.""" + measurement_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs_a", "obs_a"], + EXPERIMENT_ID: ["c0", "c0"], + TIME: [5, 10], + MEASUREMENT: [0.5, 1], + NOISE_PARAMETERS: ["7;8", "2;par1"], + } + ) + + observable_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs_a"], + OBSERVABLE_FORMULA: ["A"], + NOISE_DISTRIBUTION: [LOG_NORMAL], + NOISE_FORMULA: [ + "2*noiseParameter1_obs_a + " + "noiseParameter2_obs_a + par2 + obs_a" + ], + NOISE_PLACEHOLDERS: [ + "noiseParameter1_obs_a;noiseParameter2_obs_a" + ], + } + ).set_index([OBSERVABLE_ID]) + + parameter_df = pd.DataFrame( + data={PARAMETER_ID: 
["par1", "par2"], NOMINAL_VALUE: [3, 4]} + ).set_index([PARAMETER_ID]) + + simulation_df = measurement_df.copy(deep=True).rename( + columns={MEASUREMENT: SIMULATION} + ) + simulation_df[SIMULATION] = [2, 3] + + expected_residuals = { + (np.log(0.5) - np.log(2)) / (2 * 7 + 8 + 4 + 2), + (np.log(1) - np.log(3)) / (2 * 2 + 3 + 4 + 3), + } + expected_residuals_nonorm = { + np.log(0.5) - np.log(2), + np.log(1) - np.log(3), + } + expected_llh = ( + -0.5 * (np.array(list(expected_residuals)) ** 2).sum() + - 0.5 + * np.log( + 2 + * np.pi + * np.array([2 * 7 + 8 + 4 + 2, 2 * 2 + 3 + 4 + 3]) ** 2 + * np.array([0.5, 1]) ** 2 + ).sum() + ) + + return ( + measurement_df, + observable_df, + parameter_df, + simulation_df, + expected_residuals, + expected_residuals_nonorm, + expected_llh, + ) + + +def model_custom_likelihood(): + """Model with customized likelihoods.""" + measurement_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs_a", "obs_b"], + EXPERIMENT_ID: ["c0", "c0"], + TIME: [5, 10], + MEASUREMENT: [0.5, 2], + } + ) + + observable_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs_a", "obs_b"], + OBSERVABLE_FORMULA: ["A", "B"], + NOISE_FORMULA: [2, 1.5], + NOISE_DISTRIBUTION: [LOG_LAPLACE, LAPLACE], + } + ).set_index([OBSERVABLE_ID]) + + parameter_df = pd.DataFrame( + data={PARAMETER_ID: ["par1", "par2"], NOMINAL_VALUE: [3, 4]} + ).set_index([PARAMETER_ID]) + + simulation_df = measurement_df.copy(deep=True).rename( + columns={MEASUREMENT: SIMULATION} + ) + simulation_df[SIMULATION] = [2, 3] + + expected_residuals = {(np.log(0.5) - np.log(2)) / 2, (2 - 3) / 1.5} + expected_residuals_nonorm = {np.log(0.5) - np.log(2), 2 - 3} + expected_llh = ( + -np.abs(list(expected_residuals)).sum() + - np.log(2 * np.array([2, 1.5]) * np.array([0.5, 1])).sum() + ) + + return ( + measurement_df, + observable_df, + parameter_df, + simulation_df, + expected_residuals, + expected_residuals_nonorm, + expected_llh, + ) + + +@pytest.fixture +def models(): + """Test model collection covering 
different features.""" + return [ + model_simple(), + model_replicates(), + model_scalings(), + model_non_numeric_overrides(), + model_custom_likelihood(), + ] + + +def test_calculate_residuals(models): # pylint: disable=W0621 + """Test calculate.calculate_residuals.""" + for i_model, model in enumerate(models): + print(f"Model {i_model}") + ( + measurement_df, + observable_df, + parameter_df, + simulation_df, + expected_residuals, + _, + _, + ) = model + residual_dfs = calculate_residuals( + measurement_df, simulation_df, observable_df, parameter_df + ) + assert sorted(residual_dfs[0][RESIDUAL]) == pytest.approx( + sorted(expected_residuals) + ) + + +def test_calculate_non_normalized_residuals(models): # pylint: disable=W0621 + """Test calculate.calculate_residuals without normalization.""" + for i_model, model in enumerate(models): + print(f"Model {i_model}") + ( + measurement_df, + observable_df, + parameter_df, + simulation_df, + _, + expected_residuals_nonorm, + _, + ) = model + residual_dfs = calculate_residuals( + measurement_df, + simulation_df, + observable_df, + parameter_df, + normalize=False, + ) + assert sorted(residual_dfs[0][RESIDUAL]) == pytest.approx( + sorted(expected_residuals_nonorm) + ) + + +def test_calculate_chi2(models): # pylint: disable=W0621 + """Test calculate.calculate_chi2.""" + for i_model, model in enumerate(models): + print(f"Model {i_model}") + ( + measurement_df, + observable_df, + parameter_df, + simulation_df, + expected_residuals, + _, + _, + ) = model + chi2 = calculate_chi2( + measurement_df, simulation_df, observable_df, parameter_df + ) + + expected = sum(np.array(list(expected_residuals)) ** 2) + assert chi2 == pytest.approx(expected) + + +def test_calculate_llh(models): # pylint: disable=W0621 + """Test calculate.calculate_llh.""" + for i_model, model in enumerate(models): + print(f"Model {i_model}") + ( + measurement_df, + observable_df, + parameter_df, + simulation_df, + _, + _, + expected_llh, + ) = model + llh = 
calculate_llh( + measurement_df, simulation_df, observable_df, parameter_df + ) + assert llh == pytest.approx(expected_llh) or expected_llh is None + + +def test_calculate_single_llh(): + """Test calculate.calculate_single_llh.""" + m, s, sigma = 5.3, 4.5, 1.6 + pi, log, log10 = np.pi, np.log, np.log10 + + llh = calculate_single_llh( + measurement=m, + simulation=s, + noise_value=sigma, + noise_distribution=NORMAL, + scale=LIN, + ) + expected_llh = -0.5 * (((s - m) / sigma) ** 2 + log(2 * pi * sigma**2)) + assert llh == pytest.approx(expected_llh) + + llh = calculate_single_llh( + measurement=m, + simulation=s, + noise_value=sigma, + noise_distribution=NORMAL, + scale=LOG, + ) + expected_llh = -0.5 * ( + ((log(s) - log(m)) / sigma) ** 2 + log(2 * pi * sigma**2 * m**2) + ) + assert llh == pytest.approx(expected_llh) + + llh = calculate_single_llh( + measurement=m, + simulation=s, + noise_value=sigma, + noise_distribution=NORMAL, + scale=LOG10, + ) + expected_llh = -0.5 * ( + ((log10(s) - log10(m)) / sigma) ** 2 + + log(2 * pi * sigma**2 * m**2 * log(10) ** 2) + ) + assert llh == pytest.approx(expected_llh) + + llh = calculate_single_llh( + measurement=m, + simulation=s, + noise_value=sigma, + noise_distribution=LAPLACE, + scale=LIN, + ) + expected_llh = -abs((s - m) / sigma) - log(2 * sigma) + assert llh == pytest.approx(expected_llh) + + llh = calculate_single_llh( + measurement=m, + simulation=s, + noise_value=sigma, + noise_distribution=LAPLACE, + scale=LOG, + ) + expected_llh = -abs((log(s) - log(m)) / sigma) - log(2 * sigma * m) + assert llh == pytest.approx(expected_llh) + + llh = calculate_single_llh( + measurement=m, + simulation=s, + noise_value=sigma, + noise_distribution=LAPLACE, + scale=LOG10, + ) + expected_llh = -abs((log10(s) - log10(m)) / sigma) - log( + 2 * sigma * m * log(10) + ) + assert llh == pytest.approx(expected_llh) diff --git a/tests/v2/test_conversion.py b/tests/v2/test_conversion.py index c309a00e..21949714 100644 --- 
a/tests/v2/test_conversion.py +++ b/tests/v2/test_conversion.py @@ -1,34 +1,58 @@ import logging -import tempfile +import pytest + +from petab.v2 import Problem from petab.v2.petab1to2 import petab1to2 def test_petab1to2_remote(): + """Test that we can upgrade a remote PEtab 1.0.0 problem.""" yaml_url = ( "https://raw.githubusercontent.com/PEtab-dev/petab_test_suite" "/main/petabtests/cases/v1.0.0/sbml/0001/_0001.yaml" ) - with tempfile.TemporaryDirectory(prefix="test_petab1to2") as tmpdirname: - # TODO verify that the v2 files match "ground truth" - # in `petabtests/cases/v2.0.0/sbml/0001/_0001.yaml` - petab1to2(yaml_url, tmpdirname) + problem = petab1to2(yaml_url) + assert isinstance(problem, Problem) + assert len(problem.measurements) -def test_benchmark_collection(): - """Test that we can upgrade all benchmark collection models.""" +try: import benchmark_models_petab - logging.basicConfig(level=logging.DEBUG) + parametrize_or_skip = pytest.mark.parametrize( + "problem_id", benchmark_models_petab.MODELS + ) +except ImportError: + parametrize_or_skip = pytest.mark.skip( + reason="benchmark_models_petab not installed" + ) + - for problem_id in benchmark_models_petab.MODELS: - if problem_id == "Lang_PLOSComputBiol2024": - # Does not pass initial linting - continue +@pytest.mark.filterwarnings( + "ignore:.*Using `log-normal` instead.*:UserWarning" +) +@pytest.mark.filterwarnings( + "ignore:.*Initialisation priors in parameter table are not supported.*:" + "UserWarning" +) +@pytest.mark.filterwarnings( + "ignore:.*Parameter scales are not supported in PEtab v2.*:UserWarning" +) +@parametrize_or_skip +def test_benchmark_collection(problem_id): + """Test that we can upgrade all benchmark collection models.""" + logging.basicConfig(level=logging.DEBUG) - yaml_path = benchmark_models_petab.get_problem_yaml_path(problem_id) - with tempfile.TemporaryDirectory( - prefix=f"test_petab1to2_{problem_id}" - ) as tmpdirname: - petab1to2(yaml_path, tmpdirname) + if problem_id == 
"Froehlich_CellSystems2018": + # this is mostly about 6M sympifications in the condition table + pytest.skip("Too slow. Re-enable once we are faster.") + + yaml_path = benchmark_models_petab.get_problem_yaml_path(problem_id) + try: + problem = petab1to2(yaml_path) + except NotImplementedError as e: + pytest.skip(str(e)) + assert isinstance(problem, Problem) + assert len(problem.measurements) diff --git a/tests/v2/test_converters.py b/tests/v2/test_converters.py new file mode 100644 index 00000000..7f031fc7 --- /dev/null +++ b/tests/v2/test_converters.py @@ -0,0 +1,234 @@ +from math import inf + +import pandas as pd + +from petab.v2 import Change, Condition, Experiment, ExperimentPeriod, Problem +from petab.v2.converters import ExperimentsToSbmlConverter +from petab.v2.models.sbml_model import SbmlModel + + +def test_experiments_to_events_converter(): + """Test the ExperimentsToSbmlConverter.""" + ant_model = """ + species X = 0 + X' = 1 + """ + problem = Problem() + problem.model = SbmlModel.from_antimony(ant_model) + problem.add_condition("c1", X=1) + problem.add_condition("c2", X=2) + problem.add_experiment("e1", -inf, "c1", 10, "c2") + + converter = ExperimentsToSbmlConverter(problem) + converted = converter.convert() + assert converted.validate().has_errors() is False + + assert isinstance(converted.model, SbmlModel) + sbml_model = converted.model.sbml_model + + # one event -- the initial period is handled via initial assignments + assert sbml_model.getNumEvents() == 1 + assert converted.conditions == [ + Condition( + id="_petab_preequilibration_on", + changes=[ + Change( + target_id="_petab_preequilibration_indicator", + target_value=1, + ) + ], + ), + Condition( + id="_petab_preequilibration_off", + changes=[ + Change( + target_id="_petab_preequilibration_indicator", + target_value=0, + ) + ], + ), + Condition( + id="_petab_experiment_condition_e1", + changes=[ + Change( + target_id="_petab_experiment_indicator_e1", target_value=1 + ) + ], + ), + ] + assert 
converted.experiments == [ + Experiment( + id="e1", + periods=[ + ExperimentPeriod( + time=-inf, + condition_ids=[ + "_petab_experiment_condition_e1", + "_petab_preequilibration_on", + ], + ), + ExperimentPeriod( + time=10.0, + condition_ids=[ + "_petab_experiment_condition_e1", + "_petab_preequilibration_off", + ], + ), + ], + ), + ] + + +def test_simulate_experiment_to_events(): + """ + Convert PEtab experiment to SBML events and compare BasiCO simulation + results. + """ + import basico + + # the basic model for the PEtab problem + ant_model1 = """ + compartment comp1 = 10 + compartment comp2 = 2 + # concentration-based species + species s1c_comp1 in comp1 = 1 + species s1c_comp2 in comp2 = 2 + species s2c_comp1 in comp1 = 3 + species s2c_comp2 in comp2 = 4 + # amount-based species + # (note that in antimony<3.1.0 the initial values are concentrations + # nonetheless) + substanceOnly species s3a_comp1 in comp1 = 5 * comp1 + substanceOnly species s3a_comp2 in comp2 = 6 * comp2 + substanceOnly species s4a_comp1 in comp1 = 7 * comp1 + substanceOnly species s4a_comp2 in comp2 = 8 * comp2 + + # something dynamic + some_species in comp1 = 0 + some_species' = 1 + + # set time-derivatives, otherwise BasiCO won't include them in the result + s1c_comp1' = 0 + s1c_comp2' = 0 + s2c_comp1' = 0 + s2c_comp2' = 0 + s3a_comp1' = 0 + s3a_comp2' = 0 + s4a_comp1' = 0 + s4a_comp2' = 0 + """ + + # append events, equivalent to the expected PEtab conversion result + ant_model_expected = ( + ant_model1 + + """ + # resize compartment + # The size of comp1 should be set to 20, the concentrations of the + # contained concentration-based species and the amounts of the amount-based + # species should remain unchanged. comp2 and everything therein is + # unaffected. 
+ # I.e., post-event: + # s1c_comp1 = 1, s2c_comp1 = 3, s3a_comp1 = 5, s4a_comp1 = 7 + at time >= 1: + comp1 = 20, + s1c_comp1 = s1c_comp1 * 20 / comp1, + s2c_comp1 = s2c_comp1 * 20 / comp1; + + # resize compartment *and* reassign concentration + # The size of comp2 should be set to 4, the concentration/amount of + # s1c_comp2/s3a_comp2 should be set to the given values, + # the amounts for amount-based and concentrations for concentration-based + # other species in comp2 should remain unchanged. + # I.e., post-event: + # comp2 = 4 + # s1c_comp2 = 5, s3a_comp2 = 16, + # s2c_comp2 = 4 (unchanged), s4a_comp2 = 8 (unchanged) + # The post-event concentrations of concentration-based species are + # (per SBML): + # new_conc = assigned_amount / new_volume + # = assigned_conc * old_volume / new_volume + # <=> assigned_conc = new_conc * new_volume / old_volume + # The post-event amounts of amount-based species are: + # new_amount = assigned_amount (independent of volume change) + at time >= 5: + comp2 = 4, + s3a_comp2 = 16, + s1c_comp2 = 5 * 4 / comp2, + s2c_comp2 = s2c_comp2 * 4 / comp2; + """ + ) + + # simulate expected model in BasiCO + sbml_expected = SbmlModel.from_antimony(ant_model_expected).to_sbml_str() + basico.load_model(sbml_expected) + # output timepoints (initial, pre-/post-event, ...) 
+ timepoints = [0, 0.9, 1.1, 4.9, 5.1, 10] + # Simulation will return all species as concentrations + df_expected = basico.run_time_course(values=timepoints) + # fmt: off + assert ( + df_expected + == pd.DataFrame( + {'Values[some_species]': {0.0: 0.0, 0.9: 0.9, + 1.1: 1.0999999999999996, 4.9: 4.9, + 5.1: 5.100000000000001, 10.0: 10.0}, + 's1c_comp1': {0.0: 1.0, 0.9: 1.0, 1.1: 1.0, 4.9: 1.0, 5.1: 1.0, + 10.0: 1.0}, + 's2c_comp1': {0.0: 3.0, 0.9: 3.0, 1.1: 3.0, 4.9: 3.0, 5.1: 3.0, + 10.0: 3.0}, + 's3a_comp1': {0.0: 5.0, 0.9: 5.0, 1.1: 2.5, 4.9: 2.5, 5.1: 2.5, + 10.0: 2.5}, + 's4a_comp1': {0.0: 7.0, 0.9: 7.0, 1.1: 3.5, 4.9: 3.5, 5.1: 3.5, + 10.0: 3.5}, + 's1c_comp2': {0.0: 2.0, 0.9: 2.0, 1.1: 2.0, 4.9: 2.0, 5.1: 5.0, + 10.0: 5.0}, + 's2c_comp2': {0.0: 4.0, 0.9: 4.0, 1.1: 4.0, 4.9: 4.0, 5.1: 4.0, + 10.0: 4.0}, + 's3a_comp2': {0.0: 6.0, 0.9: 6.0, 1.1: 6.0, 4.9: 6.0, 5.1: 4.0, + 10.0: 4.0}, + 's4a_comp2': {0.0: 8.0, 0.9: 8.0, 1.1: 8.0, 4.9: 8.0, 5.1: 4.0, + 10.0: 4.0}, + 'Compartments[comp1]': {0.0: 10.0, 0.9: 10.0, 1.1: 20.0, + 4.9: 20.0, 5.1: 20.0, 10.0: 20.0}, + 'Compartments[comp2]': {0.0: 2.0, 0.9: 2.0, 1.1: 2.0, 4.9: 2.0, + 5.1: 4.0, 10.0: 4.0}} + ) + ).all().all() + # fmt: on + + # construct PEtab test problem + problem = Problem() + problem.model = SbmlModel.from_antimony(ant_model1) + problem.add_condition("c0", comp1=10) + problem.add_condition("c1", comp1=20) + problem.add_condition("c2", comp2=4, s1c_comp2=5, s3a_comp2=16) + problem.add_experiment("e1", 0, "c0", 1, "c1", 5, "c2") + problem.assert_valid() + + # convert PEtab experiments to SBML events and simulate in BasiCO + converter = ExperimentsToSbmlConverter(problem) + converted = converter.convert() + # set experiment indicator to simulate experiment "e1" + converted.model.sbml_model.getParameter( + "_petab_experiment_indicator_e1" + ).setValue(1) + sbml_actual = converted.model.to_sbml_str() + basico.load_model(sbml_actual) + df_actual = basico.run_time_course(values=timepoints) + + # compare results 
+ with pd.option_context( + "display.max_rows", + None, + "display.max_columns", + None, + "display.width", + None, + ): + print("Expected:") + print(df_expected) + print("Actual:") + print(df_actual) + + for col in df_expected.columns: + assert (df_expected[col] == df_actual[col]).all() diff --git a/tests/v2/test_core.py b/tests/v2/test_core.py new file mode 100644 index 00000000..22dbf0e1 --- /dev/null +++ b/tests/v2/test_core.py @@ -0,0 +1,893 @@ +import subprocess +import tempfile +from pathlib import Path + +import numpy as np +import pandas as pd +import pytest +import sympy as sp +from pandas.testing import assert_frame_equal +from pydantic import AnyUrl, ValidationError +from sympy.abc import x, y + +import petab.v2 as petab +from petab.v2 import C +from petab.v2.C import ( + CONDITION_ID, + ESTIMATE, + LOWER_BOUND, + MODEL_ENTITY_ID, + NAME, + NOISE_FORMULA, + NOMINAL_VALUE, + OBSERVABLE_FORMULA, + OBSERVABLE_ID, + PARAMETER_ID, + PETAB_ENTITY_ID, + TARGET_ID, + TARGET_VALUE, + UPPER_BOUND, +) +from petab.v2.core import * +from petab.v2.models.sbml_model import SbmlModel +from petab.v2.petab1to2 import petab1to2 + +example_dir_fujita = Path(__file__).parents[2] / "doc/example/example_Fujita" + + +def test_observable_table_round_trip(): + file = example_dir_fujita / "Fujita_observables.tsv" + observables = ObservableTable.from_tsv(file) + + with tempfile.TemporaryDirectory() as tmp_dir: + tmp_file = Path(tmp_dir) / "observables.tsv" + observables.rel_path = tmp_file + observables.to_tsv() + observables2 = ObservableTable.from_tsv(tmp_file) + assert observables == observables2 + + +@pytest.mark.filterwarnings( + "ignore:.*Parameter scales are not supported in PEtab v2.*:UserWarning" +) +def test_condition_table_round_trip(): + with tempfile.TemporaryDirectory() as tmp_dir: + petab1to2(example_dir_fujita / "Fujita.yaml", tmp_dir) + file = Path(tmp_dir, "Fujita_experimentalCondition.tsv") + conditions = ConditionTable.from_tsv(file) + tmp_file = Path(tmp_dir) 
/ "conditions.tsv" + conditions.rel_path = tmp_file + conditions.to_tsv() + conditions2 = ConditionTable.from_tsv(tmp_file) + assert conditions == conditions2 + + +@pytest.mark.filterwarnings( + "ignore:.*Parameter scales are not supported in PEtab v2.*:UserWarning" +) +def test_assert_valid(): + problem = petab1to2(example_dir_fujita / "Fujita.yaml") + problem.assert_valid() + problem.observable_tables[0] = ObservableTable() + with pytest.raises( + AssertionError, match="not defined in the observable table" + ): + problem.assert_valid() + + +def test_experiment_add_periods(): + """Test operators for Experiment""" + exp = Experiment(id="exp1") + assert exp.periods == [] + + p1 = ExperimentPeriod(time=0, condition_ids=["p1"]) + p2 = ExperimentPeriod(time=1, condition_ids=["p2"]) + p3 = ExperimentPeriod(time=2, condition_ids=["p3"]) + exp += p1 + exp += p2 + + assert exp.periods == [p1, p2] + + exp2 = exp + p3 + assert exp2.periods == [p1, p2, p3] + assert exp.periods == [p1, p2] + + +def test_condition_table_add_changes(): + condition_table = ConditionTable() + assert condition_table.conditions == [] + + c1 = Condition( + id="condition1", + changes=[Change(target_id="k1", target_value=1)], + ) + c2 = Condition( + id="condition2", + changes=[Change(target_id="k2", target_value=sp.sympify("2 * x"))], + ) + + condition_table += c1 + condition_table += c2 + + assert condition_table.conditions == [c1, c2] + + +def test_measurments(): + Measurement( + observable_id="obs1", time=1, experiment_id="exp1", measurement=1 + ) + Measurement( + observable_id="obs1", time="1", experiment_id="exp1", measurement="1" + ) + Measurement( + observable_id="obs1", time="inf", experiment_id="exp1", measurement="1" + ) + + Measurement( + observable_id="obs1", + time=1, + experiment_id="exp1", + measurement=1, + observable_parameters=["p1"], + noise_parameters=["n1"], + ) + + Measurement( + observable_id="obs1", + time=1, + experiment_id="exp1", + measurement=1, + observable_parameters=[1], 
+ noise_parameters=[2], + ) + + Measurement( + observable_id="obs1", + time=1, + experiment_id="exp1", + measurement=1, + observable_parameters=[sp.sympify("x ** y")], + noise_parameters=[sp.sympify("x ** y")], + ) + + assert ( + Measurement( + observable_id="obs1", + time=1, + experiment_id="exp1", + measurement=1, + non_petab=1, + ).non_petab + == 1 + ) + + with pytest.raises(ValidationError, match="got -inf"): + Measurement( + observable_id="obs1", + time="-inf", + experiment_id="exp1", + measurement=1, + ) + + with pytest.raises(ValidationError, match="Invalid ID"): + Measurement( + observable_id="1_obs", time=1, experiment_id="exp1", measurement=1 + ) + + with pytest.raises(ValidationError, match="Invalid ID"): + Measurement( + observable_id="obs", time=1, experiment_id=" exp1", measurement=1 + ) + + +def test_observable(): + Observable(id="obs1", formula=x + y) + Observable(id="obs1", formula="x + y", noise_formula="x + y") + Observable(id="obs1", formula=1, noise_formula=2) + Observable( + id="obs1", + formula="x + y", + noise_formula="x + y", + observable_parameters=["p1"], + noise_parameters=["n1"], + ) + Observable( + id="obs1", + formula=sp.sympify("x + y"), + noise_formula=sp.sympify("x + y"), + observable_parameters=[sp.Symbol("p1")], + noise_parameters=[sp.Symbol("n1")], + ) + assert Observable(id="obs1", formula="x + y", non_petab=1).non_petab == 1 + + o = Observable(id="obs1", formula=x + y) + assert o.observable_placeholders == [] + assert o.noise_placeholders == [] + + o = Observable( + id="obs1", + formula="observableParameter1_obs1", + noise_formula="noiseParameter1_obs1", + observable_placeholders="observableParameter1_obs1", + noise_placeholders="noiseParameter1_obs1", + ) + assert o.observable_placeholders == [ + sp.Symbol("observableParameter1_obs1", real=True), + ] + assert o.noise_placeholders == [ + sp.Symbol("noiseParameter1_obs1", real=True) + ] + + +def test_change(): + Change(target_id="k1", target_value=1) + Change(target_id="k1", 
target_value="x * y") + + assert ( + Change(target_id="k1", target_value=x * y, non_petab="foo").non_petab + == "foo" + ) + with pytest.raises(ValidationError, match="Invalid ID"): + Change(target_id="1_k", target_value=x) + + with pytest.raises(ValidationError, match="input_value=None"): + Change(target_id="k1", target_value=None) + + +def test_period(): + ExperimentPeriod(time=0) + ExperimentPeriod(time=1, condition_ids=["p1"]) + ExperimentPeriod(time="-inf", condition_ids=["p1"]) + + assert ( + ExperimentPeriod(time="1", condition_id="p1", non_petab=1).non_petab + == 1 + ) + + with pytest.raises(ValidationError, match="got inf"): + ExperimentPeriod(time="inf", condition_ids=["p1"]) + + with pytest.raises(ValidationError, match="Invalid conditionId"): + ExperimentPeriod(time=1, condition_ids=["1_condition"]) + + with pytest.raises(ValidationError, match="type=missing"): + ExperimentPeriod(condition_ids=["condition"]) + + +def test_parameter(): + Parameter(id="k1", lb=1, ub=2) + Parameter(id="k1", estimate=False, nominal_value=1) + + assert Parameter(id="k1", lb=1, ub=2, non_petab=1).non_petab == 1 + + with pytest.raises(ValidationError, match="Invalid ID"): + Parameter(id="1_k", lb=1, ub=2) + + with pytest.raises(ValidationError, match="upper"): + Parameter(id="k1", lb=1) + + with pytest.raises(ValidationError, match="lower"): + Parameter(id="k1", ub=1) + + with pytest.raises(ValidationError, match="less than"): + Parameter(id="k1", lb=2, ub=1) + + assert Parameter( + id="k1", estimate=True, lb=1, ub=2, prior_parameters=[1, 2] + ).model_dump() == { + "id": "k1", + "lb": 1.0, + "ub": 2.0, + "nominal_value": None, + "estimate": "true", + "prior_distribution": "", + "prior_parameters": "1.0;2.0", + } + assert Parameter( + id="k1", estimate=False, nominal_value="8" + ).model_dump() == { + "id": "k1", + "lb": None, + "ub": None, + "nominal_value": 8.0, + "estimate": "false", + "prior_distribution": "", + "prior_parameters": "", + } + + +def test_experiment(): + 
Experiment(id="experiment1") + + # extra fields allowed + assert Experiment(id="experiment1", non_petab=1).non_petab == 1 + + # ID required + with pytest.raises(ValidationError, match="Field required"): + Experiment() + + # valid ID required + with pytest.raises(ValidationError, match="Invalid ID"): + Experiment(id="experiment 1") + + periods = [ + ExperimentPeriod(time=C.TIME_PREEQUILIBRATION, condition_ids=["c1"]), + ExperimentPeriod(time=-1, condition_id="c1"), + ExperimentPeriod(time=1, condition_id="c1"), + ] + e = Experiment(id="experiment1", periods=list(reversed(periods))) + + assert e.has_preequilibration is True + + assert e.sorted_periods == periods + assert e.periods != periods + + e.sort_periods() + assert e.periods == periods + + e.periods.pop(0) + assert e.has_preequilibration is False + + +def test_condition_table(): + assert ConditionTable().free_symbols == set() + + assert ( + ConditionTable( + [ + Condition( + id="condition1", + changes=[Change(target_id="k1", target_value="true")], + ) + ] + ).free_symbols + == set() + ) + + assert ConditionTable( + [ + Condition( + id="condition1", + changes=[Change(target_id="k1", target_value=x / y)], + ) + ] + ).free_symbols == {x, y} + + +def test_load_remote(): + """Test loading remote files""" + from jsonschema.exceptions import ValidationError + + yaml_url = ( + "https://raw.githubusercontent.com/PEtab-dev/petab_test_suite" + "/main/petabtests/cases/v2.0.0/sbml/0010/_0010.yaml" + ) + + try: + petab_problem = Problem.from_yaml(yaml_url) + + assert ( + petab_problem.measurement_df is not None + and not petab_problem.measurement_df.empty + ) + + assert petab_problem.validate() == [] + except ValidationError: + # FIXME: Until v2 is finalized, the format of the tests will often be + # out of sync with the schema. + # Ignore validation errors for now. 
+ pass + + +def test_auto_upgrade(): + yaml_url = ( + "https://raw.githubusercontent.com/PEtab-dev/petab_test_suite" + "/main/petabtests/cases/v1.0.0/sbml/0001/_0001.yaml" + ) + problem = Problem.from_yaml(yaml_url) + # TODO check something specifically different in a v2 problem + assert isinstance(problem, Problem) + + +def test_problem_from_yaml_multiple_files(): + """Test loading PEtab version 2 yaml with multiple condition / measurement + / observable files + """ + yaml_config = """ + format_version: 2.0.0 + model_files: + model1: + location: model1.xml + language: sbml + model2: + location: model2.xml + language: sbml + parameter_files: [parameters1.tsv, parameters2.tsv] + condition_files: [conditions1.tsv, conditions2.tsv] + measurement_files: [measurements1.tsv, measurements2.tsv] + observable_files: [observables1.tsv, observables2.tsv] + experiment_files: [experiments1.tsv, experiments2.tsv] + """ + with tempfile.TemporaryDirectory() as tmpdir: + yaml_path = Path(tmpdir, "problem.yaml") + with open(yaml_path, "w") as f: + f.write(yaml_config) + + for i in (1, 2): + SbmlModel.from_antimony("a = 1;").to_file( + Path(tmpdir, f"model{i}.xml") + ) + + problem = Problem() + problem.add_condition(f"condition{i}", parameter1=i) + petab.write_condition_df( + problem.condition_df, Path(tmpdir, f"conditions{i}.tsv") + ) + + problem.add_experiment(f"experiment{i}", 0, f"condition{i}") + petab.write_experiment_df( + problem.experiment_df, Path(tmpdir, f"experiments{i}.tsv") + ) + + problem.add_measurement( + f"observable{i}", + experiment_id=f"experiment{i}", + time=1, + measurement=1, + ) + petab.write_measurement_df( + problem.measurement_df, Path(tmpdir, f"measurements{i}.tsv") + ) + + problem.add_observable(f"observable{i}", 1, 1) + petab.write_observable_df( + problem.observable_df, Path(tmpdir, f"observables{i}.tsv") + ) + problem.add_parameter(f"parameter{i}", False, nominal_value=i) + petab.write_parameter_df( + problem.parameter_df, Path(tmpdir, 
f"parameters{i}.tsv") + ) + + petab_problem1 = petab.Problem.from_yaml(yaml_path) + + # test that we can load the problem from a dict with a custom base path + yaml_config = petab.load_yaml(yaml_path) + petab_problem2 = petab.Problem.from_yaml(yaml_config, base_path=tmpdir) + + # test that we can save the problem to a new directory + with tempfile.TemporaryDirectory() as tmpdir2: + petab_problem1.to_files(tmpdir2) + # check the same files are created + assert { + file.relative_to(tmpdir) for file in Path(tmpdir).iterdir() + } == { + file.relative_to(tmpdir2) for file in Path(tmpdir2).iterdir() + } + petab_problem3 = petab.Problem.from_yaml( + Path(tmpdir2, "problem.yaml") + ) + + for petab_problem in (petab_problem1, petab_problem2, petab_problem3): + assert len(petab_problem.models) == 2 + assert petab_problem.measurement_df.shape[0] == 2 + assert petab_problem.observable_df.shape[0] == 2 + assert petab_problem.condition_df.shape[0] == 2 + assert petab_problem.experiment_df.shape[0] == 2 + + +def test_modify_problem(): + """Test modifying a problem via the API.""" + problem = Problem() + problem.add_condition("condition1", parameter1=1) + problem.add_condition("condition2", parameter2=2) + + exp_condition_df = pd.DataFrame( + data={ + CONDITION_ID: ["condition1", "condition2"], + TARGET_ID: ["parameter1", "parameter2"], + TARGET_VALUE: [1.0, 2.0], + } + ) + assert_frame_equal( + problem.condition_df, exp_condition_df, check_dtype=False + ) + + problem.add_observable("observable1", "1") + problem.add_observable("observable2", "2", noise_formula=2.2) + + exp_observable_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["observable1", "observable2"], + OBSERVABLE_FORMULA: [1, 2], + NOISE_FORMULA: [np.nan, 2.2], + } + ).set_index([OBSERVABLE_ID]) + assert_frame_equal( + problem.observable_df[[OBSERVABLE_FORMULA, NOISE_FORMULA]].map( + lambda x: float(x) if x != "" else None + ), + exp_observable_df, + check_dtype=False, + ) + + problem.add_parameter("parameter1", True, 0, 
lb=1, ub=2) + problem.add_parameter("parameter2", False, 2) + + exp_parameter_df = pd.DataFrame( + data={ + PARAMETER_ID: ["parameter1", "parameter2"], + ESTIMATE: ["true", "false"], + NOMINAL_VALUE: [0.0, 2.0], + LOWER_BOUND: [1.0, np.nan], + UPPER_BOUND: [2.0, np.nan], + } + ).set_index([PARAMETER_ID]) + assert_frame_equal( + problem.parameter_df[ + [ESTIMATE, NOMINAL_VALUE, LOWER_BOUND, UPPER_BOUND] + ], + exp_parameter_df, + check_dtype=False, + ) + + problem.add_mapping("new_petab_id", "1some_model_entity_id") + + exp_mapping_df = pd.DataFrame( + data={ + PETAB_ENTITY_ID: ["new_petab_id"], + MODEL_ENTITY_ID: ["1some_model_entity_id"], + NAME: [None], + } + ).set_index([PETAB_ENTITY_ID]) + assert_frame_equal(problem.mapping_df, exp_mapping_df, check_dtype=False) + + +def test_sample_startpoint_shape(): + """Test startpoint sampling.""" + problem = Problem() + problem += Parameter(id="p1", estimate=True, lb=1, ub=2) + problem += Parameter( + id="p2", + estimate=True, + lb=2, + ub=3, + prior_distribution="normal", + prior_parameters=[2.5, 0.5], + ) + problem += Parameter(id="p3", estimate=False, nominal_value=1) + + n_starts = 10 + sp = problem.sample_parameter_startpoints(n_starts=n_starts) + assert sp.shape == (n_starts, 2) + + +def test_problem_config_paths(): + """Test handling of URLS and local paths in ProblemConfig.""" + + pc = petab.ProblemConfig( + parameter_files=["https://example.com/params.tsv"], + condition_files=["conditions.tsv"], + measurement_files=["measurements.tsv"], + observable_files=["observables.tsv"], + experiment_files=["experiments.tsv"], + ) + assert isinstance(pc.parameter_files[0], AnyUrl) + assert isinstance(pc.condition_files[0], Path) + assert isinstance(pc.measurement_files[0], Path) + assert isinstance(pc.observable_files[0], Path) + assert isinstance(pc.experiment_files[0], Path) + + # Auto-convert to Path on assignment + pc.parameter_files = ["foo.tsv"] + assert isinstance(pc.parameter_files[0], Path) + + # We can't easily 
intercept mutations to the list: + # pc.parameter_files[0] = "foo.tsv" + # assert isinstance(pc.parameter_files[0], Path) + # see also https://github.com/pydantic/pydantic/issues/8575 + + +def test_get_changes_for_period(): + """Test getting changes for a specific period.""" + problem = Problem() + ch1 = Change(target_id="target1", target_value=1.0) + ch2 = Change(target_id="target2", target_value=2.0) + ch3 = Change(target_id="target3", target_value=3.0) + cond1 = Condition(id="condition1_1", changes=[ch1]) + cond2 = Condition(id="condition1_2", changes=[ch2]) + cond3 = Condition(id="condition2", changes=[ch3]) + problem += cond1 + problem += cond2 + problem += cond3 + + p1 = ExperimentPeriod( + id="p1", time=0, condition_ids=["condition1_1", "condition1_2"] + ) + p2 = ExperimentPeriod(id="p2", time=1, condition_ids=["condition2"]) + problem += Experiment( + id="exp1", + periods=[p1, p2], + ) + assert problem.get_changes_for_period(p1) == [ch1, ch2] + assert problem.get_changes_for_period(p2) == [ch3] + + +def test_get_measurements_for_experiment(): + """Test getting measurements for an experiment.""" + problem = Problem() + problem += Condition( + id="condition1", + changes=[Change(target_id="target1", target_value=1.0)], + ) + problem += Condition( + id="condition2", + changes=[Change(target_id="target2", target_value=2.0)], + ) + + e1 = Experiment( + id="exp1", + periods=[ + ExperimentPeriod(id="p1", time=0, condition_ids=["condition1"]), + ], + ) + e2 = Experiment( + id="exp2", + periods=[ + ExperimentPeriod(id="p2", time=1, condition_ids=["condition2"]), + ], + ) + problem += e1 + problem += e2 + + m1 = Measurement( + observable_id="observable1", + experiment_id="exp1", + time=0, + measurement=10.0, + ) + m2 = Measurement( + observable_id="observable2", + experiment_id="exp1", + time=1, + measurement=20.0, + ) + m3 = Measurement( + observable_id="observable3", + experiment_id="exp2", + time=1, + measurement=30.0, + ) + problem += m1 + problem += m2 + problem 
+= m3 + + assert problem.get_measurements_for_experiment(e1) == [m1, m2] + assert problem.get_measurements_for_experiment(e2) == [m3] + + +def test_generate_path(): + import platform + + from petab._utils import _generate_path as gp + + assert gp("foo") == "foo" + assert gp(Path("foo")) == "foo" + assert gp("https://example.com/foo") == "https://example.com/foo" + assert gp(AnyUrl("https://example.com/foo")) == "https://example.com/foo" + + assert gp("foo", "bar") == str(Path("bar", "foo")) + assert gp(Path("foo"), "bar") == str(Path("bar", "foo")) + assert gp(Path("foo"), Path("bar")) == str(Path("bar", "foo")) + assert ( + gp("bar", AnyUrl("https://example.com/foo")) + == "https://example.com/foo/bar" + ) + assert ( + gp("bar", "https://example.com/foo") == "https://example.com/foo/bar" + ) + assert ( + gp("https://example.com/foo", "https://example.com/bar") + == "https://example.com/foo" + ) + + if platform.system() == "Windows": + assert gp(Path("foo"), "c:/bar") == "c:/bar/foo" + assert gp("c:/foo", "c:/bar") == "c:/foo" + else: + assert gp(Path("foo"), "/bar") == "/bar/foo" + assert gp("/foo", "bar") == "/foo" + + +def test_petablint_v2(tmpdir): + """Test that petablint runs on a valid v2 problem without errors.""" + problem = Problem() + problem.model = SbmlModel.from_antimony(""" + model conversion + species A, B; + A = 10; + B = 0; + k1 = 1; + k2 = 0.5; + R1: A -> B; k1 * A; + R2: B -> A; k2 * B; + end + """) + problem.add_observable("obs_A", "A", noise_formula="sd_A") + problem.add_parameter( + "k1", estimate=True, lb=1e-5, ub=1e5, nominal_value=1 + ) + problem.add_parameter( + "k2", estimate=True, lb=1e-5, ub=1e5, nominal_value=0.5 + ) + problem.add_parameter( + "sd_A", estimate=True, lb=0.01, ub=10, nominal_value=1 + ) + problem.add_measurement( + "obs_A", time=10, measurement=2.5, experiment_id="" + ) + assert problem.validate() == [] + + problem.config = ProblemConfig(filepath="problem.yaml") + problem.models[0].rel_path = "model.xml" + 
problem.parameter_tables[0].rel_path = "parameters.tsv" + problem.observable_tables[0].rel_path = "observables.tsv" + problem.measurement_tables[0].rel_path = "measurements.tsv" + problem.to_files(Path(tmpdir)) + + result = subprocess.run(["petablint", str(Path(tmpdir, "problem.yaml"))]) # noqa: S603,S607 + assert result.returncode == 0 + + +def test_problem_id(tmpdir): + """Test that the problem ID works as expected.""" + from jsonschema import ValidationError + + def make_yaml(id_line: str) -> str: + return f""" + format_version: 2.0.0 + {id_line} + model_files: {{}} + parameter_files: [] + observable_files: [] + condition_files: [] + measurement_files: [] + """ + + filepath = Path(tmpdir, "problem.yaml") + with open(filepath, "w") as f: + f.write(make_yaml("id: my_problem_id")) + problem = Problem.from_yaml(filepath) + assert problem.id == "my_problem_id" + + with open(filepath, "w") as f: + f.write(make_yaml("id: ")) + with pytest.raises(ValidationError): + Problem.from_yaml(filepath) + + with open(filepath, "w") as f: + f.write(make_yaml("")) + problem = Problem.from_yaml(filepath) + assert problem.id is None + + +def test_parameter_accessors(): # pylint: disable=W0621 + """ + Test the petab.Problem functions to get parameter values. 
+ """ + petab_problem = Problem() + petab_problem += Parameter( + id="par1", lb=0, ub=100, nominal_value=7, estimate=True + ) + petab_problem += Parameter( + id="par2", lb=0.1, ub=100, nominal_value=8, estimate=True + ) + petab_problem += Parameter( + id="par3", lb=0.1, ub=200, nominal_value=9, estimate=False + ) + + assert petab_problem.x_ids == ["par1", "par2", "par3"] + assert petab_problem.x_free_ids == ["par1", "par2"] + assert petab_problem.x_fixed_ids == ["par3"] + assert petab_problem.lb == [0, 0.1, 0.1] + assert petab_problem.ub == [100, 100, 200] + assert petab_problem.x_nominal == [7, 8, 9] + assert petab_problem.x_nominal_free == [7, 8] + assert petab_problem.x_nominal_fixed == [9] + + assert ( + petab_problem.get_x_nominal_dict() + == petab_problem.get_x_nominal_dict(free=True, fixed=True) + == { + "par1": 7, + "par2": 8, + "par3": 9, + } + ) + assert petab_problem.get_x_nominal_dict(free=True, fixed=False) == { + "par1": 7, + "par2": 8, + } + assert petab_problem.get_x_nominal_dict(free=False, fixed=True) == { + "par3": 9, + } + + +def test_get_output_parameters(): + """Test Problem.get_output_parameters""" + petab_problem = Problem() + assert petab_problem.get_output_parameters() == [] + + petab_problem += Parameter(id="p1", lb=0, ub=100, estimate=True) + petab_problem.models.append(SbmlModel.from_antimony("p2 = 1")) + assert petab_problem.get_output_parameters() == [] + + petab_problem += Observable( + id="obs1", formula="p1 + p2", noise_formula="p1 * p2" + ) + assert petab_problem.get_output_parameters() == ["p1"] + + petab_problem += Observable( + id="obs1", + formula="p3 + p4", + noise_formula="p3 * p5", + ) + assert ( + petab_problem.get_output_parameters() + == petab_problem.get_output_parameters(observable=True, noise=True) + == ["p1", "p3", "p4", "p5"] + ) + assert petab_problem.get_output_parameters( + observable=True, noise=False + ) == ["p1", "p3", "p4"] + assert petab_problem.get_output_parameters( + observable=False, noise=True + ) == 
["p1", "p3", "p5"] + + +def test_mapping_validation(): + """Test that invalid mapping entries raise errors.""" + + # alias invalid model entity ID + Mapping( + petab_id="valid_id", + model_id=" 1_invalid", + ) + + with pytest.raises(ValidationError, match="Invalid ID"): + # invalid petab entity ID + Mapping( + petab_id="1_invalid", + model_id="valid_id", + ) + + with pytest.raises(ValidationError, match="Aliasing.*not allowed"): + # unnecessary aliasing is forbidden + Mapping( + petab_id="forbidden_alias_of_valid_id", + model_id="valid_id", + ) + + # missing model_id is valid (annotation-only entry) + Mapping(petab_id="valid_id", name="some name") + + # identity mapping is valid + Mapping(petab_id="valid_id", model_id="valid_id", name="some name") + + +def test_objective_type(): + """Test that MAP and ML problems are recognized correctly.""" + problem = Problem() + problem += Parameter(id="par1", lb=0, ub=100, estimate=True) + assert problem.has_ml_objective is True + assert problem.has_map_objective is False + + problem += Parameter( + id="par2", + lb=0, + ub=100, + estimate=True, + prior_distribution="normal", + prior_parameters=[50, 10], + ) + assert problem.has_map_objective is True + assert problem.has_ml_objective is False diff --git a/tests/v2/test_experiments.py b/tests/v2/test_experiments.py new file mode 100644 index 00000000..205f200d --- /dev/null +++ b/tests/v2/test_experiments.py @@ -0,0 +1,31 @@ +"""Tests related to ``petab.v2.experiments``.""" + +from tempfile import TemporaryDirectory + +import pandas as pd + +from petab.v2.C import CONDITION_ID, EXPERIMENT_ID, TIME +from petab.v2.experiments import get_experiment_df, write_experiment_df + + +def test_experiment_df_io(): + # Test None + assert get_experiment_df(None) is None + + # Test DataFrame + df = pd.DataFrame( + { + EXPERIMENT_ID: ["e1", "e2"], + CONDITION_ID: ["c1", "c2"], + TIME: [0, 1], + } + ) + df = get_experiment_df(df) + assert df.shape == (2, 3) + + # Test writing to file and round 
trip + with TemporaryDirectory() as tmpdir: + tmpfile = f"{tmpdir}/experiment.csv" + write_experiment_df(df, tmpfile) + df2 = get_experiment_df(tmpfile) + assert df.equals(df2) diff --git a/tests/v2/test_lint.py b/tests/v2/test_lint.py new file mode 100644 index 00000000..7eb6dc91 --- /dev/null +++ b/tests/v2/test_lint.py @@ -0,0 +1,109 @@ +"""Test related to ``petab.v2.lint``.""" + +from copy import deepcopy + +from petab.v2 import Problem +from petab.v2.lint import * +from petab.v2.models.sbml_model import SbmlModel + + +def test_check_experiments(): + """Test ``CheckExperimentTable``.""" + problem = Problem() + + check = CheckExperimentTable() + assert check.run(problem) is None + + problem.add_experiment("e1", 0, "c1", 1, "c2") + problem.add_experiment("e2", "-inf", "c1", 1, "c2") + assert check.run(problem) is None + + tmp_problem = deepcopy(problem) + tmp_problem["e1"].periods[0].time = tmp_problem["e1"].periods[1].time + assert check.run(tmp_problem) is not None + + +def test_check_incompatible_targets(): + """Multiple conditions with overlapping targets cannot be applied + at the same time.""" + problem = Problem() + problem.model = SbmlModel.from_antimony("p1 = 1; p2 = 2") + problem.add_experiment("e1", 0, "c1", 1, "c2") + problem.add_condition("c1", p1="1") + problem.add_condition("c2", p1="2", p2="2") + check = CheckValidConditionTargets() + assert check.run(problem) is None + + problem["e1"].periods[0].condition_ids.append("c2") + assert (error := check.run(problem)) is not None + assert "overlapping targets {'p1'}" in error.message + + +def test_invalid_model_id_in_measurements(): + """Test that measurements with an invalid model ID are caught.""" + problem = Problem() + problem.models.append(SbmlModel.from_antimony("p1 = 1", model_id="model1")) + problem.add_observable("obs1", "A") + problem.add_measurement("obs1", experiment_id="e1", time=0, measurement=1) + + check = CheckMeasurementModelId() + + # Single model -> model ID is optional + assert 
(error := check.run(problem)) is None, error + + # Two models -> model ID must be set + problem.models.append(SbmlModel.from_antimony("p2 = 2", model_id="model2")) + assert (error := check.run(problem)) is not None + assert "multiple models" in error.message + + # Set model ID to a non-existing model ID + problem.measurements[0].model_id = "invalid_model_id" + assert (error := check.run(problem)) is not None + assert "does not match" in error.message + + # Use a valid model ID + problem.measurements[0].model_id = "model1" + assert (error := check.run(problem)) is None, error + + +def test_undefined_experiment_id_in_measurements(): + """Test that measurements with an undefined experiment ID are caught.""" + problem = Problem() + problem.add_experiment("e1", 0, "c1") + problem.add_observable("obs1", "A") + problem.add_measurement("obs1", experiment_id="e1", time=0, measurement=1) + + check = CheckUndefinedExperiments() + + # Valid experiment ID + assert (error := check.run(problem)) is None, error + + # Invalid experiment ID + problem.measurements[0].experiment_id = "invalid_experiment_id" + assert (error := check.run(problem)) is not None + assert "not defined" in error.message + + +def test_validate_initial_change_symbols(): + """Test validation of symbols in target value expressions for changes + applied at the start of an experiment.""" + problem = Problem() + problem.model = SbmlModel.from_antimony("p1 = 1; p2 = 2") + problem.add_experiment("e1", 0, "c1", 1, "c2") + problem.add_condition("c1", p1="p2 + time") + problem.add_condition("c2", p1="p2", p2="p1") + problem.add_parameter("p1", nominal_value=1, estimate=False) + problem.add_parameter("p2", nominal_value=2, estimate=False) + + check = CheckInitialChangeSymbols() + assert check.run(problem) is None + + # removing `p1` from the parameter table is okay, as `c2` is never + # used at the start of an experiment + problem.parameter_tables[0].parameters.remove(problem["p1"]) + assert check.run(problem) is None + 
+ # removing `p2` is not okay, as it is used at the start of an experiment + problem.parameter_tables[0].parameters.remove(problem["p2"]) + assert (error := check.run(problem)) is not None + assert "contains additional symbols: {'p2'}" in error.message diff --git a/tests/v1/test_mapping.py b/tests/v2/test_mapping.py similarity index 83% rename from tests/v1/test_mapping.py rename to tests/v2/test_mapping.py index 4eaaaeb2..e60e9082 100644 --- a/tests/v1/test_mapping.py +++ b/tests/v2/test_mapping.py @@ -1,11 +1,12 @@ -"""Tests related to petab.mapping""" +"""Tests related to petab.v2.mapping""" + import tempfile import pandas as pd import pytest -from petab.C import * # noqa: F403 -from petab.mapping import * +from petab.v2 import get_mapping_df, write_mapping_df +from petab.v2.C import * # noqa: F403 def test_get_mapping_df(): diff --git a/tests/v2/test_problem.py b/tests/v2/test_problem.py deleted file mode 100644 index 334dc86a..00000000 --- a/tests/v2/test_problem.py +++ /dev/null @@ -1,27 +0,0 @@ -from petab.v2 import Problem - - -def test_load_remote(): - """Test loading remote files""" - yaml_url = ( - "https://raw.githubusercontent.com/PEtab-dev/petab_test_suite" - "/main/petabtests/cases/v2.0.0/sbml/0001/_0001.yaml" - ) - petab_problem = Problem.from_yaml(yaml_url) - - assert ( - petab_problem.measurement_df is not None - and not petab_problem.measurement_df.empty - ) - - assert petab_problem.validate() == [] - - -def test_auto_upgrade(): - yaml_url = ( - "https://raw.githubusercontent.com/PEtab-dev/petab_test_suite" - "/main/petabtests/cases/v1.0.0/sbml/0001/_0001.yaml" - ) - problem = Problem.from_yaml(yaml_url) - # TODO check something specifically different in a v2 problem - assert isinstance(problem, Problem) diff --git a/tox.ini b/tox.ini index d57aa91d..3f3bbe46 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = quality,unit +envlist = quality,unit,doc isolated_build = True [testenv] @@ -19,7 +19,20 @@ deps= commands = python -m pip 
install sympy>=1.12.1 - python -m pytest --cov=petab --cov-report=xml --cov-append \ + python -m pytest --cov=petab --cov-report=xml --cov-append --durations=10 \ tests description = Basic tests + +[testenv:doc] +description = Build the documentation +extras = doc,vis +deps= + # workaround for m2r2 issue with py3.13: No module named 'pkg_resources' + # see also: https://github.com/CrossNox/m2r2/issues/72 + setuptools +allowlist_externals = rm +commands = + rm -rf {tox_root}/doc/build + sphinx-build -W -b html . build/html +changedir = {tox_root}/doc