From a097ee964cdc2b0f6497bb4aca2ea368c9e1bc48 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 19 Nov 2024 09:05:31 +0100 Subject: [PATCH 001/141] Bump codecov/codecov-action from 4 to 5 (#321) Bumps [codecov/codecov-action](https://github.com/codecov/codecov-action) from 4 to 5. - [Release notes](https://github.com/codecov/codecov-action/releases) - [Changelog](https://github.com/codecov/codecov-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/codecov/codecov-action/compare/v4...v5) --- updated-dependencies: - dependency-name: codecov/codecov-action dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/ci_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci_tests.yml b/.github/workflows/ci_tests.yml index 82cf5507..4786d9dd 100644 --- a/.github/workflows/ci_tests.yml +++ b/.github/workflows/ci_tests.yml @@ -51,7 +51,7 @@ jobs: run: tox -e unit - name: Coverage - uses: codecov/codecov-action@v4 + uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} file: ./coverage.xml From 42602aa82cba71590abe16de40ffd3e8b981d9ff Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Fabian=20Fr=C3=B6hlich?= Date: Mon, 2 Dec 2024 09:49:45 +0000 Subject: [PATCH 002/141] Fix error message for missing cols in mapping df (#323) --- petab/v1/mapping.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/petab/v1/mapping.py b/petab/v1/mapping.py index 80c71c68..6eade50e 100644 --- a/petab/v1/mapping.py +++ b/petab/v1/mapping.py @@ -43,9 +43,7 @@ def get_mapping_df( for col in MAPPING_DF_REQUIRED_COLS: if col not in mapping_file.columns: - raise KeyError( - f"Mapping table missing mandatory field {PETAB_ENTITY_ID}." 
- ) + raise KeyError(f"Mapping table missing mandatory field {col}.") lint.assert_no_leading_trailing_whitespace( mapping_file.reset_index()[col].values, col From 0b77d7fb48ef36c579f9748d7df12365c68a1e24 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 2 Dec 2024 16:56:32 +0100 Subject: [PATCH 003/141] Bump codecov/codecov-action from 4 to 5 (#322) * Bump codecov/codecov-action from 4 to 5 Bumps [codecov/codecov-action](https://github.com/codecov/codecov-action) from 4 to 5. - [Release notes](https://github.com/codecov/codecov-action/releases) - [Changelog](https://github.com/codecov/codecov-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/codecov/codecov-action/compare/v4...v5) --- updated-dependencies: - dependency-name: codecov/codecov-action dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] * Update ci_tests.yml --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Daniel Weindl --- .github/workflows/ci_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci_tests.yml b/.github/workflows/ci_tests.yml index 4786d9dd..620d12cd 100644 --- a/.github/workflows/ci_tests.yml +++ b/.github/workflows/ci_tests.yml @@ -54,5 +54,5 @@ jobs: uses: codecov/codecov-action@v5 with: token: ${{ secrets.CODECOV_TOKEN }} - file: ./coverage.xml + files: ./coverage.xml if: matrix.platform == 'ubuntu-latest' From 9a4efb46f91f0af06f9e857ab1656f103281fbbf Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Tue, 3 Dec 2024 22:27:50 +0100 Subject: [PATCH 004/141] Enable passing the base path to Problem.from_yaml (#327) When passing the problem configuration as `dict` to `Problem.from_yaml`, one should be able to specify the base path for resolving relative paths. See #324. 
Closes #324 --- petab/v1/problem.py | 15 ++++++-- petab/v2/problem.py | 19 +++++++--- tests/v1/test_petab.py | 13 +++++-- tests/v2/test_problem.py | 80 ++++++++++++++++++++++++++++++++++++++++ 4 files changed, 113 insertions(+), 14 deletions(-) diff --git a/petab/v1/problem.py b/petab/v1/problem.py index 4a5577eb..6145656f 100644 --- a/petab/v1/problem.py +++ b/petab/v1/problem.py @@ -251,21 +251,28 @@ def from_files( ) @staticmethod - def from_yaml(yaml_config: dict | Path | str) -> Problem: + def from_yaml( + yaml_config: dict | Path | str, base_path: str | Path = None + ) -> Problem: """ Factory method to load model and tables as specified by YAML file. Arguments: yaml_config: PEtab configuration as dictionary or YAML file name + base_path: Base directory or URL to resolve relative paths """ if isinstance(yaml_config, Path): yaml_config = str(yaml_config) - get_path = lambda filename: filename # noqa: E731 if isinstance(yaml_config, str): - path_prefix = get_path_prefix(yaml_config) + if base_path is None: + base_path = get_path_prefix(yaml_config) yaml_config = yaml.load_yaml(yaml_config) - get_path = lambda filename: f"{path_prefix}/{filename}" # noqa: E731 + + def get_path(filename): + if base_path is None: + return filename + return f"{base_path}/{filename}" if yaml.is_composite_problem(yaml_config): raise ValueError( diff --git a/petab/v2/problem.py b/petab/v2/problem.py index 612f2571..4c36d791 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -117,24 +117,31 @@ def __str__(self): ) @staticmethod - def from_yaml(yaml_config: dict | Path | str) -> Problem: + def from_yaml( + yaml_config: dict | Path | str, base_path: str | Path = None + ) -> Problem: """ Factory method to load model and tables as specified by YAML file. 
Arguments: yaml_config: PEtab configuration as dictionary or YAML file name + base_path: Base directory or URL to resolve relative paths """ if isinstance(yaml_config, Path): yaml_config = str(yaml_config) if isinstance(yaml_config, str): yaml_file = yaml_config - path_prefix = get_path_prefix(yaml_file) - yaml_config = yaml.load_yaml(yaml_config) - get_path = lambda filename: f"{path_prefix}/{filename}" # noqa: E731 + if base_path is None: + base_path = get_path_prefix(yaml_file) + yaml_config = yaml.load_yaml(yaml_file) else: yaml_file = None - get_path = lambda filename: filename # noqa: E731 + + def get_path(filename): + if base_path is None: + return filename + return f"{base_path}/{filename}" if yaml_config[FORMAT_VERSION] not in {"2.0.0"}: # If we got a path to a v1 yaml file, try to auto-upgrade @@ -186,7 +193,7 @@ def from_yaml(yaml_config: dict | Path | str) -> Problem: else None ) - if len(problem0[MODEL_FILES]) > 1: + if len(problem0[MODEL_FILES] or []) > 1: # TODO https://github.com/PEtab-dev/libpetab-python/issues/6 raise NotImplementedError( "Support for multiple models is not yet implemented." 
diff --git a/tests/v1/test_petab.py b/tests/v1/test_petab.py index 65700af5..1a3f3344 100644 --- a/tests/v1/test_petab.py +++ b/tests/v1/test_petab.py @@ -862,11 +862,16 @@ def test_problem_from_yaml_v1_multiple_files(): observables_df, Path(tmpdir, f"observables{i}.tsv") ) - petab_problem = petab.Problem.from_yaml(yaml_path) + petab_problem1 = petab.Problem.from_yaml(yaml_path) - assert petab_problem.measurement_df.shape[0] == 2 - assert petab_problem.observable_df.shape[0] == 2 - assert petab_problem.condition_df.shape[0] == 2 + # test that we can load the problem from a dict with a custom base path + yaml_config = petab.v1.load_yaml(yaml_path) + petab_problem2 = petab.Problem.from_yaml(yaml_config, base_path=tmpdir) + + for petab_problem in (petab_problem1, petab_problem2): + assert petab_problem.measurement_df.shape[0] == 2 + assert petab_problem.observable_df.shape[0] == 2 + assert petab_problem.condition_df.shape[0] == 2 def test_get_required_parameters_for_parameter_table(petab_problem): diff --git a/tests/v2/test_problem.py b/tests/v2/test_problem.py index 334dc86a..418f7818 100644 --- a/tests/v2/test_problem.py +++ b/tests/v2/test_problem.py @@ -1,4 +1,19 @@ +import tempfile +from pathlib import Path + +import pandas as pd + +import petab.v2 as petab from petab.v2 import Problem +from petab.v2.C import ( + CONDITION_ID, + MEASUREMENT, + NOISE_FORMULA, + OBSERVABLE_FORMULA, + OBSERVABLE_ID, + SIMULATION_CONDITION_ID, + TIME, +) def test_load_remote(): @@ -25,3 +40,68 @@ def test_auto_upgrade(): problem = Problem.from_yaml(yaml_url) # TODO check something specifically different in a v2 problem assert isinstance(problem, Problem) + + +def test_problem_from_yaml_multiple_files(): + """Test loading PEtab version 2 yaml with multiple condition / measurement + / observable files + """ + yaml_config = """ + format_version: 2.0.0 + parameter_file: + problems: + - condition_files: [conditions1.tsv, conditions2.tsv] + measurement_files: [measurements1.tsv, 
measurements2.tsv] + observable_files: [observables1.tsv, observables2.tsv] + model_files: + """ + + with tempfile.TemporaryDirectory() as tmpdir: + yaml_path = Path(tmpdir, "problem.yaml") + with open(yaml_path, "w") as f: + f.write(yaml_config) + + for i in (1, 2): + condition_df = pd.DataFrame( + { + CONDITION_ID: [f"condition{i}"], + } + ) + condition_df.set_index([CONDITION_ID], inplace=True) + petab.write_condition_df( + condition_df, Path(tmpdir, f"conditions{i}.tsv") + ) + + measurement_df = pd.DataFrame( + { + SIMULATION_CONDITION_ID: [f"condition{i}"], + OBSERVABLE_ID: [f"observable{i}"], + TIME: [i], + MEASUREMENT: [1], + } + ) + petab.write_measurement_df( + measurement_df, Path(tmpdir, f"measurements{i}.tsv") + ) + + observables_df = pd.DataFrame( + { + OBSERVABLE_ID: [f"observable{i}"], + OBSERVABLE_FORMULA: [1], + NOISE_FORMULA: [1], + } + ) + petab.write_observable_df( + observables_df, Path(tmpdir, f"observables{i}.tsv") + ) + + petab_problem1 = petab.Problem.from_yaml(yaml_path) + + # test that we can load the problem from a dict with a custom base path + yaml_config = petab.load_yaml(yaml_path) + petab_problem2 = petab.Problem.from_yaml(yaml_config, base_path=tmpdir) + + for petab_problem in (petab_problem1, petab_problem2): + assert petab_problem.measurement_df.shape[0] == 2 + assert petab_problem.observable_df.shape[0] == 2 + assert petab_problem.condition_df.shape[0] == 2 From d3e40064058ba47ecbb564ab1bec3d01bb3435b9 Mon Sep 17 00:00:00 2001 From: Frank Bergmann Date: Thu, 5 Dec 2024 15:43:24 +0100 Subject: [PATCH 005/141] Sbml model enhancements (#333) * - allow to initialize from sbml str * - disambiguate between warning and errors --- petab/v1/models/sbml_model.py | 16 ++++++++++++++++ petab/v1/sbml.py | 16 +++++++++++----- 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/petab/v1/models/sbml_model.py b/petab/v1/models/sbml_model.py index fd57f2dc..5102715d 100644 --- a/petab/v1/models/sbml_model.py +++ 
b/petab/v1/models/sbml_model.py @@ -81,6 +81,22 @@ def from_file(filepath_or_buffer, model_id: str = None): model_id=model_id, ) + @staticmethod + def from_string(sbml_string, model_id: str = None): + sbml_reader, sbml_document, sbml_model = load_sbml_from_string( + sbml_string + ) + + if not model_id: + model_id = sbml_model.getIdAttribute() + + return SbmlModel( + sbml_model=sbml_model, + sbml_reader=sbml_reader, + sbml_document=sbml_document, + model_id=model_id, + ) + @property def model_id(self): return self._model_id diff --git a/petab/v1/sbml.py b/petab/v1/sbml.py index 0a8fd20f..6395e41b 100644 --- a/petab/v1/sbml.py +++ b/petab/v1/sbml.py @@ -43,12 +43,18 @@ def is_sbml_consistent( libsbml.LIBSBML_CAT_UNITS_CONSISTENCY, False ) - has_problems = sbml_document.checkConsistency() - if has_problems: + has_issues = sbml_document.checkConsistency() + + # we only have an issue with errors or fatals + has_problems = sbml_document.getNumErrors( + libsbml.LIBSBML_SEV_ERROR + ) + sbml_document.getNumErrors(libsbml.LIBSBML_SEV_FATAL) + if has_issues: log_sbml_errors(sbml_document) - logger.warning( - "WARNING: Generated invalid SBML model. Check messages above." - ) + if has_problems: + logger.warning( + "WARNING: Generated invalid SBML model. Check messages above." + ) return not has_problems From 9baf981340f85536cdbd9768e5125d0c6a7325d7 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 9 Dec 2024 14:31:27 +0100 Subject: [PATCH 006/141] Functions for adding conditions/observables/parameter to Problem (#328) Add functions for adding individual conditions/observables/parameter/measurements to Problem. This will simplify writing test cases and interactively assembling petab problems. `petab.v2.Problem.add_*` will be added / updated to the new format separately. Related to #220. 
--- petab/v1/mapping.py | 1 + petab/v1/problem.py | 181 +++++++++++++++++++++++++++- petab/v2/petab1to2.py | 21 +++- petab/v2/problem.py | 198 +++++++++++++++++++++++++++++++ tests/v1/test_petab.py | 105 ++++++++-------- tests/{v1 => v2}/test_mapping.py | 6 +- tests/v2/test_problem.py | 102 +++++++++++----- 7 files changed, 518 insertions(+), 96 deletions(-) rename tests/{v1 => v2}/test_mapping.py (83%) diff --git a/petab/v1/mapping.py b/petab/v1/mapping.py index 6eade50e..bae9d5fb 100644 --- a/petab/v1/mapping.py +++ b/petab/v1/mapping.py @@ -1,4 +1,5 @@ """Functionality related to the PEtab entity mapping table""" +# TODO: Move to petab.v2.mapping from pathlib import Path import pandas as pd diff --git a/petab/v1/problem.py b/petab/v1/problem.py index 6145656f..f4951ce6 100644 --- a/petab/v1/problem.py +++ b/petab/v1/problem.py @@ -3,8 +3,9 @@ import os import tempfile -from collections.abc import Iterable +from collections.abc import Iterable, Sequence from math import nan +from numbers import Number from pathlib import Path, PurePosixPath from typing import TYPE_CHECKING from warnings import warn @@ -1005,3 +1006,181 @@ def n_priors(self) -> int: return 0 return self.parameter_df[OBJECTIVE_PRIOR_PARAMETERS].notna().sum() + + def add_condition(self, id_: str, name: str = None, **kwargs): + """Add a simulation condition to the problem. + + Arguments: + id_: The condition id + name: The condition name + kwargs: Parameter, value pairs to add to the condition table. 
+ """ + record = {CONDITION_ID: [id_], **kwargs} + if name is not None: + record[CONDITION_NAME] = name + tmp_df = pd.DataFrame(record).set_index([CONDITION_ID]) + self.condition_df = ( + pd.concat([self.condition_df, tmp_df]) + if self.condition_df is not None + else tmp_df + ) + + def add_observable( + self, + id_: str, + formula: str | float | int, + noise_formula: str | float | int = None, + noise_distribution: str = None, + transform: str = None, + name: str = None, + **kwargs, + ): + """Add an observable to the problem. + + Arguments: + id_: The observable id + formula: The observable formula + noise_formula: The noise formula + noise_distribution: The noise distribution + transform: The observable transformation + name: The observable name + kwargs: additional columns/values to add to the observable table + + """ + record = { + OBSERVABLE_ID: [id_], + OBSERVABLE_FORMULA: [formula], + } + if name is not None: + record[OBSERVABLE_NAME] = [name] + if noise_formula is not None: + record[NOISE_FORMULA] = [noise_formula] + if noise_distribution is not None: + record[NOISE_DISTRIBUTION] = [noise_distribution] + if transform is not None: + record[OBSERVABLE_TRANSFORMATION] = [transform] + record.update(kwargs) + + tmp_df = pd.DataFrame(record).set_index([OBSERVABLE_ID]) + self.observable_df = ( + pd.concat([self.observable_df, tmp_df]) + if self.observable_df is not None + else tmp_df + ) + + def add_parameter( + self, + id_: str, + estimated: bool | str | int = True, + nominal_value=None, + scale: str = None, + lb: Number = None, + ub: Number = None, + init_prior_type: str = None, + init_prior_pars: str | Sequence = None, + obj_prior_type: str = None, + obj_prior_pars: str | Sequence = None, + **kwargs, + ): + """Add a parameter to the problem. 
+ + Arguments: + id_: The parameter id + estimated: Whether the parameter is estimated + nominal_value: The nominal value of the parameter + scale: The parameter scale + lb: The lower bound of the parameter + ub: The upper bound of the parameter + init_prior_type: The type of the initialization prior distribution + init_prior_pars: The parameters of the initialization prior + distribution + obj_prior_type: The type of the objective prior distribution + obj_prior_pars: The parameters of the objective prior distribution + kwargs: additional columns/values to add to the parameter table + """ + record = { + PARAMETER_ID: [id_], + } + if estimated is not None: + record[ESTIMATE] = [ + int(estimated) + if isinstance(estimated, bool | int) + else estimated + ] + if nominal_value is not None: + record[NOMINAL_VALUE] = [nominal_value] + if scale is not None: + record[PARAMETER_SCALE] = [scale] + if lb is not None: + record[LOWER_BOUND] = [lb] + if ub is not None: + record[UPPER_BOUND] = [ub] + if init_prior_type is not None: + record[INITIALIZATION_PRIOR_TYPE] = [init_prior_type] + if init_prior_pars is not None: + if not isinstance(init_prior_pars, str): + init_prior_pars = PARAMETER_SEPARATOR.join( + map(str, init_prior_pars) + ) + record[INITIALIZATION_PRIOR_PARAMETERS] = [init_prior_pars] + if obj_prior_type is not None: + record[OBJECTIVE_PRIOR_TYPE] = [obj_prior_type] + if obj_prior_pars is not None: + if not isinstance(obj_prior_pars, str): + obj_prior_pars = PARAMETER_SEPARATOR.join( + map(str, obj_prior_pars) + ) + record[OBJECTIVE_PRIOR_PARAMETERS] = [obj_prior_pars] + record.update(kwargs) + + tmp_df = pd.DataFrame(record).set_index([PARAMETER_ID]) + self.parameter_df = ( + pd.concat([self.parameter_df, tmp_df]) + if self.parameter_df is not None + else tmp_df + ) + + def add_measurement( + self, + obs_id: str, + sim_cond_id: str, + time: float, + measurement: float, + observable_parameters: Sequence[str] = None, + noise_parameters: Sequence[str] = None, + 
preeq_cond_id: str = None, + ): + """Add a measurement to the problem. + + Arguments: + obs_id: The observable ID + sim_cond_id: The simulation condition ID + time: The measurement time + measurement: The measurement value + observable_parameters: The observable parameters + noise_parameters: The noise parameters + preeq_cond_id: The pre-equilibration condition ID + """ + record = { + OBSERVABLE_ID: [obs_id], + SIMULATION_CONDITION_ID: [sim_cond_id], + TIME: [time], + MEASUREMENT: [measurement], + } + if observable_parameters is not None: + record[OBSERVABLE_PARAMETERS] = [ + PARAMETER_SEPARATOR.join(observable_parameters) + ] + if noise_parameters is not None: + record[NOISE_PARAMETERS] = [ + PARAMETER_SEPARATOR.join(noise_parameters) + ] + if preeq_cond_id is not None: + record[PREEQUILIBRATION_CONDITION_ID] = [preeq_cond_id] + + tmp_df = pd.DataFrame(record) + self.measurement_df = ( + pd.concat([self.measurement_df, tmp_df]) + if self.measurement_df is not None + else tmp_df + ) diff --git a/petab/v2/petab1to2.py b/petab/v2/petab1to2.py index 86cbe49c..866414c3 100644 --- a/petab/v2/petab1to2.py +++ b/petab/v2/petab1to2.py @@ -2,6 +2,7 @@ import shutil from itertools import chain from pathlib import Path +from urllib.parse import urlparse from pandas.io.common import get_handle, is_url @@ -76,7 +77,7 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None): # condition tables, observable tables, SBML files, parameter table: # no changes - just copy file = yaml_config[C.PARAMETER_FILE] - _copy_file(get_src_path(file), get_dest_path(file)) + _copy_file(get_src_path(file), Path(get_dest_path(file))) for problem_config in yaml_config[C.PROBLEMS]: for file in chain( @@ -89,7 +90,7 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None): problem_config.get(C.MEASUREMENT_FILES, []), problem_config.get(C.VISUALIZATION_FILES, []), ): - _copy_file(get_src_path(file), get_dest_path(file)) + _copy_file(get_src_path(file), 
Path(get_dest_path(file))) # TODO: Measurements: preequilibration to experiments/timecourses once # finalized @@ -131,10 +132,14 @@ def _update_yaml(yaml_config: dict) -> dict: return yaml_config -def _copy_file(src: Path | str, dest: Path | str): +def _copy_file(src: Path | str, dest: Path): """Copy file.""" - src = str(src) - dest = str(dest) + # src might be a URL - convert to Path if local + src_url = urlparse(src) + if not src_url.scheme: + src = Path(src) + elif src_url.scheme == "file" and not src_url.netloc: + src = Path(src.removeprefix("file:/")) if is_url(src): with get_handle(src, mode="r") as src_handle: @@ -142,4 +147,8 @@ def _copy_file(src: Path | str, dest: Path | str): dest_handle.write(src_handle.handle.read()) return - shutil.copy(str(src), str(dest)) + try: + if dest.samefile(src): + return + except FileNotFoundError: + shutil.copy(str(src), str(dest)) diff --git a/petab/v2/problem.py b/petab/v2/problem.py index 4c36d791..87a9b6e1 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -4,7 +4,9 @@ import logging import os import tempfile +from collections.abc import Sequence from math import nan +from numbers import Number from pathlib import Path from typing import TYPE_CHECKING @@ -724,3 +726,199 @@ def validate( break return validation_results + + def add_condition(self, id_: str, name: str = None, **kwargs): + """Add a simulation condition to the problem. + + Arguments: + id_: The condition id + name: The condition name + kwargs: Parameter, value pairs to add to the condition table. 
+ """ + record = {CONDITION_ID: [id_], **kwargs} + if name is not None: + record[CONDITION_NAME] = name + tmp_df = pd.DataFrame(record).set_index([CONDITION_ID]) + self.condition_df = ( + pd.concat([self.condition_df, tmp_df]) + if self.condition_df is not None + else tmp_df + ) + + def add_observable( + self, + id_: str, + formula: str, + noise_formula: str | float | int = None, + noise_distribution: str = None, + transform: str = None, + name: str = None, + **kwargs, + ): + """Add an observable to the problem. + + Arguments: + id_: The observable id + formula: The observable formula + noise_formula: The noise formula + noise_distribution: The noise distribution + transform: The observable transformation + name: The observable name + kwargs: additional columns/values to add to the observable table + + """ + record = { + OBSERVABLE_ID: [id_], + OBSERVABLE_FORMULA: [formula], + } + if name is not None: + record[OBSERVABLE_NAME] = [name] + if noise_formula is not None: + record[NOISE_FORMULA] = [noise_formula] + if noise_distribution is not None: + record[NOISE_DISTRIBUTION] = [noise_distribution] + if transform is not None: + record[OBSERVABLE_TRANSFORMATION] = [transform] + record.update(kwargs) + + tmp_df = pd.DataFrame(record).set_index([OBSERVABLE_ID]) + self.observable_df = ( + pd.concat([self.observable_df, tmp_df]) + if self.observable_df is not None + else tmp_df + ) + + def add_parameter( + self, + id_: str, + estimated: bool | str | int = True, + nominal_value=None, + scale: str = None, + lb: Number = None, + ub: Number = None, + init_prior_type: str = None, + init_prior_pars: str | Sequence = None, + obj_prior_type: str = None, + obj_prior_pars: str | Sequence = None, + **kwargs, + ): + """Add a parameter to the problem. 
+ + Arguments: + id_: The parameter id + estimated: Whether the parameter is estimated + nominal_value: The nominal value of the parameter + scale: The parameter scale + lb: The lower bound of the parameter + ub: The upper bound of the parameter + init_prior_type: The type of the initialization prior distribution + init_prior_pars: The parameters of the initialization prior + distribution + obj_prior_type: The type of the objective prior distribution + obj_prior_pars: The parameters of the objective prior distribution + kwargs: additional columns/values to add to the parameter table + """ + record = { + PARAMETER_ID: [id_], + } + if estimated is not None: + record[ESTIMATE] = [ + int(estimated) + if isinstance(estimated, bool | int) + else estimated + ] + if nominal_value is not None: + record[NOMINAL_VALUE] = [nominal_value] + if scale is not None: + record[PARAMETER_SCALE] = [scale] + if lb is not None: + record[LOWER_BOUND] = [lb] + if ub is not None: + record[UPPER_BOUND] = [ub] + if init_prior_type is not None: + record[INITIALIZATION_PRIOR_TYPE] = [init_prior_type] + if init_prior_pars is not None: + if not isinstance(init_prior_pars, str): + init_prior_pars = PARAMETER_SEPARATOR.join( + map(str, init_prior_pars) + ) + record[INITIALIZATION_PRIOR_PARAMETERS] = [init_prior_pars] + if obj_prior_type is not None: + record[OBJECTIVE_PRIOR_TYPE] = [obj_prior_type] + if obj_prior_pars is not None: + if not isinstance(obj_prior_pars, str): + obj_prior_pars = PARAMETER_SEPARATOR.join( + map(str, obj_prior_pars) + ) + record[OBJECTIVE_PRIOR_PARAMETERS] = [obj_prior_pars] + record.update(kwargs) + + tmp_df = pd.DataFrame(record).set_index([PARAMETER_ID]) + self.parameter_df = ( + pd.concat([self.parameter_df, tmp_df]) + if self.parameter_df is not None + else tmp_df + ) + + def add_measurement( + self, + obs_id: str, + sim_cond_id: str, + time: float, + measurement: float, + observable_parameters: Sequence[str] = None, + noise_parameters: Sequence[str] = None, + 
preeq_cond_id: str = None, + ): + """Add a measurement to the problem. + + Arguments: + obs_id: The observable ID + sim_cond_id: The simulation condition ID + time: The measurement time + measurement: The measurement value + observable_parameters: The observable parameters + noise_parameters: The noise parameters + preeq_cond_id: The pre-equilibration condition ID + """ + record = { + OBSERVABLE_ID: [obs_id], + SIMULATION_CONDITION_ID: [sim_cond_id], + TIME: [time], + MEASUREMENT: [measurement], + } + if observable_parameters is not None: + record[OBSERVABLE_PARAMETERS] = [ + PARAMETER_SEPARATOR.join(observable_parameters) + ] + if noise_parameters is not None: + record[NOISE_PARAMETERS] = [ + PARAMETER_SEPARATOR.join(noise_parameters) + ] + if preeq_cond_id is not None: + record[PREEQUILIBRATION_CONDITION_ID] = [preeq_cond_id] + + tmp_df = pd.DataFrame(record) + self.measurement_df = ( + pd.concat([self.measurement_df, tmp_df]) + if self.measurement_df is not None + else tmp_df + ) + + def add_mapping(self, petab_id: str, model_id: str): + """Add a mapping table entry to the problem. 
+ + Arguments: + petab_id: The new PEtab-compatible ID mapping to `model_id` + model_id: The ID of some entity in the model + """ + record = { + PETAB_ENTITY_ID: [petab_id], + MODEL_ENTITY_ID: [model_id], + } + tmp_df = pd.DataFrame(record).set_index([PETAB_ENTITY_ID]) + self.mapping_df = ( + pd.concat([self.mapping_df, tmp_df]) + if self.mapping_df is not None + else tmp_df + ) diff --git a/tests/v1/test_petab.py b/tests/v1/test_petab.py index 1a3f3344..eac237d2 100644 --- a/tests/v1/test_petab.py +++ b/tests/v1/test_petab.py @@ -16,6 +16,7 @@ import petab.v1 from petab.C import * from petab.models.sbml_model import SbmlModel +from petab.v1 import Problem @pytest.fixture @@ -44,55 +45,61 @@ def petab_problem(): model.addParameter("fixedParameter1", 0.0) model.addParameter("observable_1", 0.0) - measurement_df = pd.DataFrame( - data={ - OBSERVABLE_ID: ["obs1", "obs2"], - MEASUREMENT: [0.1, 0.2], - OBSERVABLE_PARAMETERS: ["", "p1;p2"], - NOISE_PARAMETERS: ["p3;p4", "p5"], - } + petab_problem = petab.Problem() + petab_problem.add_measurement( + obs_id="obs1", + sim_cond_id="condition1", + time=1.0, + measurement=0.1, + noise_parameters=["p3", "p4"], + ) + petab_problem.add_measurement( + obs_id="obs2", + sim_cond_id="condition2", + time=1.0, + measurement=0.2, + observable_parameters=["p1", "p2"], + noise_parameters=["p5"], ) - condition_df = pd.DataFrame( - data={ - CONDITION_ID: ["condition1", "condition2"], - CONDITION_NAME: ["", "Condition 2"], - "fixedParameter1": [1.0, 2.0], - } - ).set_index(CONDITION_ID) + petab_problem.add_condition("condition1", fixedParameter1=1.0) + petab_problem.add_condition( + "condition2", fixedParameter1=2.0, name="Condition 2" + ) - parameter_df = pd.DataFrame( - data={ - PARAMETER_ID: ["dynamicParameter1", "dynamicParameter2"], - PARAMETER_NAME: ["", "..."], - ESTIMATE: [1, 0], - } - ).set_index(PARAMETER_ID) + petab_problem.add_parameter("dynamicParameter1", estimate=1) + petab_problem.add_parameter("dynamicParameter2", 
estimate=0, name="...") - observable_df = pd.DataFrame( - data={ - OBSERVABLE_ID: ["obs1"], - OBSERVABLE_NAME: ["julius"], - OBSERVABLE_FORMULA: ["observable_1 * observableParameter1_obs1"], - NOISE_FORMULA: ["0.1 * observable_1 * observableParameter1_obs1"], - } - ).set_index(OBSERVABLE_ID) + petab_problem.add_observable( + "obs1", + formula="observable_1 * observableParameter1_obs1", + noise_formula="0.1 * observable_1 * observableParameter1_obs1", + name="julius", + ) with tempfile.TemporaryDirectory() as temp_dir: sbml_file_name = Path(temp_dir, "model.xml") libsbml.writeSBMLToFile(model.document, str(sbml_file_name)) measurement_file_name = Path(temp_dir, "measurements.tsv") - petab.write_measurement_df(measurement_df, measurement_file_name) + petab.write_measurement_df( + petab_problem.measurement_df, measurement_file_name + ) condition_file_name = Path(temp_dir, "conditions.tsv") - petab.write_condition_df(condition_df, condition_file_name) + petab.write_condition_df( + petab_problem.condition_df, condition_file_name + ) parameter_file_name = Path(temp_dir, "parameters.tsv") - petab.write_parameter_df(parameter_df, parameter_file_name) + petab.write_parameter_df( + petab_problem.parameter_df, parameter_file_name + ) observable_file_name = Path(temp_dir, "observables.tsv") - petab.write_observable_df(observable_df, observable_file_name) + petab.write_observable_df( + petab_problem.observable_df, observable_file_name + ) with pytest.deprecated_call(): petab_problem = petab.Problem.from_files( @@ -822,44 +829,26 @@ def test_problem_from_yaml_v1_multiple_files(): observable_files: [observables1.tsv, observables2.tsv] sbml_files: [] """ - with tempfile.TemporaryDirectory() as tmpdir: yaml_path = Path(tmpdir, "problem.yaml") with open(yaml_path, "w") as f: f.write(yaml_config) for i in (1, 2): - condition_df = pd.DataFrame( - { - CONDITION_ID: [f"condition{i}"], - } - ) - condition_df.set_index([CONDITION_ID], inplace=True) + problem = Problem() + 
problem.add_condition(f"condition{i}") petab.write_condition_df( - condition_df, Path(tmpdir, f"conditions{i}.tsv") + problem.condition_df, Path(tmpdir, f"conditions{i}.tsv") ) - measurement_df = pd.DataFrame( - { - SIMULATION_CONDITION_ID: [f"condition{i}"], - OBSERVABLE_ID: [f"observable{i}"], - TIME: [i], - MEASUREMENT: [1], - } - ) + problem.add_measurement(f"observable{i}", f"condition{i}", 1, 1) petab.write_measurement_df( - measurement_df, Path(tmpdir, f"measurements{i}.tsv") + problem.measurement_df, Path(tmpdir, f"measurements{i}.tsv") ) - observables_df = pd.DataFrame( - { - OBSERVABLE_ID: [f"observable{i}"], - OBSERVABLE_FORMULA: [1], - NOISE_FORMULA: [1], - } - ) + problem.add_observable(f"observable{i}", 1, 1) petab.write_observable_df( - observables_df, Path(tmpdir, f"observables{i}.tsv") + problem.observable_df, Path(tmpdir, f"observables{i}.tsv") ) petab_problem1 = petab.Problem.from_yaml(yaml_path) diff --git a/tests/v1/test_mapping.py b/tests/v2/test_mapping.py similarity index 83% rename from tests/v1/test_mapping.py rename to tests/v2/test_mapping.py index 4eaaaeb2..60ba6b49 100644 --- a/tests/v1/test_mapping.py +++ b/tests/v2/test_mapping.py @@ -1,11 +1,11 @@ -"""Tests related to petab.mapping""" +"""Tests related to petab.v2.mapping""" import tempfile import pandas as pd import pytest -from petab.C import * # noqa: F403 -from petab.mapping import * +from petab.v2 import get_mapping_df, write_mapping_df +from petab.v2.C import * # noqa: F403 def test_get_mapping_df(): diff --git a/tests/v2/test_problem.py b/tests/v2/test_problem.py index 418f7818..9d13e3df 100644 --- a/tests/v2/test_problem.py +++ b/tests/v2/test_problem.py @@ -1,18 +1,24 @@ import tempfile from pathlib import Path +import numpy as np import pandas as pd +from pandas.testing import assert_frame_equal import petab.v2 as petab from petab.v2 import Problem from petab.v2.C import ( CONDITION_ID, - MEASUREMENT, + ESTIMATE, + LOWER_BOUND, + MODEL_ENTITY_ID, NOISE_FORMULA, + 
NOMINAL_VALUE, OBSERVABLE_FORMULA, OBSERVABLE_ID, - SIMULATION_CONDITION_ID, - TIME, + PARAMETER_ID, + PETAB_ENTITY_ID, + UPPER_BOUND, ) @@ -55,44 +61,26 @@ def test_problem_from_yaml_multiple_files(): observable_files: [observables1.tsv, observables2.tsv] model_files: """ - with tempfile.TemporaryDirectory() as tmpdir: yaml_path = Path(tmpdir, "problem.yaml") with open(yaml_path, "w") as f: f.write(yaml_config) for i in (1, 2): - condition_df = pd.DataFrame( - { - CONDITION_ID: [f"condition{i}"], - } - ) - condition_df.set_index([CONDITION_ID], inplace=True) + problem = Problem() + problem.add_condition(f"condition{i}") petab.write_condition_df( - condition_df, Path(tmpdir, f"conditions{i}.tsv") + problem.condition_df, Path(tmpdir, f"conditions{i}.tsv") ) - measurement_df = pd.DataFrame( - { - SIMULATION_CONDITION_ID: [f"condition{i}"], - OBSERVABLE_ID: [f"observable{i}"], - TIME: [i], - MEASUREMENT: [1], - } - ) + problem.add_measurement(f"observable{i}", f"condition{i}", 1, 1) petab.write_measurement_df( - measurement_df, Path(tmpdir, f"measurements{i}.tsv") + problem.measurement_df, Path(tmpdir, f"measurements{i}.tsv") ) - observables_df = pd.DataFrame( - { - OBSERVABLE_ID: [f"observable{i}"], - OBSERVABLE_FORMULA: [1], - NOISE_FORMULA: [1], - } - ) + problem.add_observable(f"observable{i}", 1, 1) petab.write_observable_df( - observables_df, Path(tmpdir, f"observables{i}.tsv") + problem.observable_df, Path(tmpdir, f"observables{i}.tsv") ) petab_problem1 = petab.Problem.from_yaml(yaml_path) @@ -105,3 +93,61 @@ def test_problem_from_yaml_multiple_files(): assert petab_problem.measurement_df.shape[0] == 2 assert petab_problem.observable_df.shape[0] == 2 assert petab_problem.condition_df.shape[0] == 2 + + +def test_modify_problem(): + """Test modifying a problem via the API.""" + problem = Problem() + problem.add_condition("condition1", parameter1=1) + problem.add_condition("condition2", parameter2=2) + + exp_condition_df = pd.DataFrame( + data={ + CONDITION_ID: 
["condition1", "condition2"], + "parameter1": [1.0, np.nan], + "parameter2": [np.nan, 2.0], + } + ).set_index([CONDITION_ID]) + assert_frame_equal( + problem.condition_df, exp_condition_df, check_dtype=False + ) + + problem.add_observable("observable1", "1") + problem.add_observable("observable2", "2", noise_formula=2.2) + + exp_observable_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["observable1", "observable2"], + OBSERVABLE_FORMULA: ["1", "2"], + NOISE_FORMULA: [np.nan, 2.2], + } + ).set_index([OBSERVABLE_ID]) + assert_frame_equal( + problem.observable_df, exp_observable_df, check_dtype=False + ) + + problem.add_parameter("parameter1", 1, 0, lb=1, ub=2) + problem.add_parameter("parameter2", False, 2) + + exp_parameter_df = pd.DataFrame( + data={ + PARAMETER_ID: ["parameter1", "parameter2"], + ESTIMATE: [1, 0], + NOMINAL_VALUE: [0.0, 2.0], + LOWER_BOUND: [1.0, np.nan], + UPPER_BOUND: [2.0, np.nan], + } + ).set_index([PARAMETER_ID]) + assert_frame_equal( + problem.parameter_df, exp_parameter_df, check_dtype=False + ) + + problem.add_mapping("new_petab_id", "some_model_entity_id") + + exp_mapping_df = pd.DataFrame( + data={ + PETAB_ENTITY_ID: ["new_petab_id"], + MODEL_ENTITY_ID: ["some_model_entity_id"], + } + ).set_index([PETAB_ENTITY_ID]) + assert_frame_equal(problem.mapping_df, exp_mapping_df, check_dtype=False) From 4be03c84dcb4a88972129582cee124f7a9b8ceb3 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 9 Dec 2024 14:54:43 +0100 Subject: [PATCH 007/141] Add constants + I/O for new conditions/experiments tables (#334) Related to https://github.com/PEtab-dev/PEtab/issues/586 * constants for new yaml fields / table columns / ... * read/write experiment table * add experiments table to Problem, and populate from yaml * add first validation functions * include missing modules in API docs To be complemented by separate pull requests. 
--- doc/modules.rst | 3 + petab/schemas/petab_schema.v2.0.0.yaml | 5 +- petab/v2/C.py | 46 ++++++++++ petab/v2/__init__.py | 4 + petab/v2/experiments.py | 40 +++++++++ petab/v2/lint.py | 112 ++++++++++++++++++++++--- petab/v2/problem.py | 65 +++++++++++++- pytest.ini | 2 + tests/v2/test_experiments.py | 30 +++++++ tests/v2/test_lint.py | 32 +++++++ tests/v2/test_problem.py | 7 ++ 11 files changed, 330 insertions(+), 16 deletions(-) create mode 100644 petab/v2/experiments.py create mode 100644 tests/v2/test_experiments.py create mode 100644 tests/v2/test_lint.py diff --git a/doc/modules.rst b/doc/modules.rst index 8d6335c8..87a9559d 100644 --- a/doc/modules.rst +++ b/doc/modules.rst @@ -30,5 +30,8 @@ API Reference petab.v1.yaml petab.v2 petab.v2.C + petab.v2.experiments petab.v2.lint + petab.v2.models petab.v2.problem + petab.v2.petab1to2 diff --git a/petab/schemas/petab_schema.v2.0.0.yaml b/petab/schemas/petab_schema.v2.0.0.yaml index ddeb428a..b4d7c358 100644 --- a/petab/schemas/petab_schema.v2.0.0.yaml +++ b/petab/schemas/petab_schema.v2.0.0.yaml @@ -76,6 +76,10 @@ properties: description: List of PEtab condition files. $ref: "#/definitions/list_of_files" + experiment_files: + description: List of PEtab experiment files. + $ref: "#/definitions/list_of_files" + observable_files: description: List of PEtab observable files. 
$ref: "#/definitions/list_of_files" @@ -92,7 +96,6 @@ properties: - model_files - observable_files - measurement_files - - condition_files extensions: type: object diff --git a/petab/v2/C.py b/petab/v2/C.py index 11fede25..2d55355a 100644 --- a/petab/v2/C.py +++ b/petab/v2/C.py @@ -10,9 +10,14 @@ #: Observable ID column in the observable and measurement tables OBSERVABLE_ID = "observableId" +#: Experiment ID column in the measurement table +EXPERIMENT_ID = "experimentId" + +# TODO: remove #: Preequilibration condition ID column in the measurement table PREEQUILIBRATION_CONDITION_ID = "preequilibrationConditionId" +# TODO: remove #: Simulation condition ID column in the measurement table SIMULATION_CONDITION_ID = "simulationConditionId" @@ -40,6 +45,8 @@ #: Mandatory columns of measurement table MEASUREMENT_DF_REQUIRED_COLS = [ OBSERVABLE_ID, + # TODO: add + # EXPERIMENT_ID, SIMULATION_CONDITION_ID, MEASUREMENT, TIME, @@ -47,6 +54,7 @@ #: Optional columns of measurement table MEASUREMENT_DF_OPTIONAL_COLS = [ + # TODO: remove PREEQUILIBRATION_CONDITION_ID, OBSERVABLE_PARAMETERS, NOISE_PARAMETERS, @@ -125,9 +133,45 @@ #: Condition ID column in the condition table CONDITION_ID = "conditionId" +# TODO: removed? 
#: Condition name column in the condition table CONDITION_NAME = "conditionName" +#: Column in the condition table with the ID of an entity that is changed +TARGET_ID = "targetId" +#: Column in the condition table with the type of value that is changed +VALUE_TYPE = "valueType" +#: Column in the condition table with the new value of the target entity +TARGET_VALUE = "targetValue" +# value types: +VT_CONSTANT = "constant" +VT_INITIAL = "initial" +VT_RATE = "rate" +VT_ASSIGNMENT = "assignment" +VT_RELATIVE_RATE = "relativeRate" +VT_RELATIVE_ASSIGNMENT = "relativeAssignment" +VALUE_TYPES = [ + VT_CONSTANT, + VT_INITIAL, + VT_RATE, + VT_ASSIGNMENT, + VT_RELATIVE_RATE, + VT_RELATIVE_ASSIGNMENT, +] + +CONDITION_DF_COLS = [ + CONDITION_ID, + TARGET_ID, + VALUE_TYPE, + TARGET_VALUE, +] + +# EXPERIMENTS +EXPERIMENT_DF_REQUIRED_COLS = [ + EXPERIMENT_ID, + TIME, + CONDITION_ID, +] # OBSERVABLES @@ -332,6 +376,8 @@ MODEL_LANGUAGE = "language" #: Condition files key in the YAML file CONDITION_FILES = "condition_files" +#: Experiment files key in the YAML file +EXPERIMENT_FILES = "experiment_files" #: Measurement files key in the YAML file MEASUREMENT_FILES = "measurement_files" #: Observable files key in the YAML file diff --git a/petab/v2/__init__.py b/petab/v2/__init__.py index 98084fa5..ca55f7d0 100644 --- a/petab/v2/__init__.py +++ b/petab/v2/__init__.py @@ -5,6 +5,10 @@ from warnings import warn from ..v1 import * # noqa: F403, F401, E402 +from .experiments import ( # noqa: F401 + get_experiment_df, + write_experiment_df, +) # import after v1 from .problem import Problem # noqa: F401 diff --git a/petab/v2/experiments.py b/petab/v2/experiments.py new file mode 100644 index 00000000..7833fa1f --- /dev/null +++ b/petab/v2/experiments.py @@ -0,0 +1,40 @@ +"""Functions operating on the PEtab experiments table.""" +from pathlib import Path + +import pandas as pd + +__all__ = ["get_experiment_df", "write_experiment_df"] + + +def get_experiment_df( + experiments_file: str | 
pd.DataFrame | Path | None, +) -> pd.DataFrame | None: + """ + Read the provided observable file into a ``pandas.Dataframe``. + + Arguments: + experiments_file: Name of the file to read from or pandas.Dataframe. + + Returns: + Observable DataFrame + """ + if experiments_file is None: + return experiments_file + + if isinstance(experiments_file, str | Path): + experiments_file = pd.read_csv( + experiments_file, sep="\t", float_precision="round_trip" + ) + + return experiments_file + + +def write_experiment_df(df: pd.DataFrame, filename: str | Path) -> None: + """Write PEtab experiments table + + Arguments: + df: PEtab experiments table + filename: Destination file name + """ + df = get_experiment_df(df) + df.to_csv(filename, sep="\t", index=False) diff --git a/petab/v2/lint.py b/petab/v2/lint.py index 87554e64..fdf6de0c 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -10,18 +10,6 @@ import numpy as np import pandas as pd -from petab.v1 import ( - assert_model_parameters_in_condition_or_parameter_table, -) -from petab.v1.C import ( - ESTIMATE, - MODEL_ENTITY_ID, - NOISE_PARAMETERS, - NOMINAL_VALUE, - OBSERVABLE_PARAMETERS, - PARAMETER_DF_REQUIRED_COLS, - PARAMETER_ID, -) from petab.v1.conditions import get_parametric_overrides from petab.v1.lint import ( _check_df, @@ -42,6 +30,10 @@ get_valid_parameters_for_parameter_table, ) from petab.v1.visualize.lint import validate_visualization_df +from petab.v2 import ( + assert_model_parameters_in_condition_or_parameter_table, +) +from petab.v2.C import * from ..v1 import ( assert_measurement_conditions_present_in_condition_table, @@ -61,10 +53,13 @@ "ValidationTask", "CheckModel", "CheckTableExists", + "CheckValidPetabIdColumn", "CheckMeasurementTable", "CheckConditionTable", "CheckObservableTable", "CheckParameterTable", + "CheckExperimentTable", + "CheckExperimentConditionsExist", "CheckAllParametersPresentInParameterTable", "CheckValidParameterInConditionOrParameterTable", "CheckVisualizationTable", @@ -214,6 
+209,35 @@ def run(self, problem: Problem) -> ValidationIssue | None: return ValidationError(f"{self.table_name} table is missing.") +class CheckValidPetabIdColumn(ValidationTask): + """A task to check that a given column contains only valid PEtab IDs.""" + + def __init__( + self, table_name: str, column_name: str, required_column: bool = True + ): + self.table_name = table_name + self.column_name = column_name + self.required_column = required_column + + def run(self, problem: Problem) -> ValidationIssue | None: + df = getattr(problem, f"{self.table_name}_df") + if df is None: + return + + if self.column_name not in df.columns: + if self.required_column: + return ValidationError( + f"Column {self.column_name} is missing in " + f"{self.table_name} table." + ) + return + + try: + check_ids(df[self.column_name].values, kind=self.column_name) + except ValueError as e: + return ValidationError(str(e)) + + class CheckMeasurementTable(ValidationTask): """A task to validate the measurement table of a PEtab problem.""" @@ -356,6 +380,66 @@ def run(self, problem: Problem) -> ValidationIssue | None: return ValidationError(str(e)) +class CheckExperimentTable(ValidationTask): + """A task to validate the experiment table of a PEtab problem.""" + + def run(self, problem: Problem) -> ValidationIssue | None: + if problem.experiment_df is None: + return + + df = problem.experiment_df + + try: + _check_df(df, EXPERIMENT_DF_REQUIRED_COLS, "experiment") + except AssertionError as e: + return ValidationError(str(e)) + + # valid timepoints + invalid = [] + for time in df[TIME].values: + try: + time = float(time) + if not np.isfinite(time) and time != -np.inf: + invalid.append(time) + except ValueError: + invalid.append(time) + if invalid: + return ValidationError( + f"Invalid timepoints in experiment table: {invalid}" + ) + + +class CheckExperimentConditionsExist(ValidationTask): + """A task to validate that all conditions in the experiment table exist + in the condition table.""" + + 
def run(self, problem: Problem) -> ValidationIssue | None: + if problem.experiment_df is None: + return + + if ( + problem.condition_df is None + and problem.experiment_df is not None + and not problem.experiment_df.empty + ): + return ValidationError( + "Experiment table is non-empty, " + "but condition table is missing." + ) + + required_conditions = problem.experiment_df[CONDITION_ID].unique() + existing_conditions = problem.condition_df.index + + missing_conditions = set(required_conditions) - set( + existing_conditions + ) + if missing_conditions: + return ValidationError( + f"Experiment table contains conditions that are not present " + f"in the condition table: {missing_conditions}" + ) + + class CheckAllParametersPresentInParameterTable(ValidationTask): """Ensure all required parameters are contained in the parameter table with no additional ones.""" @@ -558,6 +642,10 @@ def append_overrides(overrides): CheckModel(), CheckMeasurementTable(), CheckConditionTable(), + CheckExperimentTable(), + CheckValidPetabIdColumn("experiment", EXPERIMENT_ID), + CheckValidPetabIdColumn("experiment", CONDITION_ID), + CheckExperimentConditionsExist(), CheckObservableTable(), CheckObservablesDoNotShadowModelEntities(), CheckParameterTable(), diff --git a/petab/v2/problem.py b/petab/v2/problem.py index 87a9b6e1..c22d74e1 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -4,6 +4,7 @@ import logging import os import tempfile +import warnings from collections.abc import Sequence from math import nan from numbers import Number @@ -23,9 +24,10 @@ sampling, yaml, ) -from ..v1.C import * # noqa: F403 from ..v1.models.model import Model, model_factory from ..v1.yaml import get_path_prefix +from ..v2.C import * # noqa: F403 +from . 
import experiments if TYPE_CHECKING: from ..v2.lint import ValidationIssue, ValidationResultList, ValidationTask @@ -40,6 +42,7 @@ class Problem: - model - condition table + - experiment table - measurement table - parameter table - observables table @@ -49,6 +52,7 @@ class Problem: Parameters: condition_df: PEtab condition table + experiment_df: PEtab experiment table measurement_df: PEtab measurement table parameter_df: PEtab parameter table observable_df: PEtab observable table @@ -62,6 +66,7 @@ def __init__( self, model: Model = None, condition_df: pd.DataFrame = None, + experiment_df: pd.DataFrame = None, measurement_df: pd.DataFrame = None, parameter_df: pd.DataFrame = None, visualization_df: pd.DataFrame = None, @@ -72,6 +77,7 @@ def __init__( from ..v2.lint import default_validation_tasks self.condition_df: pd.DataFrame | None = condition_df + self.experiment_df: pd.DataFrame | None = experiment_df self.measurement_df: pd.DataFrame | None = measurement_df self.parameter_df: pd.DataFrame | None = parameter_df self.visualization_df: pd.DataFrame | None = visualization_df @@ -83,8 +89,22 @@ def __init__( ValidationTask ] = default_validation_tasks.copy() + if self.experiment_df is not None: + warnings.warn( + "The experiment table is not yet supported and " + "will be ignored.", + stacklevel=2, + ) + def __str__(self): model = f"with model ({self.model})" if self.model else "without model" + + experiments = ( + f"{self.experiment_df.shape[0]} experiments" + if self.experiment_df is not None + else "without experiments table" + ) + conditions = ( f"{self.condition_df.shape[0]} conditions" if self.condition_df is not None @@ -114,8 +134,8 @@ def __str__(self): parameters = "without parameter_df table" return ( - f"PEtab Problem {model}, {conditions}, {observables}, " - f"{measurements}, {parameters}" + f"PEtab Problem {model}, {conditions}, {experiments}, " + f"{observables}, {measurements}, {parameters}" ) @staticmethod @@ -232,6 +252,16 @@ def 
get_path(filename): else None ) + experiment_files = [ + get_path(f) for f in problem0.get(EXPERIMENT_FILES, []) + ] + # If there are multiple tables, we will merge them + experiment_df = ( + core.concat_tables(experiment_files, experiments.get_experiment_df) + if experiment_files + else None + ) + visualization_files = [ get_path(f) for f in problem0.get(VISUALIZATION_FILES, []) ] @@ -262,6 +292,7 @@ def get_path(filename): return Problem( condition_df=condition_df, + experiment_df=experiment_df, measurement_df=measurement_df, parameter_df=parameter_df, observable_df=observable_df, @@ -922,3 +953,31 @@ def add_mapping(self, petab_id: str, model_id: str): if self.mapping_df is not None else tmp_df ) + + def add_experiment(self, id_: str, *args): + """Add an experiment to the problem. + + :param id_: The experiment ID. + :param args: Timepoints and associated conditions: + ``time_1, condition_id_1, time_2, condition_id_2, ...``. + """ + if len(args) % 2 != 0: + raise ValueError( + "Arguments must be pairs of timepoints and condition IDs." 
+ ) + + records = [] + for i in range(0, len(args), 2): + records.append( + { + EXPERIMENT_ID: id_, + TIME: args[i], + CONDITION_ID: args[i + 1], + } + ) + tmp_df = pd.DataFrame(records) + self.experiment_df = ( + pd.concat([self.experiment_df, tmp_df]) + if self.experiment_df is not None + else tmp_df + ) diff --git a/pytest.ini b/pytest.ini index 11b8918a..1e9b4286 100644 --- a/pytest.ini +++ b/pytest.ini @@ -7,3 +7,5 @@ filterwarnings = ignore:Support for PEtab2.0 is experimental:UserWarning ignore:.*inspect.getargspec\(\) is deprecated.*:DeprecationWarning ignore:.*Passing unrecognized arguments to super\(PyDevIPCompleter6\).*:DeprecationWarning + # TODO: until we have proper v2 support + ignore:The experiment table is not yet supported and will be ignored:UserWarning diff --git a/tests/v2/test_experiments.py b/tests/v2/test_experiments.py new file mode 100644 index 00000000..234552f2 --- /dev/null +++ b/tests/v2/test_experiments.py @@ -0,0 +1,30 @@ +"""Tests related to ``petab.v2.experiments``.""" +from tempfile import TemporaryDirectory + +import pandas as pd + +from petab.v2.C import CONDITION_ID, EXPERIMENT_ID, TIME +from petab.v2.experiments import get_experiment_df, write_experiment_df + + +def test_experiment_df_io(): + # Test None + assert get_experiment_df(None) is None + + # Test DataFrame + df = pd.DataFrame( + { + EXPERIMENT_ID: ["e1", "e2"], + CONDITION_ID: ["c1", "c2"], + TIME: [0, 1], + } + ) + df = get_experiment_df(df) + assert df.shape == (2, 3) + + # Test writing to file and round trip + with TemporaryDirectory() as tmpdir: + tmpfile = f"{tmpdir}/experiment.csv" + write_experiment_df(df, tmpfile) + df2 = get_experiment_df(tmpfile) + assert df.equals(df2) diff --git a/tests/v2/test_lint.py b/tests/v2/test_lint.py new file mode 100644 index 00000000..db0c402a --- /dev/null +++ b/tests/v2/test_lint.py @@ -0,0 +1,32 @@ +"""Test related to ``petab.v2.lint``.""" + +from copy import deepcopy + +from petab.v2 import Problem +from petab.v2.C import * 
+from petab.v2.lint import * + + +def test_check_experiments(): + """Test ``CheckExperimentTable``.""" + problem = Problem() + problem.add_experiment("e1", 0, "c1", 1, "c2") + problem.add_experiment("e2", "-inf", "c1", 1, "c2") + assert problem.experiment_df.shape == (4, 3) + + check = CheckExperimentTable() + assert check.run(problem) is None + + assert check.run(Problem()) is None + + tmp_problem = deepcopy(problem) + tmp_problem.experiment_df.loc[0, TIME] = "invalid" + assert check.run(tmp_problem) is not None + + tmp_problem = deepcopy(problem) + tmp_problem.experiment_df.loc[0, TIME] = "inf" + assert check.run(tmp_problem) is not None + + tmp_problem = deepcopy(problem) + tmp_problem.experiment_df.drop(columns=[TIME], inplace=True) + assert check.run(tmp_problem) is not None diff --git a/tests/v2/test_problem.py b/tests/v2/test_problem.py index 9d13e3df..41ecc238 100644 --- a/tests/v2/test_problem.py +++ b/tests/v2/test_problem.py @@ -60,6 +60,7 @@ def test_problem_from_yaml_multiple_files(): measurement_files: [measurements1.tsv, measurements2.tsv] observable_files: [observables1.tsv, observables2.tsv] model_files: + experiment_files: [experiments1.tsv, experiments2.tsv] """ with tempfile.TemporaryDirectory() as tmpdir: yaml_path = Path(tmpdir, "problem.yaml") @@ -73,6 +74,11 @@ def test_problem_from_yaml_multiple_files(): problem.condition_df, Path(tmpdir, f"conditions{i}.tsv") ) + problem.add_experiment(f"experiment{i}", 0, f"condition{i}") + petab.write_experiment_df( + problem.experiment_df, Path(tmpdir, f"experiments{i}.tsv") + ) + problem.add_measurement(f"observable{i}", f"condition{i}", 1, 1) petab.write_measurement_df( problem.measurement_df, Path(tmpdir, f"measurements{i}.tsv") @@ -93,6 +99,7 @@ def test_problem_from_yaml_multiple_files(): assert petab_problem.measurement_df.shape[0] == 2 assert petab_problem.observable_df.shape[0] == 2 assert petab_problem.condition_df.shape[0] == 2 + assert petab_problem.experiment_df.shape[0] == 2 def 
test_modify_problem(): From 980926fee387260bee1ae99efcf6448dd40e4c2d Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 9 Dec 2024 15:10:25 +0100 Subject: [PATCH 008/141] Fix test_priors_to_measurements (#336) Fixes an issue in `test_priors_to_measurements` which led to evaluating the prior at the wrong parameter values (using the location parameter of the prior instead of the actually estimated parameters). The problem was in the test code, not the tested code. --------- Co-authored-by: Dilan Pathirana <59329744+dilpath@users.noreply.github.com> --- tests/v1/test_priors.py | 73 +++++++++++++++++++++++++++++------------ 1 file changed, 52 insertions(+), 21 deletions(-) diff --git a/tests/v1/test_priors.py b/tests/v1/test_priors.py index ea47e54f..ac07d089 100644 --- a/tests/v1/test_priors.py +++ b/tests/v1/test_priors.py @@ -8,7 +8,15 @@ from scipy.stats import norm import petab.v1 -from petab.v1 import get_simulation_conditions +from petab.v1 import ( + ESTIMATE, + MEASUREMENT, + OBJECTIVE_PRIOR_TYPE, + OBSERVABLE_ID, + SIMULATION, + get_simulation_conditions, + get_simulation_df, +) from petab.v1.priors import priors_to_measurements @@ -17,20 +25,31 @@ ) def test_priors_to_measurements(problem_id): """Test the conversion of priors to measurements.""" + # setup petab_problem_priors: petab.v1.Problem = ( benchmark_models_petab.get_problem(problem_id) ) petab_problem_priors.visualization_df = None assert petab.v1.lint_problem(petab_problem_priors) is False - if problem_id == "Isensee_JCB2018": # required to match the stored simulation results below petab.v1.flatten_timepoint_specific_output_overrides( petab_problem_priors ) assert petab.v1.lint_problem(petab_problem_priors) is False + original_problem = deepcopy(petab_problem_priors) + # All priors in this test case are defined on parameter scale, hence + # the dummy measurements will take the scaled nominal values. 
+ x_scaled_dict = dict( + zip( + original_problem.x_free_ids, + original_problem.x_nominal_free_scaled, + strict=True, + ) + ) + # convert priors to measurements petab_problem_measurements = priors_to_measurements(petab_problem_priors) # check that the original problem is not modified @@ -45,6 +64,7 @@ def test_priors_to_measurements(problem_id): getattr(original_problem, attr) ) ).empty, diff + # check that measurements and observables were added assert petab.v1.lint_problem(petab_problem_measurements) is False assert ( @@ -59,6 +79,7 @@ def test_priors_to_measurements(problem_id): petab_problem_measurements.measurement_df.shape[0] > petab_problem_priors.measurement_df.shape[0] ) + # ensure we didn't introduce any new conditions assert len( get_simulation_conditions(petab_problem_measurements.measurement_df) @@ -67,26 +88,40 @@ def test_priors_to_measurements(problem_id): # verify that the objective function value is the same # load/construct the simulation results - simulation_df_priors = petab.v1.get_simulation_df( + simulation_df_priors = get_simulation_df( Path( benchmark_models_petab.MODELS_DIR, problem_id, f"simulatedData_{problem_id}.tsv", ) ) - simulation_df_measurements = pd.concat( - [ - petab_problem_measurements.measurement_df.rename( - columns={petab.v1.MEASUREMENT: petab.v1.SIMULATION} - )[ - petab_problem_measurements.measurement_df[ - petab.v1.C.OBSERVABLE_ID - ].str.startswith("prior_") - ], - simulation_df_priors, + # for the prior observables, we need to "simulate" the model with the + # nominal parameter values + simulated_prior_observables = ( + petab_problem_measurements.measurement_df.rename( + columns={MEASUREMENT: SIMULATION} + )[ + petab_problem_measurements.measurement_df[ + OBSERVABLE_ID + ].str.startswith("prior_") ] ) + def apply_parameter_values(row): + # apply the parameter values to the observable formula for the prior + if row[OBSERVABLE_ID].startswith("prior_"): + row[SIMULATION] = x_scaled_dict[ + 
row[OBSERVABLE_ID].removeprefix("prior_") + ] + return row + + simulated_prior_observables = simulated_prior_observables.apply( + apply_parameter_values, axis=1 + ) + simulation_df_measurements = pd.concat( + [simulation_df_priors, simulated_prior_observables] + ) + llh_priors = petab.v1.calculate_llh_for_table( petab_problem_priors.measurement_df, simulation_df_priors, @@ -102,10 +137,8 @@ def test_priors_to_measurements(problem_id): # get prior objective function contribution parameter_ids = petab_problem_priors.parameter_df.index.values[ - (petab_problem_priors.parameter_df[petab.v1.ESTIMATE] == 1) - & petab_problem_priors.parameter_df[ - petab.v1.OBJECTIVE_PRIOR_TYPE - ].notna() + (petab_problem_priors.parameter_df[ESTIMATE] == 1) + & petab_problem_priors.parameter_df[OBJECTIVE_PRIOR_TYPE].notna() ] priors = petab.v1.get_priors_from_df( petab_problem_priors.parameter_df, @@ -117,9 +150,7 @@ def test_priors_to_measurements(problem_id): prior_type, prior_pars, par_scale, par_bounds = prior if prior_type == petab.v1.PARAMETER_SCALE_NORMAL: prior_contrib += norm.logpdf( - petab_problem_priors.x_nominal_free_scaled[ - petab_problem_priors.x_free_ids.index(parameter_id) - ], + x_scaled_dict[parameter_id], loc=prior_pars[0], scale=prior_pars[1], ) @@ -134,4 +165,4 @@ def test_priors_to_measurements(problem_id): llh_priors + prior_contrib, llh_measurements, rtol=1e-3, atol=1e-16 ), (llh_priors + prior_contrib, llh_measurements) # check that the tolerance is not too high - assert np.abs(prior_contrib) > 1e-3 * np.abs(llh_priors) + assert np.abs(prior_contrib) > 1e-8 * np.abs(llh_priors) From 45a337144d9d8e3efc9717e8b3e18bc0585eaeb9 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 9 Dec 2024 15:22:19 +0100 Subject: [PATCH 009/141] Add `SbmlModel.from_antimony` (#331) Simplify creating a PEtab SbmlModel from antimony files or strings. Replace simplesbml by antimony in tests. 
--- petab/v1/models/sbml_model.py | 79 +++++++++++++++++++++++++++++- pyproject.toml | 8 ++- tests/v1/test_combine.py | 10 ++-- tests/v1/test_deprecated.py | 4 +- tests/v1/test_lint.py | 33 +++++++------ tests/v1/test_observables.py | 17 ++----- tests/v1/test_parameter_mapping.py | 73 ++++++++++++--------------- tests/v1/test_petab.py | 38 ++++++-------- tests/v1/test_sbml.py | 42 ++++++++++------ tests/v1/test_simplify.py | 9 ++-- 10 files changed, 191 insertions(+), 122 deletions(-) diff --git a/petab/v1/models/sbml_model.py b/petab/v1/models/sbml_model.py index 5102715d..55cd7b4d 100644 --- a/petab/v1/models/sbml_model.py +++ b/petab/v1/models/sbml_model.py @@ -1,4 +1,5 @@ """Functions for handling SBML models""" +from __future__ import annotations import itertools from collections.abc import Iterable @@ -32,8 +33,25 @@ def __init__( sbml_document: libsbml.SBMLDocument = None, model_id: str = None, ): + """Constructor. + + :param sbml_model: SBML model. Optional if `sbml_document` is given. + :param sbml_reader: SBML reader. Optional. + :param sbml_document: SBML document. Optional if `sbml_model` is given. + :param model_id: Model ID. Defaults to the SBML model ID.""" super().__init__() + if sbml_model is None and sbml_document is None: + raise ValueError( + "Either sbml_model or sbml_document must be given." 
+ ) + + if sbml_model is None: + sbml_model = sbml_document.getModel() + + if sbml_document is None: + sbml_document = sbml_model.getSBMLDocument() + self.sbml_reader: libsbml.SBMLReader | None = sbml_reader self.sbml_document: libsbml.SBMLDocument | None = sbml_document self.sbml_model: libsbml.Model | None = sbml_model @@ -70,7 +88,7 @@ def __setstate__(self, state): self.__dict__.update(state) @staticmethod - def from_file(filepath_or_buffer, model_id: str = None): + def from_file(filepath_or_buffer, model_id: str = None) -> SbmlModel: sbml_reader, sbml_document, sbml_model = get_sbml_model( filepath_or_buffer ) @@ -82,7 +100,12 @@ def from_file(filepath_or_buffer, model_id: str = None): ) @staticmethod - def from_string(sbml_string, model_id: str = None): + def from_string(sbml_string, model_id: str = None) -> SbmlModel: + """Create SBML model from an SBML string. + + :param sbml_string: SBML model as string. + :param model_id: Model ID. Defaults to the SBML model ID. + """ sbml_reader, sbml_document, sbml_model = load_sbml_from_string( sbml_string ) @@ -97,6 +120,18 @@ def from_string(sbml_string, model_id: str = None): model_id=model_id, ) + @staticmethod + def from_antimony(ant_model: str | Path) -> SbmlModel: + """Create SBML model from an Antimony model. + + Requires the `antimony` package (https://github.com/sys-bio/antimony). + + :param ant_model: Antimony model as string or path to file. + Strings are interpreted as Antimony model strings. + """ + sbml_str = antimony2sbml(ant_model) + return SbmlModel.from_string(sbml_str) + @property def model_id(self): return self._model_id @@ -238,3 +273,43 @@ def sympify_sbml(sbml_obj: libsbml.ASTNode | libsbml.SBase) -> sp.Expr: ) return sp.sympify(formula_str, locals=_clash) + + +def antimony2sbml(ant_model: str | Path) -> str: + """Convert Antimony model to SBML. + + :param ant_model: Antimony model as string or path to file. + Strings are interpreted as Antimony model strings. 
+ + :returns: + The SBML model as string. + """ + import antimony as ant + + # Unload everything / free memory + ant.clearPreviousLoads() + ant.freeAll() + + try: + # potentially fails because of too long file name + is_file = ant_model and Path(ant_model).exists() + except OSError: + is_file = False + + if is_file: + status = ant.loadAntimonyFile(str(ant_model)) + else: + status = ant.loadAntimonyString(ant_model) + if status < 0: + raise RuntimeError( + f"Antimony model could not be loaded: {ant.getLastError()}" + ) + + if (main_module_name := ant.getMainModuleName()) is None: + raise AssertionError("There is no Antimony module.") + + sbml_str = ant.getSBMLString(main_module_name) + if not sbml_str: + raise ValueError("Antimony model could not be converted to SBML.") + + return sbml_str diff --git a/pyproject.toml b/pyproject.toml index 1758476a..6eeb3480 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -35,11 +35,11 @@ maintainers = [ [project.optional-dependencies] tests = [ + "antimony>=2.14.0", + "pysb", "pytest", "pytest-cov", - "simplesbml", "scipy", - "pysb", ] quality = [ "pre-commit", @@ -48,6 +48,9 @@ reports = [ # https://github.com/spatialaudio/nbsphinx/issues/641 "Jinja2==3.0.3", ] +antimony = [ + "antimony>=2.14.0", +] combine = [ "python-libcombine>=0.2.6", ] @@ -61,6 +64,7 @@ doc = [ # https://github.com/spatialaudio/nbsphinx/issues/687#issuecomment-1339271312 "ipython>=7.21.0, !=8.7.0", "pysb", + "antimony>=2.14.0" ] vis = [ "matplotlib>=3.6.0", diff --git a/tests/v1/test_combine.py b/tests/v1/test_combine.py index 08ad5b77..4fca4105 100644 --- a/tests/v1/test_combine.py +++ b/tests/v1/test_combine.py @@ -4,8 +4,9 @@ import pandas as pd -import petab +import petab.v1 as petab from petab.C import * +from petab.v1.models.sbml_model import SbmlModel # import fixtures pytest_plugins = [ @@ -16,10 +17,7 @@ def test_combine_archive(): """Test `create_combine_archive` and `Problem.from_combine`""" # Create test files - import simplesbml - - ss_model 
= simplesbml.SbmlModel() - + model = SbmlModel.from_antimony("") # Create tables with arbitrary content measurement_df = pd.DataFrame( data={ @@ -80,7 +78,7 @@ def test_combine_archive(): ) as tempdir: # Write test data outdir = Path(tempdir) - petab.write_sbml(ss_model.document, outdir / sbml_file_name) + model.to_file(outdir / sbml_file_name) petab.write_measurement_df( measurement_df, outdir / measurement_file_name ) diff --git a/tests/v1/test_deprecated.py b/tests/v1/test_deprecated.py index 4af41fa3..b78e7856 100644 --- a/tests/v1/test_deprecated.py +++ b/tests/v1/test_deprecated.py @@ -14,7 +14,7 @@ def test_problem_with_sbml_model(): """Test that a problem can be correctly created from sbml model.""" # retrieve test data ( - ss_model, + model, condition_df, observable_df, measurement_df, @@ -23,7 +23,7 @@ def test_problem_with_sbml_model(): with pytest.deprecated_call(): petab_problem = petab.Problem( # noqa: F811 - sbml_model=ss_model.model, + model=model, condition_df=condition_df, measurement_df=measurement_df, parameter_df=parameter_df, diff --git a/tests/v1/test_lint.py b/tests/v1/test_lint.py index b178a425..d75bdcea 100644 --- a/tests/v1/test_lint.py +++ b/tests/v1/test_lint.py @@ -18,7 +18,6 @@ def test_assert_measured_observables_present(): # create test model - measurement_df = pd.DataFrame( data={ OBSERVABLE_ID: ["non-existing1"], @@ -255,15 +254,15 @@ def test_assert_no_leading_trailing_whitespace(): def test_assert_model_parameters_in_condition_or_parameter_table(): - import simplesbml - from petab.models.sbml_model import SbmlModel - ss_model = simplesbml.SbmlModel() - ss_model.addParameter("parameter1", 0.0) - ss_model.addParameter("noiseParameter1_", 0.0) - ss_model.addParameter("observableParameter1_", 0.0) - sbml_model = SbmlModel(sbml_model=ss_model.model) + ant_model = """ + parameter1 = 0.0 + noiseParameter1_ = 0.0 + observableParameter1_ = 0.0 + """ + sbml_model = SbmlModel.from_antimony(ant_model) + assert sbml_model.is_valid() 
lint.assert_model_parameters_in_condition_or_parameter_table( sbml_model, pd.DataFrame(columns=["parameter1"]), pd.DataFrame() @@ -284,7 +283,10 @@ def test_assert_model_parameters_in_condition_or_parameter_table(): sbml_model, pd.DataFrame(), pd.DataFrame() ) - ss_model.addAssignmentRule("parameter1", "parameter2") + sbml_model = SbmlModel.from_antimony( + ant_model + "\nparameter2 = 0\nparameter1 := parameter2" + ) + assert sbml_model.is_valid() lint.assert_model_parameters_in_condition_or_parameter_table( sbml_model, pd.DataFrame(), pd.DataFrame() ) @@ -499,12 +501,11 @@ def test_assert_measurement_conditions_present_in_condition_table(): def test_check_condition_df(): """Check that we correctly detect errors in condition table""" - import simplesbml from petab.models.sbml_model import SbmlModel - ss_model = simplesbml.SbmlModel() - model = SbmlModel(sbml_model=ss_model.model) + model = SbmlModel.from_antimony("") + condition_df = pd.DataFrame( data={ CONDITION_ID: ["condition1"], @@ -527,7 +528,7 @@ def test_check_condition_df(): lint.check_condition_df(condition_df, model, observable_df) # fix by adding parameter - ss_model.addParameter("p1", 1.0) + model = SbmlModel.from_antimony("p1 = 1") lint.check_condition_df(condition_df, model) # species missing in model @@ -536,7 +537,7 @@ def test_check_condition_df(): lint.check_condition_df(condition_df, model) # fix: - ss_model.addSpecies("[s1]", 1.0) + model = SbmlModel.from_antimony("p1 = 1; species s1 = 1") lint.check_condition_df(condition_df, model) # compartment missing in model @@ -545,7 +546,9 @@ def test_check_condition_df(): lint.check_condition_df(condition_df, model) # fix: - ss_model.addCompartment(comp_id="c2", vol=1.0) + model = SbmlModel.from_antimony( + "p1 = 1; species s1 = 1; compartment c2 = 1" + ) lint.check_condition_df(condition_df, model) diff --git a/tests/v1/test_observables.py b/tests/v1/test_observables.py index f9547fec..e870ac12 100644 --- a/tests/v1/test_observables.py +++ 
b/tests/v1/test_observables.py @@ -69,14 +69,11 @@ def test_write_observable_df(): def test_get_output_parameters(): """Test measurements.get_output_parameters.""" - # sbml model - import simplesbml - from petab.models.sbml_model import SbmlModel - ss_model = simplesbml.SbmlModel() - ss_model.addParameter("fixedParameter1", 1.0) - ss_model.addParameter("observable_1", 1.0) + model = SbmlModel.from_antimony( + "fixedParameter1 = 1.0; observable_1 = 1.0" + ) # observable file observable_df = pd.DataFrame( @@ -88,9 +85,7 @@ def test_get_output_parameters(): } ).set_index(OBSERVABLE_ID) - output_parameters = petab.get_output_parameters( - observable_df, SbmlModel(sbml_model=ss_model.model) - ) + output_parameters = petab.get_output_parameters(observable_df, model) assert output_parameters == ["offset", "scaling"] @@ -105,9 +100,7 @@ def test_get_output_parameters(): } ).set_index(OBSERVABLE_ID) - output_parameters = petab.get_output_parameters( - observable_df, SbmlModel(sbml_model=ss_model.model) - ) + output_parameters = petab.get_output_parameters(observable_df, model) assert output_parameters == ["N", "beta"] diff --git a/tests/v1/test_parameter_mapping.py b/tests/v1/test_parameter_mapping.py index e499bd5c..4fe44aa5 100644 --- a/tests/v1/test_parameter_mapping.py +++ b/tests/v1/test_parameter_mapping.py @@ -32,16 +32,15 @@ def test_no_condition_specific(condition_df_2_conditions): } ) - import simplesbml - - ss_model = simplesbml.SbmlModel() - ss_model.addParameter("dynamicParameter1", 1.0) - ss_model.addParameter("dynamicParameter2", 2.0) - ss_model.addParameter("dynamicParameter3", 3.0) + model = SbmlModel.from_antimony( + "dynamicParameter1 = 1.0; " + "dynamicParameter2 = 2.0; " + "dynamicParameter3 = 3.0; " + # add species, which will have initial concentration in condition + # table but which should not show up in mapping + "species someSpecies = 1.0" + ) - # add species, which will have initial concentration in condition table - # but which should not show 
up in mapping - ss_model.addSpecies("[someSpecies]", 1.0) condition_df["someSpecies"] = [0.0, 0.0] # Test without parameter table @@ -80,7 +79,6 @@ def test_no_condition_specific(condition_df_2_conditions): ), ] - model = SbmlModel(sbml_model=ss_model.model) actual = petab.get_optimization_to_simulation_parameter_mapping( model=model, measurement_df=measurement_df, @@ -245,13 +243,9 @@ def test_no_condition_specific(condition_df_2_conditions): def test_all_override(condition_df_2_conditions): # Condition-specific parameters overriding original parameters condition_df = condition_df_2_conditions - - import simplesbml - - ss_model = simplesbml.SbmlModel() - ss_model.addParameter("dynamicParameter1", 0.0) - ss_model.addParameter("dynamicParameter2", 0.0) - model = SbmlModel(sbml_model=ss_model.model) + model = SbmlModel.from_antimony( + "dynamicParameter1 = 0.0; dynamicParameter2 = 0.0" + ) measurement_df = pd.DataFrame( data={ @@ -364,15 +358,16 @@ def test_partial_override(condition_df_2_conditions): ) condition_df.set_index("conditionId", inplace=True) - import simplesbml - - ss_model = simplesbml.SbmlModel() - ss_model.addParameter("fixedParameter1", 0.5) - ss_model.addParameter("fixedParameter2", 1.0) - ss_model.addParameter("dynamicParameter1", 0.0) - ss_model.addParameter("observableParameter1_obs1", 0.0) - ss_model.addParameter("observableParameter2_obs1", 0.0) - ss_model.addParameter("observableParameter1_obs2", 0.0) + model = SbmlModel.from_antimony( + """ + fixedParameter1 = 0.5 + fixedParameter2 = 1.0 + dynamicParameter1 = 0.0 + observableParameter1_obs1 = 0.0 + observableParameter2_obs1 = 0.0 + observableParameter1_obs2 = 0.0 + """ + ) measurement_df = pd.DataFrame( data={ @@ -454,7 +449,7 @@ def test_partial_override(condition_df_2_conditions): actual = petab.get_optimization_to_simulation_parameter_mapping( measurement_df=measurement_df, condition_df=condition_df, - model=petab.models.sbml_model.SbmlModel(ss_model.model), + model=model, 
parameter_df=parameter_df, ) @@ -504,12 +499,9 @@ def test_parameterized_condition_table(): ) parameter_df.set_index(PARAMETER_ID, inplace=True) - import simplesbml - - ss_model = simplesbml.SbmlModel() - ss_model.addParameter("dynamicParameter1", 1.0) + model = SbmlModel.from_antimony("dynamicParameter1 = 1.0") - assert petab.get_model_parameters(ss_model.model) == [ + assert petab.get_model_parameters(model.sbml_model) == [ "dynamicParameter1" ] @@ -517,7 +509,7 @@ def test_parameterized_condition_table(): measurement_df=measurement_df, condition_df=condition_df, parameter_df=parameter_df, - model=petab.models.sbml_model.SbmlModel(ss_model.model), + model=model, ) expected = [ @@ -550,13 +542,10 @@ def test_parameterized_condition_table_changed_scale(): overridee_id = "overridee" # set up model - import simplesbml - - ss_model = simplesbml.SbmlModel() - ss_model.addParameter(overridee_id, 2.0) - assert petab.get_model_parameters(ss_model.model) == [overridee_id] + model = SbmlModel.from_antimony(f"{overridee_id} = 2.0") + assert petab.get_model_parameters(model.sbml_model) == [overridee_id] assert petab.get_model_parameters( - ss_model.model, with_values=True + model.sbml_model, with_values=True ) == {overridee_id: 2.0} # set up condition table @@ -614,7 +603,7 @@ def test_parameterized_condition_table_changed_scale(): measurement_df=measurement_df, condition_df=condition_df, parameter_df=parameter_df, - model=petab.models.sbml_model.SbmlModel(ss_model.model), + model=model, ) expected = [ @@ -638,7 +627,7 @@ def test_parameterized_condition_table_changed_scale(): measurement_df=measurement_df, condition_df=condition_df, parameter_df=parameter_df, - model=petab.models.sbml_model.SbmlModel(ss_model.model), + model=model, scaled_parameters=True, ) @@ -669,7 +658,7 @@ def test_parameterized_condition_table_changed_scale(): measurement_df=measurement_df, condition_df=condition_df, parameter_df=parameter_df, - model=petab.models.sbml_model.SbmlModel(ss_model.model), + 
model=model, ) expected = [ diff --git a/tests/v1/test_petab.py b/tests/v1/test_petab.py index eac237d2..ff9621fa 100644 --- a/tests/v1/test_petab.py +++ b/tests/v1/test_petab.py @@ -6,7 +6,6 @@ from math import nan from pathlib import Path -import libsbml import numpy as np import pandas as pd import pytest @@ -39,11 +38,8 @@ def condition_df_2_conditions(): def petab_problem(): """Test petab problem.""" # create test model - import simplesbml - - model = simplesbml.SbmlModel() - model.addParameter("fixedParameter1", 0.0) - model.addParameter("observable_1", 0.0) + ant_model = "fixedParameter1=0.0; observable_1=0.0" + model = SbmlModel.from_antimony(ant_model) petab_problem = petab.Problem() petab_problem.add_measurement( @@ -79,7 +75,7 @@ def petab_problem(): with tempfile.TemporaryDirectory() as temp_dir: sbml_file_name = Path(temp_dir, "model.xml") - libsbml.writeSBMLToFile(model.document, str(sbml_file_name)) + model.to_file(sbml_file_name) measurement_file_name = Path(temp_dir, "measurements.tsv") petab.write_measurement_df( @@ -285,13 +281,15 @@ def test_create_parameter_df( condition_df_2_conditions, ): # pylint: disable=W0621 """Test petab.create_parameter_df.""" - import simplesbml - - ss_model = simplesbml.SbmlModel() - ss_model.addSpecies("[x1]", 1.0) - ss_model.addParameter("fixedParameter1", 2.0) - ss_model.addParameter("p0", 3.0) - model = SbmlModel(sbml_model=ss_model.model) + ant_model = """ + species x1 = 1.0 + fixedParameter1 = 2.0 + p0 = 3.0 + # Add assignment rule target which should be ignored + assignment_target = 0.0 + assignment_target := 1.0 + """ + model = SbmlModel.from_antimony(ant_model) observable_df = pd.DataFrame( data={ @@ -300,10 +298,6 @@ def test_create_parameter_df( } ).set_index(OBSERVABLE_ID) - # Add assignment rule target which should be ignored - ss_model.addParameter("assignment_target", 0.0) - ss_model.addAssignmentRule("assignment_target", "1.0") - measurement_df = pd.DataFrame( data={ OBSERVABLE_ID: ["obs1", "obs2"], @@ 
-319,10 +313,10 @@ def test_create_parameter_df( with warnings.catch_warnings(record=True) as w: warnings.simplefilter("always") parameter_df = petab.v1.create_parameter_df( - ss_model.model, - condition_df_2_conditions, - observable_df, - measurement_df, + sbml_model=model.sbml_model, + condition_df=condition_df_2_conditions, + observable_df=observable_df, + measurement_df=measurement_df, ) assert len(w) == 1 assert issubclass(w[-1].category, DeprecationWarning) diff --git a/tests/v1/test_sbml.py b/tests/v1/test_sbml.py index 350a2f0d..5c262d43 100644 --- a/tests/v1/test_sbml.py +++ b/tests/v1/test_sbml.py @@ -13,17 +13,16 @@ def create_test_data(): # Create test model and data files - import simplesbml - - ss_model = simplesbml.SbmlModel() - ss_model.addCompartment(comp_id="compartment_1", vol=1) - for i in range(1, 4): - ss_model.addParameter(f"parameter_{i}", i) - - for i in range(1, 5): - ss_model.addSpecies(f"[species_{i}]", 10 * i) - - ss_model.addAssignmentRule("species_2", "25") + model = SbmlModel.from_antimony( + "\n".join( + [ + "compartment compartment_1 = 1", + *(f"species species_{i} = 10 * {i}" for i in range(1, 5)), + *(f"parameter_{i} = {i}" for i in range(1, 4)), + "species_2 := 25", + ] + ) + ) condition_df = pd.DataFrame( { @@ -68,7 +67,7 @@ def create_test_data(): ) parameter_df.set_index([petab.PARAMETER_ID], inplace=True) - return ss_model, condition_df, observable_df, measurement_df, parameter_df + return model, condition_df, observable_df, measurement_df, parameter_df def check_model(condition_model): @@ -99,7 +98,7 @@ def test_get_condition_specific_models(): """Test for petab.sbml.get_condition_specific_models""" # retrieve test data ( - ss_model, + model, condition_df, observable_df, measurement_df, @@ -107,7 +106,7 @@ def test_get_condition_specific_models(): ) = create_test_data() petab_problem = petab.Problem( - model=petab.models.sbml_model.SbmlModel(ss_model.model), + model=model, condition_df=condition_df, 
observable_df=observable_df, measurement_df=measurement_df, @@ -133,3 +132,18 @@ def test_sbml_model_repr(): sbml_model.setId("test") petab_model = SbmlModel(sbml_model) assert repr(petab_model) == "" + + +def test_sbml_from_ant(): + ant_model = """ + model test + R1: S1 -> S2; k1*S1 + k1 = 1 + end + """ + petab_model = SbmlModel.from_antimony(ant_model) + assert petab_model.model_id == "test" + assert petab_model.get_parameter_value("k1") == 1.0 + assert set(petab_model.get_valid_parameters_for_parameter_table()) == { + "k1" + } diff --git a/tests/v1/test_simplify.py b/tests/v1/test_simplify.py index 3d9a8909..9aa25f8f 100644 --- a/tests/v1/test_simplify.py +++ b/tests/v1/test_simplify.py @@ -3,7 +3,6 @@ import pandas as pd import pytest -import simplesbml from pandas.testing import * from petab import Problem @@ -14,9 +13,9 @@ @pytest.fixture def problem() -> Problem: - ss_model = simplesbml.SbmlModel() - ss_model.addParameter("some_parameter", val=1.0) - ss_model.addParameter("same_value_for_all_conditions", val=1.0) + model = SbmlModel.from_antimony( + "some_parameter = 1.0; same_value_for_all_conditions = 1.0" + ) observable_df = pd.DataFrame( { @@ -53,7 +52,7 @@ def problem() -> Problem: } ) yield Problem( - model=SbmlModel(sbml_model=ss_model.getModel()), + model=model, condition_df=conditions_df, observable_df=observable_df, measurement_df=measurement_df, From 4a551a7a1f17c02d2b495efee07664ae3331b8fc Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 9 Dec 2024 15:46:24 +0100 Subject: [PATCH 010/141] Store problem configuration in Problem (#326) Introduces Problem.config which contains the info from the PEtab yaml file. Sometimes it is convenient to have the original filenames around. Pydantic gives more helpful error messages than `jsonschema` in case of incorrect inputs. Later on, this could replace `jsonschema` completely. Closes #324. 
--- petab/v1/problem.py | 104 ++++++++++++++++++++++++++++++++------------ pyproject.toml | 1 + 2 files changed, 78 insertions(+), 27 deletions(-) diff --git a/petab/v1/problem.py b/petab/v1/problem.py index f4951ce6..ea300258 100644 --- a/petab/v1/problem.py +++ b/petab/v1/problem.py @@ -11,6 +11,7 @@ from warnings import warn import pandas as pd +from pydantic import AnyUrl, BaseModel, Field, RootModel from . import ( conditions, @@ -79,6 +80,7 @@ def __init__( observable_df: pd.DataFrame = None, mapping_df: pd.DataFrame = None, extensions_config: dict = None, + config: ProblemConfig = None, ): self.condition_df: pd.DataFrame | None = condition_df self.measurement_df: pd.DataFrame | None = measurement_df @@ -113,6 +115,7 @@ def __init__( self.model: Model | None = model self.extensions_config = extensions_config or {} + self.config = config def __getattr__(self, name): # For backward-compatibility, allow access to SBML model related @@ -262,10 +265,14 @@ def from_yaml( yaml_config: PEtab configuration as dictionary or YAML file name base_path: Base directory or URL to resolve relative paths """ + # path to the yaml file + filepath = None + if isinstance(yaml_config, Path): yaml_config = str(yaml_config) if isinstance(yaml_config, str): + filepath = yaml_config if base_path is None: base_path = get_path_prefix(yaml_config) yaml_config = yaml.load_yaml(yaml_config) @@ -297,24 +304,25 @@ def get_path(filename): DeprecationWarning, stacklevel=2, ) + config = ProblemConfig( + **yaml_config, base_path=base_path, filepath=filepath + ) + problem0 = config.problems[0] + # currently required for handling PEtab v2 in here + problem0_ = yaml_config["problems"][0] - problem0 = yaml_config["problems"][0] - - if isinstance(yaml_config[PARAMETER_FILE], list): + if isinstance(config.parameter_file, list): parameter_df = parameters.get_parameter_df( - [get_path(f) for f in yaml_config[PARAMETER_FILE]] + [get_path(f) for f in config.parameter_file] ) else: parameter_df = ( - 
parameters.get_parameter_df( - get_path(yaml_config[PARAMETER_FILE]) - ) - if yaml_config[PARAMETER_FILE] + parameters.get_parameter_df(get_path(config.parameter_file)) + if config.parameter_file else None ) - - if yaml_config[FORMAT_VERSION] in [1, "1", "1.0.0"]: - if len(problem0[SBML_FILES]) > 1: + if config.format_version.root in [1, "1", "1.0.0"]: + if len(problem0.sbml_files) > 1: # TODO https://github.com/PEtab-dev/libpetab-python/issues/6 raise NotImplementedError( "Support for multiple models is not yet implemented." @@ -322,24 +330,24 @@ def get_path(filename): model = ( model_factory( - get_path(problem0[SBML_FILES][0]), + get_path(problem0.sbml_files[0]), MODEL_TYPE_SBML, model_id=None, ) - if problem0[SBML_FILES] + if problem0.sbml_files else None ) else: - if len(problem0[MODEL_FILES]) > 1: + if len(problem0_[MODEL_FILES]) > 1: # TODO https://github.com/PEtab-dev/libpetab-python/issues/6 raise NotImplementedError( "Support for multiple models is not yet implemented." ) - if not problem0[MODEL_FILES]: + if not problem0_[MODEL_FILES]: model = None else: model_id, model_info = next( - iter(problem0[MODEL_FILES].items()) + iter(problem0_[MODEL_FILES].items()) ) model = model_factory( get_path(model_info[MODEL_LOCATION]), @@ -347,9 +355,7 @@ def get_path(filename): model_id=model_id, ) - measurement_files = [ - get_path(f) for f in problem0.get(MEASUREMENT_FILES, []) - ] + measurement_files = [get_path(f) for f in problem0.measurement_files] # If there are multiple tables, we will merge them measurement_df = ( core.concat_tables( @@ -359,9 +365,7 @@ def get_path(filename): else None ) - condition_files = [ - get_path(f) for f in problem0.get(CONDITION_FILES, []) - ] + condition_files = [get_path(f) for f in problem0.condition_files] # If there are multiple tables, we will merge them condition_df = ( core.concat_tables(condition_files, conditions.get_condition_df) @@ -370,7 +374,7 @@ def get_path(filename): ) visualization_files = [ - get_path(f) for f in 
problem0.get(VISUALIZATION_FILES, []) + get_path(f) for f in problem0.visualization_files ] # If there are multiple tables, we will merge them visualization_df = ( @@ -379,9 +383,7 @@ def get_path(filename): else None ) - observable_files = [ - get_path(f) for f in problem0.get(OBSERVABLE_FILES, []) - ] + observable_files = [get_path(f) for f in problem0.observable_files] # If there are multiple tables, we will merge them observable_df = ( core.concat_tables(observable_files, observables.get_observable_df) @@ -389,7 +391,7 @@ def get_path(filename): else None ) - mapping_files = [get_path(f) for f in problem0.get(MAPPING_FILES, [])] + mapping_files = [get_path(f) for f in problem0_.get(MAPPING_FILES, [])] # If there are multiple tables, we will merge them mapping_df = ( core.concat_tables(mapping_files, mapping.get_mapping_df) @@ -406,6 +408,7 @@ def get_path(filename): visualization_df=visualization_df, mapping_df=mapping_df, extensions_config=yaml_config.get(EXTENSIONS, {}), + config=config, ) @staticmethod @@ -1184,3 +1187,50 @@ def add_measurement( if self.measurement_df is not None else tmp_df ) + + +class VersionNumber(RootModel): + root: str | int + + +class ListOfFiles(RootModel): + """List of files.""" + + root: list[str | AnyUrl] = Field(..., description="List of files.") + + def __iter__(self): + return iter(self.root) + + def __len__(self): + return len(self.root) + + def __getitem__(self, index): + return self.root[index] + + +class SubProblem(BaseModel): + """A `problems` object in the PEtab problem configuration.""" + + sbml_files: ListOfFiles = [] + measurement_files: ListOfFiles = [] + condition_files: ListOfFiles = [] + observable_files: ListOfFiles = [] + visualization_files: ListOfFiles = [] + + +class ProblemConfig(BaseModel): + """The PEtab problem configuration.""" + + filepath: str | AnyUrl | None = Field( + None, + description="The path to the PEtab problem configuration.", + exclude=True, + ) + base_path: str | AnyUrl | None = Field( + 
None, + description="The base path to resolve relative paths.", + exclude=True, + ) + format_version: VersionNumber = 1 + parameter_file: str | AnyUrl | None = None + problems: list[SubProblem] = [] diff --git a/pyproject.toml b/pyproject.toml index 6eeb3480..74a1aa1e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -22,6 +22,7 @@ dependencies = [ "pyyaml", "jsonschema", "antlr4-python3-runtime==4.13.1", + "pydantic>=2.10", ] license = {text = "MIT License"} authors = [ From d7f7e3ae6bebc809c020cb8086790e1c557400eb Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 9 Dec 2024 22:14:18 +0100 Subject: [PATCH 011/141] Store problem configuration in `v2.Problem` (#338) Introduces `v2.Problem.config` which contains the info from the PEtab yaml file. The same as #326, but for `v2.Problem`. --------- Co-authored-by: Dilan Pathirana <59329744+dilpath@users.noreply.github.com> --- petab/v2/problem.py | 102 ++++++++++++++++++++++++++++++-------------- 1 file changed, 71 insertions(+), 31 deletions(-) diff --git a/petab/v2/problem.py b/petab/v2/problem.py index c22d74e1..b61d8b14 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -12,6 +12,7 @@ from typing import TYPE_CHECKING import pandas as pd +from pydantic import AnyUrl, BaseModel, Field from ..v1 import ( conditions, @@ -25,6 +26,7 @@ yaml, ) from ..v1.models.model import Model, model_factory +from ..v1.problem import ListOfFiles, VersionNumber from ..v1.yaml import get_path_prefix from ..v2.C import * # noqa: F403 from . 
import experiments @@ -73,6 +75,7 @@ def __init__( observable_df: pd.DataFrame = None, mapping_df: pd.DataFrame = None, extensions_config: dict = None, + config: ProblemConfig = None, ): from ..v2.lint import default_validation_tasks @@ -88,7 +91,7 @@ def __init__( self.validation_tasks: list[ ValidationTask ] = default_validation_tasks.copy() - + self.config = config if self.experiment_df is not None: warnings.warn( "The experiment table is not yet supported and " @@ -199,40 +202,37 @@ def get_path(filename): "Consider using " "petab.CompositeProblem.from_yaml() instead." ) + config = ProblemConfig( + **yaml_config, base_path=base_path, filepath=yaml_file + ) + problem0 = config.problems[0] - problem0 = yaml_config["problems"][0] - - if isinstance(yaml_config[PARAMETER_FILE], list): + if isinstance(config.parameter_file, list): parameter_df = parameters.get_parameter_df( - [get_path(f) for f in yaml_config[PARAMETER_FILE]] + [get_path(f) for f in config.parameter_file] ) else: parameter_df = ( - parameters.get_parameter_df( - get_path(yaml_config[PARAMETER_FILE]) - ) - if yaml_config[PARAMETER_FILE] + parameters.get_parameter_df(get_path(config.parameter_file)) + if config.parameter_file else None ) - if len(problem0[MODEL_FILES] or []) > 1: + if len(problem0.model_files or []) > 1: # TODO https://github.com/PEtab-dev/libpetab-python/issues/6 raise NotImplementedError( "Support for multiple models is not yet implemented." 
) - if not problem0[MODEL_FILES]: - model = None - else: - model_id, model_info = next(iter(problem0[MODEL_FILES].items())) + model = None + if problem0.model_files: + model_id, model_info = next(iter(problem0.model_files.items())) model = model_factory( - get_path(model_info[MODEL_LOCATION]), - model_info[MODEL_LANGUAGE], + get_path(model_info.location), + model_info.language, model_id=model_id, ) - measurement_files = [ - get_path(f) for f in problem0.get(MEASUREMENT_FILES, []) - ] + measurement_files = [get_path(f) for f in problem0.measurement_files] # If there are multiple tables, we will merge them measurement_df = ( core.concat_tables( @@ -242,9 +242,7 @@ def get_path(filename): else None ) - condition_files = [ - get_path(f) for f in problem0.get(CONDITION_FILES, []) - ] + condition_files = [get_path(f) for f in problem0.condition_files] # If there are multiple tables, we will merge them condition_df = ( core.concat_tables(condition_files, conditions.get_condition_df) @@ -252,9 +250,7 @@ def get_path(filename): else None ) - experiment_files = [ - get_path(f) for f in problem0.get(EXPERIMENT_FILES, []) - ] + experiment_files = [get_path(f) for f in problem0.experiment_files] # If there are multiple tables, we will merge them experiment_df = ( core.concat_tables(experiment_files, experiments.get_experiment_df) @@ -263,7 +259,7 @@ def get_path(filename): ) visualization_files = [ - get_path(f) for f in problem0.get(VISUALIZATION_FILES, []) + get_path(f) for f in problem0.visualization_files ] # If there are multiple tables, we will merge them visualization_df = ( @@ -272,9 +268,7 @@ def get_path(filename): else None ) - observable_files = [ - get_path(f) for f in problem0.get(OBSERVABLE_FILES, []) - ] + observable_files = [get_path(f) for f in problem0.observable_files] # If there are multiple tables, we will merge them observable_df = ( core.concat_tables(observable_files, observables.get_observable_df) @@ -282,7 +276,7 @@ def get_path(filename): else None ) 
- mapping_files = [get_path(f) for f in problem0.get(MAPPING_FILES, [])] + mapping_files = [get_path(f) for f in problem0.mapping_files] # If there are multiple tables, we will merge them mapping_df = ( core.concat_tables(mapping_files, mapping.get_mapping_df) @@ -299,7 +293,7 @@ def get_path(filename): model=model, visualization_df=visualization_df, mapping_df=mapping_df, - extensions_config=yaml_config.get(EXTENSIONS, {}), + extensions_config=config.extensions, ) @staticmethod @@ -981,3 +975,49 @@ def add_experiment(self, id_: str, *args): if self.experiment_df is not None else tmp_df ) + + +class ModelFile(BaseModel): + """A file in the PEtab problem configuration.""" + + location: str | AnyUrl + language: str + + +class SubProblem(BaseModel): + """A `problems` object in the PEtab problem configuration.""" + + model_files: dict[str, ModelFile] | None = {} + measurement_files: ListOfFiles = [] + condition_files: ListOfFiles = [] + experiment_files: ListOfFiles = [] + observable_files: ListOfFiles = [] + visualization_files: ListOfFiles = [] + mapping_files: ListOfFiles = [] + + +class ExtensionConfig(BaseModel): + """The configuration of a PEtab extension.""" + + name: str + version: str + config: dict + + +class ProblemConfig(BaseModel): + """The PEtab problem configuration.""" + + filepath: str | AnyUrl | None = Field( + None, + description="The path to the PEtab problem configuration.", + exclude=True, + ) + base_path: str | AnyUrl | None = Field( + None, + description="The base path to resolve relative paths.", + exclude=True, + ) + format_version: VersionNumber = "2.0.0" + parameter_file: str | AnyUrl | None = None + problems: list[SubProblem] = [] + extensions: list[ExtensionConfig] = [] From 8456635eef729fb4964f86ddcb309ba93781813a Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Wed, 11 Dec 2024 09:49:10 +0100 Subject: [PATCH 012/141] Refactor priors (#329) Introduces `Prior` and `Distribution` classes for handling PEtab-specific prior distributions, 
and (PEtab-version-invariant) univariate probability distributions. Supports sampling from them, and evaluating negative log-priors (#312). Later on, this can be extended to noise models for measurements and computing loglikelihoods. This also adds a notebook demonstrating the various prior options which are a common source of confusion. Closes #311. :eyes: notebook: https://petab--329.org.readthedocs.build/projects/libpetab-python/en/329/example/distributions.html --- doc/example.rst | 1 + doc/example/distributions.ipynb | 208 +++++++++++++++++++++++++++ doc/modules.rst | 1 + petab/v1/C.py | 3 +- petab/v1/distributions.py | 243 ++++++++++++++++++++++++++++++++ petab/v1/parameters.py | 3 +- petab/v1/priors.py | 212 ++++++++++++++++++++++++++++ petab/v1/sampling.py | 110 ++++----------- tests/v1/test_distributions.py | 87 ++++++++++++ tests/v1/test_priors.py | 94 ++++++++---- 10 files changed, 851 insertions(+), 111 deletions(-) create mode 100644 doc/example/distributions.ipynb create mode 100644 petab/v1/distributions.py create mode 100644 tests/v1/test_distributions.py diff --git a/doc/example.rst b/doc/example.rst index 6fe6dab5..dfe54fb3 100644 --- a/doc/example.rst +++ b/doc/example.rst @@ -10,6 +10,7 @@ The following examples should help to get a better idea of how to use the PEtab example/example_petablint.ipynb example/example_visualization.ipynb + example/distributions.ipynb Examples of systems biology parameter estimation problems specified in PEtab can be found in the `systems biology benchmark model collection `_. 
diff --git a/doc/example/distributions.ipynb b/doc/example/distributions.ipynb new file mode 100644 index 00000000..86235fe1 --- /dev/null +++ b/doc/example/distributions.ipynb @@ -0,0 +1,208 @@ +{ + "cells": [ + { + "metadata": {}, + "cell_type": "markdown", + "source": [ + "# Prior distributions in PEtab\n", + "\n", + "This notebook gives a brief overview of the prior distributions in PEtab and how they are represented in the PEtab library.\n", + "\n", + "Prior distributions are used to specify the prior knowledge about the parameters.\n", + "Parameter priors are specified in the parameter table. A prior is defined by its type and its parameters.\n", + "Each prior type has a specific set of parameters. For example, the normal distribution has two parameters: the mean and the standard deviation.\n", + "\n", + "There are two types of priors in PEtab - objective priors and initialization priors:\n", + "\n", + "* *Objective priors* are used to specify the prior knowledge about the parameters that are to be estimated. They will enter the objective function of the optimization problem. They are specified in the `objectivePriorType` and `objectivePriorParameters` columns of the parameter table.\n", + "* *Initialization priors* can be used as a hint for the optimization algorithm. They will not enter the objective function. 
They are specified in the `initializationPriorType` and `initializationPriorParameters` columns of the parameter table.\n", + "\n", + "\n" + ], + "id": "372289411a2aa7b3" + }, + { + "metadata": { + "collapsed": true + }, + "cell_type": "code", + "source": [ + "import matplotlib.pyplot as plt\n", + "import numpy as np\n", + "import seaborn as sns\n", + "\n", + "from petab.v1.C import *\n", + "from petab.v1.priors import Prior\n", + "\n", + "sns.set_style(None)\n", + "\n", + "\n", + "def plot(prior: Prior, ax=None):\n", + " \"\"\"Visualize a distribution.\"\"\"\n", + " if ax is None:\n", + " fig, ax = plt.subplots()\n", + "\n", + " sample = prior.sample(10000)\n", + "\n", + " # pdf\n", + " xmin = min(sample.min(), prior.lb_scaled if prior.bounds is not None else sample.min())\n", + " xmax = max(sample.max(), prior.ub_scaled if prior.bounds is not None else sample.max())\n", + " x = np.linspace(xmin, xmax, 500)\n", + " y = prior.pdf(x)\n", + " ax.plot(x, y, color='red', label='pdf')\n", + "\n", + " sns.histplot(sample, stat='density', ax=ax, label=\"sample\")\n", + "\n", + " # bounds\n", + " if prior.bounds is not None:\n", + " for bound in (prior.lb_scaled, prior.ub_scaled):\n", + " if bound is not None and np.isfinite(bound):\n", + " ax.axvline(bound, color='black', linestyle='--', label='bound')\n", + "\n", + " ax.set_title(str(prior))\n", + " ax.set_xlabel('Parameter value on the parameter scale')\n", + " ax.grid(False)\n", + " handles, labels = ax.get_legend_handles_labels()\n", + " unique_labels = dict(zip(labels, handles))\n", + " ax.legend(unique_labels.values(), unique_labels.keys())\n", + " plt.show()" + ], + "id": "initial_id", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "The basic distributions are the uniform, normal, Laplace, log-normal, and log-laplace distributions:\n", + "id": "db36a4a93622ccb8" + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "plot(Prior(UNIFORM, (0, 
1)))\n", + "plot(Prior(NORMAL, (0, 1)))\n", + "plot(Prior(LAPLACE, (0, 1)))\n", + "plot(Prior(LOG_NORMAL, (0, 1)))\n", + "plot(Prior(LOG_LAPLACE, (1, 0.5)))" + ], + "id": "4f09e50a3db06d9f", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "If a parameter scale is specified (`parameterScale=lin|log|log10`, not a `parameterScale*`-type distribution), the sample is transformed accordingly (but not the distribution parameters):\n", + "id": "dab4b2d1e0f312d8" + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "plot(Prior(NORMAL, (10, 2), transformation=LIN))\n", + "plot(Prior(NORMAL, (10, 2), transformation=LOG))\n", + "\n", + "# Note that the log-normal distribution is different from a log-transformed normal distribution:\n", + "plot(Prior(LOG_NORMAL, (10, 2), transformation=LIN))" + ], + "id": "f6192c226f179ef9", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "On the log-transformed parameter scale, `Log*` and `parameterScale*` distributions are equivalent:", + "id": "4281ed48859e6431" + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "plot(Prior(LOG_NORMAL, (10, 2), transformation=LOG))\n", + "plot(Prior(PARAMETER_SCALE_NORMAL, (10, 2)))" + ], + "id": "34c95268e8921070", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "Prior distributions can also be defined on the parameter scale by using the types `parameterScaleUniform`, `parameterScaleNormal` or `parameterScaleLaplace`. 
In these cases, 1) the distribution parameters are interpreted on the transformed parameter scale, and 2) a sample from the given distribution is used directly, without applying any transformation according to `parameterScale` (this implies that for `parameterScale=lin`, there is no difference between `parameterScaleUniform` and `uniform`):", + "id": "263c9fd31156a4d5" + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "plot(Prior(UNIFORM, (0.01, 2), transformation=LOG10))\n", + "plot(Prior(PARAMETER_SCALE_UNIFORM, (0.01, 2), transformation=LOG10))\n", + "\n", + "plot(Prior(UNIFORM, (0.01, 2), transformation=LIN))\n", + "plot(Prior(PARAMETER_SCALE_UNIFORM, (0.01, 2), transformation=LIN))\n" + ], + "id": "5ca940bc24312fc6", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "To prevent the sampled parameters from exceeding the bounds, the sampled parameters are clipped to the bounds. The bounds are defined in the parameter table. Note that the current implementation does not support sampling from a truncated distribution. Instead, the samples are clipped to the bounds. 
This may introduce unwanted bias, and thus, should only be used with caution (i.e., the bounds should be chosen wide enough):", + "id": "b1a8b17d765db826" + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "plot(Prior(NORMAL, (0, 1), bounds=(-4, 4))) # negligible clipping-bias at 4 sigma\n", + "plot(Prior(UNIFORM, (0, 1), bounds=(0.1, 0.9))) # significant clipping-bias" + ], + "id": "4ac42b1eed759bdd", + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "Further distribution examples:", + "id": "45ffce1341483f24" + }, + { + "metadata": {}, + "cell_type": "code", + "source": [ + "plot(Prior(NORMAL, (10, 1), bounds=(6, 14), transformation=\"log10\"))\n", + "plot(Prior(PARAMETER_SCALE_NORMAL, (10, 1), bounds=(10**6, 10**14), transformation=\"log10\"))\n", + "plot(Prior(LAPLACE, (10, 2), bounds=(6, 14)))" + ], + "id": "581e1ac431860419", + "outputs": [], + "execution_count": null + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 2 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython2", + "version": "2.7.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/doc/modules.rst b/doc/modules.rst index 87a9559d..e933c06f 100644 --- a/doc/modules.rst +++ b/doc/modules.rst @@ -14,6 +14,7 @@ API Reference petab.v1.composite_problem petab.v1.conditions petab.v1.core + petab.v1.distributions petab.v1.lint petab.v1.measurements petab.v1.models diff --git a/petab/v1/C.py b/petab/v1/C.py index a013a0cc..be044a5c 100644 --- a/petab/v1/C.py +++ b/petab/v1/C.py @@ -173,7 +173,8 @@ LOG10 = "log10" #: Supported observable transformations OBSERVABLE_TRANSFORMATIONS = [LIN, LOG, LOG10] - +#: Supported parameter transformations +PARAMETER_SCALES = [LIN, LOG, LOG10] # 
NOISE MODELS diff --git a/petab/v1/distributions.py b/petab/v1/distributions.py new file mode 100644 index 00000000..418f5b44 --- /dev/null +++ b/petab/v1/distributions.py @@ -0,0 +1,243 @@ +"""Probability distributions used by PEtab.""" +from __future__ import annotations + +import abc + +import numpy as np +from scipy.stats import laplace, norm, uniform + +__all__ = [ + "Distribution", + "Normal", + "Uniform", + "Laplace", +] + + +class Distribution(abc.ABC): + """A univariate probability distribution. + + This class provides a common interface for sampling from and evaluating + the probability density function of a univariate probability distribution. + + The distribution can be transformed by applying a logarithm to the samples + and the PDF. This is useful, e.g., for log-normal distributions. + + :param log: If ``True``, the distribution is transformed to its + corresponding log distribution (e.g., Normal -> LogNormal). + If a float, the distribution is transformed to its corresponding + log distribution with the given base (e.g., Normal -> Log10Normal). + If ``False``, no transformation is applied. + """ + + def __init__(self, log: bool | float = False): + if log is True: + log = np.exp(1) + self._logbase = log + + def _undo_log(self, x: np.ndarray | float) -> np.ndarray | float: + """Undo the log transformation. + + :param x: The sample to transform. + :return: The transformed sample + """ + if self._logbase is False: + return x + return self._logbase**x + + def _apply_log(self, x: np.ndarray | float) -> np.ndarray | float: + """Apply the log transformation. + + :param x: The value to transform. + :return: The transformed value. + """ + if self._logbase is False: + return x + return np.log(x) / np.log(self._logbase) + + def sample(self, shape=None) -> np.ndarray: + """Sample from the distribution. + + :param shape: The shape of the sample. + :return: A sample from the distribution. 
+ """ + sample = self._sample(shape) + return self._undo_log(sample) + + @abc.abstractmethod + def _sample(self, shape=None) -> np.ndarray: + """Sample from the underlying distribution. + + :param shape: The shape of the sample. + :return: A sample from the underlying distribution, + before applying, e.g., the log transformation. + """ + ... + + def pdf(self, x): + """Probability density function at x. + + :param x: The value at which to evaluate the PDF. + :return: The value of the PDF at ``x``. + """ + # handle the log transformation; see also: + # https://en.wikipedia.org/wiki/Probability_density_function#Scalar_to_scalar + chain_rule_factor = ( + (1 / (x * np.log(self._logbase))) if self._logbase else 1 + ) + return self._pdf(self._apply_log(x)) * chain_rule_factor + + @abc.abstractmethod + def _pdf(self, x): + """Probability density function of the underlying distribution at x. + + :param x: The value at which to evaluate the PDF. + :return: The value of the PDF at ``x``. + """ + ... + + @property + def logbase(self) -> bool | float: + """The base of the log transformation. + + If ``False``, no transformation is applied. + """ + return self._logbase + + +class Normal(Distribution): + """A (log-)normal distribution. + + :param loc: The location parameter of the distribution. + :param scale: The scale parameter of the distribution. + :param truncation: The truncation limits of the distribution. + :param log: If ``True``, the distribution is transformed to a log-normal + distribution. If a float, the distribution is transformed to a + log-normal distribution with the given base. + If ``False``, no transformation is applied. + If a transformation is applied, the location and scale parameters + and the truncation limits are the location, scale and truncation limits + of the underlying normal distribution. 
+ """ + + def __init__( + self, + loc: float, + scale: float, + truncation: tuple[float, float] | None = None, + log: bool | float = False, + ): + super().__init__(log=log) + self._loc = loc + self._scale = scale + self._truncation = truncation + + if truncation is not None: + raise NotImplementedError("Truncation is not yet implemented.") + + def __repr__(self): + trunc = f", truncation={self._truncation}" if self._truncation else "" + log = f", log={self._logbase}" if self._logbase else "" + return f"Normal(loc={self._loc}, scale={self._scale}{trunc}{log})" + + def _sample(self, shape=None): + return np.random.normal(loc=self._loc, scale=self._scale, size=shape) + + def _pdf(self, x): + return norm.pdf(x, loc=self._loc, scale=self._scale) + + @property + def loc(self): + """The location parameter of the underlying distribution.""" + return self._loc + + @property + def scale(self): + """The scale parameter of the underlying distribution.""" + return self._scale + + +class Uniform(Distribution): + """A (log-)uniform distribution. + + :param low: The lower bound of the distribution. + :param high: The upper bound of the distribution. + :param log: If ``True``, the distribution is transformed to a log-uniform + distribution. If a float, the distribution is transformed to a + log-uniform distribution with the given base. + If ``False``, no transformation is applied. + If a transformation is applied, the lower and upper bounds are the + lower and upper bounds of the underlying uniform distribution. 
+ """ + + def __init__( + self, + low: float, + high: float, + *, + log: bool | float = False, + ): + super().__init__(log=log) + self._low = low + self._high = high + + def __repr__(self): + log = f", log={self._logbase}" if self._logbase else "" + return f"Uniform(low={self._low}, high={self._high}{log})" + + def _sample(self, shape=None): + return np.random.uniform(low=self._low, high=self._high, size=shape) + + def _pdf(self, x): + return uniform.pdf(x, loc=self._low, scale=self._high - self._low) + + +class Laplace(Distribution): + """A (log-)Laplace distribution. + + :param loc: The location parameter of the distribution. + :param scale: The scale parameter of the distribution. + :param truncation: The truncation limits of the distribution. + :param log: If ``True``, the distribution is transformed to a log-Laplace + distribution. If a float, the distribution is transformed to a + log-Laplace distribution with the given base. + If ``False``, no transformation is applied. + If a transformation is applied, the location and scale parameters + and the truncation limits are the location, scale and truncation limits + of the underlying Laplace distribution. 
+ """ + + def __init__( + self, + loc: float, + scale: float, + truncation: tuple[float, float] | None = None, + log: bool | float = False, + ): + super().__init__(log=log) + self._loc = loc + self._scale = scale + self._truncation = truncation + if truncation is not None: + raise NotImplementedError("Truncation is not yet implemented.") + + def __repr__(self): + trunc = f", truncation={self._truncation}" if self._truncation else "" + log = f", log={self._logbase}" if self._logbase else "" + return f"Laplace(loc={self._loc}, scale={self._scale}{trunc}{log})" + + def _sample(self, shape=None): + return np.random.laplace(loc=self._loc, scale=self._scale, size=shape) + + def _pdf(self, x): + return laplace.pdf(x, loc=self._loc, scale=self._scale) + + @property + def loc(self): + """The location parameter of the underlying distribution.""" + return self._loc + + @property + def scale(self): + """The scale parameter of the underlying distribution.""" + return self._scale diff --git a/petab/v1/parameters.py b/petab/v1/parameters.py index 8f252988..8875c84f 100644 --- a/petab/v1/parameters.py +++ b/petab/v1/parameters.py @@ -524,7 +524,8 @@ def scale( if scale_str == LOG: return np.log(parameter) if scale_str == LOG10: - return np.log10(parameter) + with np.errstate(divide="ignore"): + return np.log10(parameter) raise ValueError(f"Invalid parameter scaling: {scale_str}") diff --git a/petab/v1/priors.py b/petab/v1/priors.py index 52fec20d..f0f37f75 100644 --- a/petab/v1/priors.py +++ b/petab/v1/priors.py @@ -1,5 +1,8 @@ """Functions related to prior handling.""" +from __future__ import annotations + import copy +from typing import Literal import numpy as np import pandas as pd @@ -29,12 +32,221 @@ PARAMETER_SEPARATOR, SIMULATION_CONDITION_ID, TIME, + C, Problem, ) +from .distributions import * +from .parameters import scale, unscale __all__ = ["priors_to_measurements"] +class Prior: + """A PEtab parameter prior. 
+ + Different from the general :class:`Distribution`, this class is used to + represent the prior distribution of a PEtab parameter using the + PEtab-specific options like `parameterScale`, `*PriorType`, + `*PriorParameters`, and `lowerBound` / `upperBounds`. + + :param type_: The type of the distribution. + :param transformation: The transformation to be applied to the sample. + Ignored if `parameter_scale` is `True`. + :param parameters: The parameters of the distribution (unaffected by + `parameter_scale` and `transformation`, but in the case of + `parameterScale*` distribution types, the parameters are assumed to be + on the `parameter_scale` scale). + :param bounds: The untransformed bounds of the sample (lower, upper). + :param transformation: The transformation of the distribution. + """ + + def __init__( + self, + type_: str, + parameters: tuple, + bounds: tuple = None, + transformation: str = C.LIN, + ): + if transformation not in C.PARAMETER_SCALES: + raise ValueError( + f"Unknown parameter transformation: {transformation}" + ) + + if len(parameters) != 2: + raise ValueError( + f"Expected two parameters, got {len(parameters)}: {parameters}" + ) + + if bounds is not None and len(bounds) != 2: + raise ValueError( + "Expected (lowerBound, upperBound), got " + f"{len(bounds)}: {bounds}" + ) + + self._type = type_ + self._parameters = parameters + self._bounds = bounds + self._transformation = transformation + + # create the underlying distribution + match type_, transformation: + case (C.UNIFORM, _) | (C.PARAMETER_SCALE_UNIFORM, C.LIN): + self.distribution = Uniform(*parameters) + case (C.NORMAL, _) | (C.PARAMETER_SCALE_NORMAL, C.LIN): + self.distribution = Normal(*parameters) + case (C.LAPLACE, _) | (C.PARAMETER_SCALE_LAPLACE, C.LIN): + self.distribution = Laplace(*parameters) + case (C.PARAMETER_SCALE_UNIFORM, C.LOG): + self.distribution = Uniform(*parameters, log=True) + case (C.LOG_NORMAL, _) | (C.PARAMETER_SCALE_NORMAL, C.LOG): + self.distribution = 
Normal(*parameters, log=True) + case (C.LOG_LAPLACE, _) | (C.PARAMETER_SCALE_LAPLACE, C.LOG): + self.distribution = Laplace(*parameters, log=True) + case (C.PARAMETER_SCALE_UNIFORM, C.LOG10): + self.distribution = Uniform(*parameters, log=10) + case (C.PARAMETER_SCALE_NORMAL, C.LOG10): + self.distribution = Normal(*parameters, log=10) + case (C.PARAMETER_SCALE_LAPLACE, C.LOG10): + self.distribution = Laplace(*parameters, log=10) + case _: + raise ValueError( + "Unsupported distribution type / transformation: " + f"{type_} / {transformation}" + ) + + def __repr__(self): + return ( + f"{self.__class__.__name__}(" + f"{self.type!r}, {self.parameters!r}," + f" bounds={self.bounds!r}, transformation={self.transformation!r}," + ")" + ) + + @property + def type(self): + return self._type + + @property + def parameters(self): + return self._parameters + + @property + def bounds(self): + return self._bounds + + @property + def transformation(self): + return self._transformation + + def sample(self, shape=None) -> np.ndarray: + """Sample from the distribution. + + :param shape: The shape of the sample. + :return: A sample from the distribution. + """ + raw_sample = self.distribution.sample(shape) + return self._clip_to_bounds(self._scale_sample(raw_sample)) + + def _scale_sample(self, sample): + """Scale the sample to the parameter space""" + # if self.on_parameter_scale: + # return sample + + return scale(sample, self.transformation) + + def _clip_to_bounds(self, x): + """Clip `x` values to bounds. + + :param x: The values to clip. Assumed to be on the parameter scale. 
+ """ + # TODO: replace this by proper truncation + if self.bounds is None: + return x + + return np.maximum( + np.minimum(self.ub_scaled, x), + self.lb_scaled, + ) + + @property + def lb_scaled(self): + """The lower bound on the parameter scale.""" + return scale(self.bounds[0], self.transformation) + + @property + def ub_scaled(self): + """The upper bound on the parameter scale.""" + return scale(self.bounds[1], self.transformation) + + def pdf(self, x): + """Probability density function at x. + + :param x: The value at which to evaluate the PDF. + ``x`` is assumed to be on the parameter scale. + :return: The value of the PDF at ``x``. Note that the PDF does + currently not account for the clipping at the bounds. + """ + x = unscale(x, self.transformation) + + # scale the PDF to the parameter scale + if self.transformation == C.LIN: + coeff = 1 + elif self.transformation == C.LOG10: + coeff = x * np.log(10) + elif self.transformation == C.LOG: + coeff = x + else: + raise ValueError(f"Unknown transformation: {self.transformation}") + + return self.distribution.pdf(x) * coeff + + def neglogprior(self, x): + """Negative log-prior at x. + + :param x: The value at which to evaluate the negative log-prior. + ``x`` is assumed to be on the parameter scale. + :return: The negative log-prior at ``x``. + """ + return -np.log(self.pdf(x)) + + @staticmethod + def from_par_dict( + d, type_: Literal["initialization", "objective"] + ) -> Prior: + """Create a prior from a row of the parameter table. + + :param d: A dictionary representing a row of the parameter table. + :param type_: The prior kind (``initialization`` or ``objective``). + :return: The corresponding :class:`Prior`. 
+ """ + dist_type = d.get(f"{type_}PriorType", C.PARAMETER_SCALE_UNIFORM) + if not isinstance(dist_type, str) and np.isnan(dist_type): + dist_type = C.PARAMETER_SCALE_UNIFORM + + pscale = d.get(C.PARAMETER_SCALE, C.LIN) + if ( + pd.isna(d.get(f"{type_}PriorParameters")) + and dist_type == C.PARAMETER_SCALE_UNIFORM + ): + params = ( + scale(d[C.LOWER_BOUND], pscale), + scale(d[C.UPPER_BOUND], pscale), + ) + else: + params = tuple( + map( + float, + d[f"{type_}PriorParameters"].split(C.PARAMETER_SEPARATOR), + ) + ) + return Prior( + type_=dist_type, + parameters=params, + bounds=(d[C.LOWER_BOUND], d[C.UPPER_BOUND]), + transformation=pscale, + ) + + def priors_to_measurements(problem: Problem): """Convert priors to measurements. diff --git a/petab/v1/sampling.py b/petab/v1/sampling.py index be154f1c..a046879f 100644 --- a/petab/v1/sampling.py +++ b/petab/v1/sampling.py @@ -5,7 +5,6 @@ import numpy as np import pandas as pd -from . import parameters from .C import * # noqa: F403 __all__ = ["sample_from_prior", "sample_parameter_startpoints"] @@ -24,86 +23,14 @@ def sample_from_prior( Returns: Array with sampled values """ + from .priors import Prior + # unpack info p_type, p_params, scaling, bounds = prior - - # define a function to rescale the sampled points to parameter scale - def scale(x): - if scaling == LIN: - return x - if scaling == LOG: - return np.log(x) - if scaling == LOG10: - return np.log10(x) - raise NotImplementedError( - f"Parameter priors on the parameter scale {scaling} are " - "currently not implemented." 
- ) - - def clip_to_bounds(x: np.array): - """Clip values in array x to bounds""" - return np.maximum(np.minimum(scale(bounds[1]), x), scale(bounds[0])) - - # define lambda functions for each parameter - if p_type == UNIFORM: - sp = scale( - (p_params[1] - p_params[0]) * np.random.random((n_starts,)) - + p_params[0] - ) - - elif p_type == PARAMETER_SCALE_UNIFORM: - sp = (p_params[1] - p_params[0]) * np.random.random( - (n_starts,) - ) + p_params[0] - - elif p_type == NORMAL: - sp = scale( - np.random.normal( - loc=p_params[0], scale=p_params[1], size=(n_starts,) - ) - ) - - elif p_type == LOG_NORMAL: - sp = scale( - np.exp( - np.random.normal( - loc=p_params[0], scale=p_params[1], size=(n_starts,) - ) - ) - ) - - elif p_type == PARAMETER_SCALE_NORMAL: - sp = np.random.normal( - loc=p_params[0], scale=p_params[1], size=(n_starts,) - ) - - elif p_type == LAPLACE: - sp = scale( - np.random.laplace( - loc=p_params[0], scale=p_params[1], size=(n_starts,) - ) - ) - - elif p_type == LOG_LAPLACE: - sp = scale( - np.exp( - np.random.laplace( - loc=p_params[0], scale=p_params[1], size=(n_starts,) - ) - ) - ) - - elif p_type == PARAMETER_SCALE_LAPLACE: - sp = np.random.laplace( - loc=p_params[0], scale=p_params[1], size=(n_starts,) - ) - - else: - raise NotImplementedError( - f"Parameter priors of type {prior[0]} are not implemented." 
- ) - - return clip_to_bounds(sp) + prior = Prior( + p_type, tuple(p_params), bounds=tuple(bounds), transformation=scaling + ) + return prior.sample(shape=(n_starts,)) def sample_parameter_startpoints( @@ -127,14 +54,27 @@ def sample_parameter_startpoints( Array of sampled starting points with dimensions `n_startpoints` x `n_optimization_parameters` """ + from .priors import Prior + if seed is not None: np.random.seed(seed) - # get types and parameters of priors from dataframe - prior_list = parameters.get_priors_from_df( - parameter_df, mode=INITIALIZATION, parameter_ids=parameter_ids - ) + par_to_estimate = parameter_df.loc[parameter_df[ESTIMATE] == 1] - startpoints = [sample_from_prior(prior, n_starts) for prior in prior_list] + if parameter_ids is not None: + try: + par_to_estimate = par_to_estimate.loc[parameter_ids, :] + except KeyError as e: + missing_ids = set(parameter_ids) - set(par_to_estimate.index) + raise KeyError( + "Parameter table does not contain estimated parameter(s) " + f"{missing_ids}." 
+ ) from e - return np.array(startpoints).T + # get types and parameters of priors from dataframe + return np.array( + [ + Prior.from_par_dict(row, type_="initialization").sample(n_starts) + for row in par_to_estimate.to_dict("records") + ] + ).T diff --git a/tests/v1/test_distributions.py b/tests/v1/test_distributions.py new file mode 100644 index 00000000..9df830fa --- /dev/null +++ b/tests/v1/test_distributions.py @@ -0,0 +1,87 @@ +import numpy as np +import pytest +from numpy.testing import assert_allclose +from scipy.integrate import cumulative_trapezoid +from scipy.stats import ( + kstest, + laplace, + loglaplace, + lognorm, + loguniform, + norm, + uniform, +) + +from petab.v1.distributions import * +from petab.v2.C import * + + +@pytest.mark.parametrize( + "distribution", + [ + Normal(2, 1), + Normal(2, 1, log=True), + Normal(2, 1, log=10), + Uniform(2, 4), + Uniform(-2, 4, log=True), + Uniform(2, 4, log=10), + Laplace(1, 2), + Laplace(1, 0.5, log=True), + ], +) +def test_sample_matches_pdf(distribution): + """Test that the sample matches the PDF.""" + np.random.seed(1) + N_SAMPLES = 10_000 + sample = distribution.sample(N_SAMPLES) + + def cdf(x): + # pdf -> cdf + return cumulative_trapezoid(distribution.pdf(x), x) + + # Kolmogorov-Smirnov test to check if the sample is drawn from the CDF + _, p = kstest(sample, cdf) + + # if p < 0.05: + # import matplotlib.pyplot as plt + # plt.hist(sample, bins=100, density=True) + # x = np.linspace(min(sample), max(sample), 100) + # plt.plot(x, distribution.pdf(x)) + # plt.show() + + assert p > 0.05, (p, distribution) + + # Test samples match scipy CDFs + reference_pdf = None + if isinstance(distribution, Normal) and distribution.logbase is False: + reference_pdf = norm.pdf(sample, distribution.loc, distribution.scale) + elif isinstance(distribution, Uniform) and distribution.logbase is False: + reference_pdf = uniform.pdf( + sample, distribution._low, distribution._high - distribution._low + ) + elif 
isinstance(distribution, Laplace) and distribution.logbase is False: + reference_pdf = laplace.pdf( + sample, distribution.loc, distribution.scale + ) + elif isinstance(distribution, Normal) and distribution.logbase == np.exp( + 1 + ): + reference_pdf = lognorm.pdf( + sample, scale=np.exp(distribution.loc), s=distribution.scale + ) + elif isinstance(distribution, Uniform) and distribution.logbase == np.exp( + 1 + ): + reference_pdf = loguniform.pdf( + sample, np.exp(distribution._low), np.exp(distribution._high) + ) + elif isinstance(distribution, Laplace) and distribution.logbase == np.exp( + 1 + ): + reference_pdf = loglaplace.pdf( + sample, c=1 / distribution.scale, scale=np.exp(distribution.loc) + ) + if reference_pdf is not None: + assert_allclose( + distribution.pdf(sample), reference_pdf, rtol=1e-10, atol=1e-14 + ) diff --git a/tests/v1/test_priors.py b/tests/v1/test_priors.py index ac07d089..d98879e3 100644 --- a/tests/v1/test_priors.py +++ b/tests/v1/test_priors.py @@ -1,11 +1,13 @@ from copy import deepcopy +from itertools import product from pathlib import Path import benchmark_models_petab import numpy as np import pandas as pd import pytest -from scipy.stats import norm +from scipy.integrate import cumulative_trapezoid +from scipy.stats import kstest import petab.v1 from petab.v1 import ( @@ -14,10 +16,11 @@ OBJECTIVE_PRIOR_TYPE, OBSERVABLE_ID, SIMULATION, + C, get_simulation_conditions, get_simulation_df, ) -from petab.v1.priors import priors_to_measurements +from petab.v1.priors import Prior, priors_to_measurements @pytest.mark.parametrize( @@ -48,6 +51,13 @@ def test_priors_to_measurements(problem_id): strict=True, ) ) + x_unscaled_dict = dict( + zip( + original_problem.x_free_ids, + original_problem.x_nominal_free, + strict=True, + ) + ) # convert priors to measurements petab_problem_measurements = priors_to_measurements(petab_problem_priors) @@ -110,9 +120,13 @@ def test_priors_to_measurements(problem_id): def apply_parameter_values(row): # apply 
the parameter values to the observable formula for the prior if row[OBSERVABLE_ID].startswith("prior_"): - row[SIMULATION] = x_scaled_dict[ - row[OBSERVABLE_ID].removeprefix("prior_") - ] + parameter_id = row[OBSERVABLE_ID].removeprefix("prior_") + if original_problem.parameter_df.loc[ + parameter_id, OBJECTIVE_PRIOR_TYPE + ].startswith("parameterScale"): + row[SIMULATION] = x_scaled_dict[parameter_id] + else: + row[SIMULATION] = x_unscaled_dict[parameter_id] return row simulated_prior_observables = simulated_prior_observables.apply( @@ -140,29 +154,61 @@ def apply_parameter_values(row): (petab_problem_priors.parameter_df[ESTIMATE] == 1) & petab_problem_priors.parameter_df[OBJECTIVE_PRIOR_TYPE].notna() ] - priors = petab.v1.get_priors_from_df( - petab_problem_priors.parameter_df, - mode="objective", - parameter_ids=parameter_ids, - ) + priors = [ + Prior.from_par_dict( + petab_problem_priors.parameter_df.loc[par_id], type_="objective" + ) + for par_id in parameter_ids + ] prior_contrib = 0 for parameter_id, prior in zip(parameter_ids, priors, strict=True): - prior_type, prior_pars, par_scale, par_bounds = prior - if prior_type == petab.v1.PARAMETER_SCALE_NORMAL: - prior_contrib += norm.logpdf( - x_scaled_dict[parameter_id], - loc=prior_pars[0], - scale=prior_pars[1], - ) - else: - # enable other models, once libpetab has proper support for - # evaluating the prior contribution. 
until then, two test - # problems should suffice - assert problem_id == "Raimundez_PCB2020" - pytest.skip(f"Prior type {prior_type} not implemented") + prior_contrib -= prior.neglogprior(x_scaled_dict[parameter_id]) assert np.isclose( - llh_priors + prior_contrib, llh_measurements, rtol=1e-3, atol=1e-16 + llh_priors + prior_contrib, llh_measurements, rtol=1e-8, atol=1e-16 ), (llh_priors + prior_contrib, llh_measurements) # check that the tolerance is not too high assert np.abs(prior_contrib) > 1e-8 * np.abs(llh_priors) + + +cases = list( + product( + [ + (C.NORMAL, (10, 1)), + (C.LOG_NORMAL, (2, 1)), + (C.UNIFORM, (1, 2)), + (C.LAPLACE, (20, 2)), + (C.LOG_LAPLACE, (1, 0.5)), + (C.PARAMETER_SCALE_NORMAL, (1, 1)), + (C.PARAMETER_SCALE_LAPLACE, (1, 2)), + (C.PARAMETER_SCALE_UNIFORM, (1, 2)), + ], + C.PARAMETER_SCALES, + ) +) +ids = [f"{prior_args[0]}_{transform}" for prior_args, transform in cases] + + +@pytest.mark.parametrize("prior_args, transform", cases, ids=ids) +def test_sample_matches_pdf(prior_args, transform): + """Test that the sample matches the PDF.""" + np.random.seed(1) + N_SAMPLES = 10_000 + prior = Prior(*prior_args, transformation=transform) + sample = prior.sample(N_SAMPLES) + + # pdf -> cdf + def cdf(x): + return cumulative_trapezoid(prior.pdf(x), x) + + # Kolmogorov-Smirnov test to check if the sample is drawn from the CDF + _, p = kstest(sample, cdf) + + # if p < 0.05: + # import matplotlib.pyplot as plt + # plt.hist(sample, bins=100, density=True) + # x = np.linspace(min(sample), max(sample), 100) + # plt.plot(x, distribution.pdf(x)) + # plt.show() + + assert p > 0.05, (p, prior) From b83e3453a2aa819f4c75efc49667124323b44edb Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Wed, 11 Dec 2024 21:14:23 +0100 Subject: [PATCH 013/141] Fix `get_required_parameters_for_parameter_table` (#340) The previous check did not make any sense. See discussion at https://github.com/PEtab-dev/libpetab-python/pull/339#discussion_r1878856556. 
Also fix a missing import. --- petab/v1/parameters.py | 5 ++--- petab/v2/lint.py | 7 ++----- petab/v2/problem.py | 8 ++++++-- 3 files changed, 10 insertions(+), 10 deletions(-) diff --git a/petab/v1/parameters.py b/petab/v1/parameters.py index 8875c84f..69957911 100644 --- a/petab/v1/parameters.py +++ b/petab/v1/parameters.py @@ -332,9 +332,8 @@ def append_overrides(overrides): if not model.has_entity_with_id(p): parameter_ids[p] = None - # remove parameters that occur in the condition table and are overridden - # for ALL conditions - for p in condition_df.columns[~condition_df.isnull().any()]: + # parameters that are overridden via the condition table are not allowed + for p in condition_df.columns: try: del parameter_ids[p] except KeyError: diff --git a/petab/v2/lint.py b/petab/v2/lint.py index fdf6de0c..458882f6 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -621,11 +621,8 @@ def append_overrides(overrides): if not problem.model.has_entity_with_id(p) ) - # remove parameters that occur in the condition table and are overridden - # for ALL conditions - for p in problem.condition_df.columns[ - ~problem.condition_df.isnull().any() - ]: + # parameters that are overridden via the condition table are not allowed + for p in problem.condition_df.columns: try: parameter_ids.remove(p) except KeyError: diff --git a/petab/v2/problem.py b/petab/v2/problem.py index b61d8b14..01c84f9b 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -32,7 +32,7 @@ from . import experiments if TYPE_CHECKING: - from ..v2.lint import ValidationIssue, ValidationResultList, ValidationTask + from ..v2.lint import ValidationResultList, ValidationTask __all__ = ["Problem"] @@ -722,7 +722,11 @@ def validate( Returns: A list of validation results. 
""" - from ..v2.lint import ValidationIssueSeverity, ValidationResultList + from ..v2.lint import ( + ValidationIssue, + ValidationIssueSeverity, + ValidationResultList, + ) validation_results = ValidationResultList() if self.extensions_config: From 540710d267905ba9ab1018cd21c0db541d48f0e0 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Wed, 11 Dec 2024 22:02:01 +0100 Subject: [PATCH 014/141] v2: Adapt to long conditions table (#339) Update creation of PEtab problems, validation, conversion from v1 to v2,... to the new long condition table. Many TODOs remain. To be continued after #337. --------- Co-authored-by: Dilan Pathirana <59329744+dilpath@users.noreply.github.com> --- petab/v1/__init__.py | 2 + petab/v1/lint.py | 2 +- petab/v1/parameters.py | 4 +- petab/v2/C.py | 3 +- petab/v2/__init__.py | 34 +++-- petab/v2/_helpers.py | 2 + petab/v2/conditions.py | 67 ++++++++++ petab/v2/lint.py | 244 ++++++++++++++++++++++++++++-------- petab/v2/petab1to2.py | 123 ++++++++++++++---- petab/v2/problem.py | 34 +++-- tests/v2/test_conversion.py | 33 +++-- tests/v2/test_problem.py | 19 +-- tox.ini | 2 +- 13 files changed, 456 insertions(+), 113 deletions(-) create mode 100644 petab/v2/_helpers.py create mode 100644 petab/v2/conditions.py diff --git a/petab/v1/__init__.py b/petab/v1/__init__.py index a8609621..cd21b88a 100644 --- a/petab/v1/__init__.py +++ b/petab/v1/__init__.py @@ -4,6 +4,7 @@ """ from ..version import __version__ # noqa: F401, E402 +from . 
import models # noqa: F401, E402 from .C import * # noqa: F403, F401, E402 from .calculate import * # noqa: F403, F401, E402 from .composite_problem import * # noqa: F403, F401, E402 @@ -13,6 +14,7 @@ from .lint import * # noqa: F403, F401, E402 from .mapping import * # noqa: F403, F401, E402 from .measurements import * # noqa: F403, F401, E402 +from .models import Model # noqa: F401, E402 from .observables import * # noqa: F403, F401, E402 from .parameter_mapping import * # noqa: F403, F401, E402 from .parameters import * # noqa: F403, F401, E402 diff --git a/petab/v1/lint.py b/petab/v1/lint.py index 6f70520b..e970bfde 100644 --- a/petab/v1/lint.py +++ b/petab/v1/lint.py @@ -67,7 +67,7 @@ def _check_df(df: pd.DataFrame, req_cols: Iterable, name: str) -> None: """ if missing_cols := set(req_cols) - set(df.columns.values): raise AssertionError( - f"DataFrame {name} requires the columns {missing_cols}." + f"{name.capitalize()} table requires the columns {missing_cols}." ) diff --git a/petab/v1/parameters.py b/petab/v1/parameters.py index 69957911..20457dcb 100644 --- a/petab/v1/parameters.py +++ b/petab/v1/parameters.py @@ -3,7 +3,7 @@ import numbers import warnings from collections import OrderedDict -from collections.abc import Iterable, Sequence +from collections.abc import Iterable, Sequence, Set from pathlib import Path from typing import ( Literal, @@ -258,7 +258,7 @@ def get_required_parameters_for_parameter_table( observable_df: pd.DataFrame, measurement_df: pd.DataFrame, mapping_df: pd.DataFrame = None, -) -> set[str]: +) -> Set[str]: """ Get set of parameters which need to go into the parameter table diff --git a/petab/v2/C.py b/petab/v2/C.py index 2d55355a..cb095c68 100644 --- a/petab/v2/C.py +++ b/petab/v2/C.py @@ -136,7 +136,6 @@ # TODO: removed? 
#: Condition name column in the condition table CONDITION_NAME = "conditionName" - #: Column in the condition table with the ID of an entity that is changed TARGET_ID = "targetId" #: Column in the condition table with the type of value that is changed @@ -166,6 +165,8 @@ TARGET_VALUE, ] +CONDITION_DF_REQUIRED_COLS = CONDITION_DF_COLS + # EXPERIMENTS EXPERIMENT_DF_REQUIRED_COLS = [ EXPERIMENT_ID, diff --git a/petab/v2/__init__.py b/petab/v2/__init__.py index ca55f7d0..0525d66c 100644 --- a/petab/v2/__init__.py +++ b/petab/v2/__init__.py @@ -4,17 +4,35 @@ """ from warnings import warn -from ..v1 import * # noqa: F403, F401, E402 -from .experiments import ( # noqa: F401 - get_experiment_df, - write_experiment_df, -) - -# import after v1 -from .problem import Problem # noqa: F401 +# TODO: remove v1 star imports +from ..v1.calculate import * # noqa: F403, F401, E402 +from ..v1.composite_problem import * # noqa: F403, F401, E402 +from ..v1.core import * # noqa: F403, F401, E402 +from ..v1.format_version import __format_version__ # noqa: F401, E402 +from ..v1.mapping import * # noqa: F403, F401, E402 +from ..v1.measurements import * # noqa: F403, F401, E402 +from ..v1.observables import * # noqa: F403, F401, E402 +from ..v1.parameter_mapping import * # noqa: F403, F401, E402 +from ..v1.parameters import * # noqa: F403, F401, E402 +from ..v1.sampling import * # noqa: F403, F401, E402 +from ..v1.sbml import * # noqa: F403, F401, E402 +from ..v1.simulate import * # noqa: F403, F401, E402 +from ..v1.yaml import * # noqa: F403, F401, E402 warn( "Support for PEtab2.0 and all of petab.v2 is experimental " "and subject to changes!", stacklevel=1, ) + +# import after v1 +from ..version import __version__ # noqa: F401, E402 +from . 
import models # noqa: F401, E402 +from .conditions import * # noqa: F403, F401, E402 +from .experiments import ( # noqa: F401, E402 + get_experiment_df, + write_experiment_df, +) +from .lint import lint_problem # noqa: F401, E402 +from .models import Model # noqa: F401, E402 +from .problem import Problem # noqa: F401, E402 diff --git a/petab/v2/_helpers.py b/petab/v2/_helpers.py new file mode 100644 index 00000000..a7522f35 --- /dev/null +++ b/petab/v2/_helpers.py @@ -0,0 +1,2 @@ +"""Various internal helper functions.""" +from ..v1.core import to_float_if_float # noqa: F401, E402 diff --git a/petab/v2/conditions.py b/petab/v2/conditions.py new file mode 100644 index 00000000..7bb6d262 --- /dev/null +++ b/petab/v2/conditions.py @@ -0,0 +1,67 @@ +"""Functions operating on the PEtab condition table""" +from __future__ import annotations + +from pathlib import Path + +import pandas as pd +import sympy as sp + +from .. import v2 +from ..v1.math import sympify_petab +from .C import * +from .lint import assert_no_leading_trailing_whitespace + +__all__ = [ + "get_condition_df", + "write_condition_df", +] + + +def get_condition_df( + condition_file: str | pd.DataFrame | Path | None, +) -> pd.DataFrame | None: + """Read the provided condition file into a ``pandas.Dataframe``. 
+ + Arguments: + condition_file: File name of PEtab condition file or pandas.Dataframe + """ + if condition_file is None: + return condition_file + + if isinstance(condition_file, str | Path): + condition_file = pd.read_csv( + condition_file, sep="\t", float_precision="round_trip" + ) + + assert_no_leading_trailing_whitespace( + condition_file.columns.values, "condition" + ) + + return condition_file + + +def write_condition_df(df: pd.DataFrame, filename: str | Path) -> None: + """Write PEtab condition table + + Arguments: + df: PEtab condition table + filename: Destination file name + """ + df = get_condition_df(df) + df.to_csv(filename, sep="\t", index=False) + + +def get_condition_table_free_symbols(problem: v2.Problem) -> set[sp.Basic]: + """Free symbols from condition table assignments. + + Collects all free symbols from the condition table `targetValue` column. + + :returns: Set of free symbols. + """ + if problem.condition_df is None: + return set() + + free_symbols = set() + for target_value in problem.condition_df[TARGET_VALUE]: + free_symbols |= sympify_petab(target_value).free_symbols + return free_symbols diff --git a/petab/v2/lint.py b/petab/v2/lint.py index 458882f6..76f3cdb6 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -3,6 +3,8 @@ import logging from abc import ABC, abstractmethod +from collections import OrderedDict +from collections.abc import Set from dataclasses import dataclass, field from enum import IntEnum from pathlib import Path @@ -10,9 +12,10 @@ import numpy as np import pandas as pd -from petab.v1.conditions import get_parametric_overrides -from petab.v1.lint import ( +from .. 
import v2 +from ..v1.lint import ( _check_df, + assert_model_parameters_in_condition_or_parameter_table, assert_no_leading_trailing_whitespace, assert_parameter_bounds_are_numeric, assert_parameter_estimate_is_boolean, @@ -22,25 +25,14 @@ assert_parameter_scale_is_valid, assert_unique_parameter_ids, check_ids, - check_parameter_bounds, -) -from petab.v1.measurements import split_parameter_replacement_list -from petab.v1.observables import get_output_parameters, get_placeholders -from petab.v1.parameters import ( - get_valid_parameters_for_parameter_table, -) -from petab.v1.visualize.lint import validate_visualization_df -from petab.v2 import ( - assert_model_parameters_in_condition_or_parameter_table, -) -from petab.v2.C import * - -from ..v1 import ( - assert_measurement_conditions_present_in_condition_table, - check_condition_df, check_measurement_df, check_observable_df, + check_parameter_bounds, ) +from ..v1.measurements import split_parameter_replacement_list +from ..v1.observables import get_output_parameters, get_placeholders +from ..v1.visualize.lint import validate_visualization_df +from ..v2.C import * from .problem import Problem logger = logging.getLogger(__name__) @@ -247,15 +239,55 @@ def run(self, problem: Problem) -> ValidationIssue | None: try: check_measurement_df(problem.measurement_df, problem.observable_df) - - if problem.condition_df is not None: - # TODO: handle missing condition_df - assert_measurement_conditions_present_in_condition_table( - problem.measurement_df, problem.condition_df - ) except AssertionError as e: return ValidationError(str(e)) + # TODO: introduce some option for validation partial vs full + # problem. 
if this is supposed to be a complete problem, a missing + # condition table should be an error if the measurement table refers + # to conditions + + # check that measured experiments/conditions exist + # TODO: fully switch to experiment table and remove this: + if SIMULATION_CONDITION_ID in problem.measurement_df: + if problem.condition_df is None: + return + used_conditions = set( + problem.measurement_df[SIMULATION_CONDITION_ID].dropna().values + ) + if PREEQUILIBRATION_CONDITION_ID in problem.measurement_df: + used_conditions |= set( + problem.measurement_df[PREEQUILIBRATION_CONDITION_ID] + .dropna() + .values + ) + available_conditions = set( + problem.condition_df[CONDITION_ID].unique() + ) + if missing_conditions := (used_conditions - available_conditions): + return ValidationError( + "Measurement table references conditions that " + "are not specified in the condition table: " + + str(missing_conditions) + ) + elif EXPERIMENT_ID in problem.measurement_df: + if problem.experiment_df is None: + return + used_experiments = set( + problem.measurement_df[EXPERIMENT_ID].values + ) + available_experiments = set( + problem.condition_df[CONDITION_ID].unique() + ) + if missing_experiments := ( + used_experiments - available_experiments + ): + raise AssertionError( + "Measurement table references experiments that " + "are not specified in the experiments table: " + + str(missing_experiments) + ) + class CheckConditionTable(ValidationTask): """A task to validate the condition table of a PEtab problem.""" @@ -264,16 +296,42 @@ def run(self, problem: Problem) -> ValidationIssue | None: if problem.condition_df is None: return + df = problem.condition_df + try: - check_condition_df( - problem.condition_df, - model=problem.model, - observable_df=problem.observable_df, - mapping_df=problem.mapping_df, - ) + _check_df(df, CONDITION_DF_REQUIRED_COLS, "condition") + check_ids(df[CONDITION_ID], kind="condition") + check_ids(df[TARGET_ID], kind="target") except AssertionError as 
e: return ValidationError(str(e)) + # TODO: check value types + + if problem.model is None: + return + + # check targets are valid + allowed_targets = set( + problem.model.get_valid_ids_for_condition_table() + ) + if problem.observable_df is not None: + allowed_targets |= set( + get_output_parameters( + model=problem.model, + observable_df=problem.observable_df, + mapping_df=problem.mapping_df, + ) + ) + if problem.mapping_df is not None: + allowed_targets |= set(problem.mapping_df.index.values) + invalid = set(df[TARGET_ID].unique()) - allowed_targets + if invalid: + return ValidationError( + f"Condition table contains invalid targets: {invalid}" + ) + + # TODO check that all value types are valid for the given targets + class CheckObservableTable(ValidationTask): """A task to validate the observable table of a PEtab problem.""" @@ -454,14 +512,7 @@ def run(self, problem: Problem) -> ValidationIssue | None: return required = get_required_parameters_for_parameter_table(problem) - - allowed = get_valid_parameters_for_parameter_table( - model=problem.model, - condition_df=problem.condition_df, - observable_df=problem.observable_df, - measurement_df=problem.measurement_df, - mapping_df=problem.mapping_df, - ) + allowed = get_valid_parameters_for_parameter_table(problem) actual = set(problem.parameter_df.index) missing = required - actual @@ -542,9 +593,103 @@ def run(self, problem: Problem) -> ValidationIssue | None: ) -def get_required_parameters_for_parameter_table( +def get_valid_parameters_for_parameter_table( problem: Problem, ) -> set[str]: + """ + Get set of parameters which may be present inside the parameter table + + Arguments: + model: PEtab model + condition_df: PEtab condition table + observable_df: PEtab observable table + measurement_df: PEtab measurement table + mapping_df: PEtab mapping table for additional checks + + Returns: + Set of parameter IDs which PEtab allows to be present in the + parameter table. 
+ """ + # - grab all allowed model parameters + # - grab corresponding names from mapping table + # - grab all output parameters defined in {observable,noise}Formula + # - grab all parameters from measurement table + # - grab all parametric overrides from condition table + # - remove parameters for which condition table columns exist + # - remove placeholder parameters + # (only partial overrides are not supported) + model = problem.model + condition_df = problem.condition_df + observable_df = problem.observable_df + measurement_df = problem.measurement_df + mapping_df = problem.mapping_df + + # must not go into parameter table + blackset = set() + + if observable_df is not None: + placeholders = set(get_placeholders(observable_df)) + + # collect assignment targets + blackset |= placeholders + + if condition_df is not None: + blackset |= set(condition_df.columns.values) - {CONDITION_NAME} + + # don't use sets here, to have deterministic ordering, + # e.g. for creating parameter tables + parameter_ids = OrderedDict.fromkeys( + p + for p in model.get_valid_parameters_for_parameter_table() + if p not in blackset + ) + + if mapping_df is not None: + for from_id, to_id in mapping_df[MODEL_ENTITY_ID].items(): + if to_id in parameter_ids.keys(): + parameter_ids[from_id] = None + + if observable_df is not None: + # add output parameters from observables table + output_parameters = get_output_parameters( + observable_df=observable_df, model=model + ) + for p in output_parameters: + if p not in blackset: + parameter_ids[p] = None + + # Append parameters from measurement table, unless they occur as condition + # table columns + def append_overrides(overrides): + for p in overrides: + if isinstance(p, str) and p not in blackset: + parameter_ids[p] = None + + if measurement_df is not None: + for _, row in measurement_df.iterrows(): + # we trust that the number of overrides matches + append_overrides( + split_parameter_replacement_list( + row.get(OBSERVABLE_PARAMETERS, None) + ) 
+ ) + append_overrides( + split_parameter_replacement_list( + row.get(NOISE_PARAMETERS, None) + ) + ) + + # Append parameter overrides from condition table + if condition_df is not None: + for p in v2.conditions.get_condition_table_free_symbols(problem): + parameter_ids[str(p)] = None + + return set(parameter_ids.keys()) + + +def get_required_parameters_for_parameter_table( + problem: Problem, +) -> Set[str]: """ Get set of parameters which need to go into the parameter table @@ -563,7 +708,11 @@ def append_overrides(overrides): parameter_ids.update( p for p in overrides - if isinstance(p, str) and p not in problem.condition_df.columns + if isinstance(p, str) + and ( + problem.condition_df is None + or p not in problem.condition_df[TARGET_ID] + ) ) for _, row in problem.measurement_df.iterrows(): @@ -616,17 +765,13 @@ def append_overrides(overrides): # Add condition table parametric overrides unless already defined in the # model parameter_ids.update( - p - for p in get_parametric_overrides(problem.condition_df) - if not problem.model.has_entity_with_id(p) + str(p) + for p in v2.conditions.get_condition_table_free_symbols(problem) + if not problem.model.has_entity_with_id(str(p)) ) # parameters that are overridden via the condition table are not allowed - for p in problem.condition_df.columns: - try: - parameter_ids.remove(p) - except KeyError: - pass + parameter_ids -= set(problem.condition_df[TARGET_ID].unique()) return parameter_ids @@ -647,6 +792,7 @@ def append_overrides(overrides): CheckObservablesDoNotShadowModelEntities(), CheckParameterTable(), CheckAllParametersPresentInParameterTable(), - CheckVisualizationTable(), + # TODO: atomize checks, update to long condition table, re-enable + # CheckVisualizationTable(), CheckValidParameterInConditionOrParameterTable(), ] diff --git a/petab/v2/petab1to2.py b/petab/v2/petab1to2.py index 866414c3..d5d06229 100644 --- a/petab/v2/petab1to2.py +++ b/petab/v2/petab1to2.py @@ -1,18 +1,20 @@ """Convert PEtab version 1 
problems to version 2.""" import shutil +from contextlib import suppress from itertools import chain from pathlib import Path from urllib.parse import urlparse +import numpy as np +import pandas as pd from pandas.io.common import get_handle, is_url -import petab.v1.C as C +import petab.v1.C from petab.models import MODEL_TYPE_SBML from petab.v1 import Problem as ProblemV1 -from petab.v2.lint import lint_problem as lint_v2_problem from petab.yaml import get_path_prefix -from ..v1 import lint_problem as lint_v1_problem +from .. import v1, v2 from ..v1.yaml import load_yaml, validate, write_yaml from ..versions import get_major_version @@ -61,8 +63,8 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None): if get_major_version(yaml_config) != 1: raise ValueError("PEtab problem is not version 1.") petab_problem = ProblemV1.from_yaml(yaml_file or yaml_config) - if lint_v1_problem(petab_problem): - raise ValueError("PEtab problem does not pass linting.") + if v1.lint_problem(petab_problem): + raise ValueError("Provided PEtab problem does not pass linting.") # Update YAML file new_yaml_config = _update_yaml(yaml_config) @@ -76,28 +78,55 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None): # Update tables # condition tables, observable tables, SBML files, parameter table: # no changes - just copy - file = yaml_config[C.PARAMETER_FILE] + file = yaml_config[v2.C.PARAMETER_FILE] _copy_file(get_src_path(file), Path(get_dest_path(file))) - for problem_config in yaml_config[C.PROBLEMS]: + for problem_config in yaml_config[v2.C.PROBLEMS]: for file in chain( - problem_config.get(C.CONDITION_FILES, []), - problem_config.get(C.OBSERVABLE_FILES, []), + problem_config.get(v2.C.OBSERVABLE_FILES, []), ( - model[C.MODEL_LOCATION] - for model in problem_config.get(C.MODEL_FILES, {}).values() + model[v2.C.MODEL_LOCATION] + for model in problem_config.get(v2.C.MODEL_FILES, {}).values() ), - problem_config.get(C.MEASUREMENT_FILES, []), - 
problem_config.get(C.VISUALIZATION_FILES, []), + problem_config.get(v2.C.VISUALIZATION_FILES, []), ): _copy_file(get_src_path(file), Path(get_dest_path(file))) + # Update condition table + for condition_file in problem_config.get(v2.C.CONDITION_FILES, []): + condition_df = v1.get_condition_df(get_src_path(condition_file)) + condition_df = v1v2_condition_df(condition_df, petab_problem.model) + v2.write_condition_df(condition_df, get_dest_path(condition_file)) + + for measurement_file in problem_config.get(v2.C.MEASUREMENT_FILES, []): + measurement_df = v1.get_measurement_df( + get_src_path(measurement_file) + ) + if ( + petab_problem.condition_df is not None + and len( + set(petab_problem.condition_df.columns) + - {petab.v1.C.CONDITION_NAME} + ) + == 0 + ): + # can't have "empty" conditions with no overrides in v2 + # TODO: this needs to be done condition wise + measurement_df[v2.C.SIMULATION_CONDITION_ID] = np.nan + if ( + v1.C.PREEQUILIBRATION_CONDITION_ID + in measurement_df.columns + ): + measurement_df[v2.C.PREEQUILIBRATION_CONDITION_ID] = np.nan + v2.write_measurement_df( + measurement_df, get_dest_path(measurement_file) + ) # TODO: Measurements: preequilibration to experiments/timecourses once # finalized ... 
# validate updated Problem - validation_issues = lint_v2_problem(new_yaml_file) + validation_issues = v2.lint_problem(new_yaml_file) if validation_issues: raise ValueError( @@ -111,23 +140,23 @@ def _update_yaml(yaml_config: dict) -> dict: yaml_config = yaml_config.copy() # Update format_version - yaml_config[C.FORMAT_VERSION] = "2.0.0" + yaml_config[v2.C.FORMAT_VERSION] = "2.0.0" # Add extensions - yaml_config[C.EXTENSIONS] = [] + yaml_config[v2.C.EXTENSIONS] = [] # Move models and set IDs (filename for now) - for problem in yaml_config[C.PROBLEMS]: - problem[C.MODEL_FILES] = {} - models = problem[C.MODEL_FILES] - for sbml_file in problem[C.SBML_FILES]: + for problem in yaml_config[v2.C.PROBLEMS]: + problem[v2.C.MODEL_FILES] = {} + models = problem[v2.C.MODEL_FILES] + for sbml_file in problem[v1.C.SBML_FILES]: model_id = sbml_file.split("/")[-1].split(".")[0] models[model_id] = { - C.MODEL_LANGUAGE: MODEL_TYPE_SBML, - C.MODEL_LOCATION: sbml_file, + v2.C.MODEL_LANGUAGE: MODEL_TYPE_SBML, + v2.C.MODEL_LOCATION: sbml_file, } - problem[C.MODEL_FILES] = problem.get(C.MODEL_FILES, {}) - del problem[C.SBML_FILES] + problem[v2.C.MODEL_FILES] = problem.get(v2.C.MODEL_FILES, {}) + del problem[v1.C.SBML_FILES] return yaml_config @@ -152,3 +181,49 @@ def _copy_file(src: Path | str, dest: Path): return except FileNotFoundError: shutil.copy(str(src), str(dest)) + + +def v1v2_condition_df( + condition_df: pd.DataFrame, model: v1.Model +) -> pd.DataFrame: + """Convert condition table from petab v1 to v2.""" + condition_df = condition_df.copy().reset_index() + with suppress(KeyError): + # TODO: are condition names still supported in v2? 
+ condition_df.drop(columns=[v2.C.CONDITION_NAME], inplace=True) + + condition_df = condition_df.melt( + id_vars=[v1.C.CONDITION_ID], + var_name=v2.C.TARGET_ID, + value_name=v2.C.TARGET_VALUE, + ) + + if condition_df.empty: + # This happens if there weren't any condition-specific changes + return pd.DataFrame( + columns=[ + v2.C.CONDITION_ID, + v2.C.TARGET_ID, + v2.C.VALUE_TYPE, + v2.C.TARGET_VALUE, + ] + ) + + targets = set(condition_df[v2.C.TARGET_ID].unique()) + valid_cond_pars = set(model.get_valid_parameters_for_parameter_table()) + # entities to which we assign constant values + constant = targets & valid_cond_pars + # entities to which we assign initial values + initial = set() + for target in targets - constant: + if model.is_state_variable(target): + initial.add(target) + else: + raise NotImplementedError( + f"Unable to determine value type {target} in the condition " + "table." + ) + condition_df[v2.C.VALUE_TYPE] = condition_df[v2.C.TARGET_ID].apply( + lambda x: v2.C.VT_INITIAL if x in initial else v2.C.VT_CONSTANT + ) + return condition_df diff --git a/petab/v2/problem.py b/petab/v2/problem.py index 01c84f9b..1df2c677 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -4,6 +4,7 @@ import logging import os import tempfile +import traceback import warnings from collections.abc import Sequence from math import nan @@ -15,7 +16,6 @@ from pydantic import AnyUrl, BaseModel, Field from ..v1 import ( - conditions, core, mapping, measurements, @@ -29,7 +29,7 @@ from ..v1.problem import ListOfFiles, VersionNumber from ..v1.yaml import get_path_prefix from ..v2.C import * # noqa: F403 -from . import experiments +from . 
import conditions, experiments if TYPE_CHECKING: from ..v2.lint import ValidationResultList, ValidationTask @@ -745,7 +745,8 @@ def validate( except Exception as e: cur_result = ValidationIssue( ValidationIssueSeverity.CRITICAL, - f"Validation task {task} failed with exception: {e}", + f"Validation task {task} failed with exception: {e}\n" + f"{traceback.format_exc()}", ) if cur_result: @@ -756,20 +757,35 @@ def validate( return validation_results - def add_condition(self, id_: str, name: str = None, **kwargs): + def add_condition( + self, id_: str, name: str = None, **kwargs: tuple[str, Number | str] + ): """Add a simulation condition to the problem. Arguments: id_: The condition id name: The condition name - kwargs: Parameter, value pairs to add to the condition table. + kwargs: Entities to be added to the condition table in the form + `target_id=(value_type, target_value)`. """ - record = {CONDITION_ID: [id_], **kwargs} + if not kwargs: + return + records = [ + { + CONDITION_ID: id_, + TARGET_ID: target_id, + VALUE_TYPE: value_type, + TARGET_VALUE: target_value, + } + for target_id, (value_type, target_value) in kwargs.items() + ] + # TODO: is the condition name supported in v2? 
if name is not None: - record[CONDITION_NAME] = name - tmp_df = pd.DataFrame(record).set_index([CONDITION_ID]) + for record in records: + record[CONDITION_NAME] = [name] + tmp_df = pd.DataFrame(records) self.condition_df = ( - pd.concat([self.condition_df, tmp_df]) + pd.concat([self.condition_df, tmp_df], ignore_index=True) if self.condition_df is not None else tmp_df ) diff --git a/tests/v2/test_conversion.py b/tests/v2/test_conversion.py index c309a00e..4b982fcf 100644 --- a/tests/v2/test_conversion.py +++ b/tests/v2/test_conversion.py @@ -1,6 +1,8 @@ import logging import tempfile +import pytest + from petab.v2.petab1to2 import petab1to2 @@ -16,19 +18,28 @@ def test_petab1to2_remote(): petab1to2(yaml_url, tmpdirname) -def test_benchmark_collection(): - """Test that we can upgrade all benchmark collection models.""" +try: import benchmark_models_petab + parametrize_or_skip = pytest.mark.parametrize( + "problem_id", benchmark_models_petab.MODELS + ) +except ImportError: + parametrize_or_skip = pytest.mark.skip( + reason="benchmark_models_petab not installed" + ) + + +@parametrize_or_skip +def test_benchmark_collection(problem_id): + """Test that we can upgrade all benchmark collection models.""" logging.basicConfig(level=logging.DEBUG) - for problem_id in benchmark_models_petab.MODELS: - if problem_id == "Lang_PLOSComputBiol2024": - # Does not pass initial linting - continue + if problem_id == "Froehlich_CellSystems2018": + pytest.skip("Too slow. 
Re-enable once we are faster.") - yaml_path = benchmark_models_petab.get_problem_yaml_path(problem_id) - with tempfile.TemporaryDirectory( - prefix=f"test_petab1to2_{problem_id}" - ) as tmpdirname: - petab1to2(yaml_path, tmpdirname) + yaml_path = benchmark_models_petab.get_problem_yaml_path(problem_id) + with tempfile.TemporaryDirectory( + prefix=f"test_petab1to2_{problem_id}" + ) as tmpdirname: + petab1to2(yaml_path, tmpdirname) diff --git a/tests/v2/test_problem.py b/tests/v2/test_problem.py index 41ecc238..ba210af0 100644 --- a/tests/v2/test_problem.py +++ b/tests/v2/test_problem.py @@ -18,7 +18,11 @@ OBSERVABLE_ID, PARAMETER_ID, PETAB_ENTITY_ID, + TARGET_ID, + TARGET_VALUE, UPPER_BOUND, + VALUE_TYPE, + VT_CONSTANT, ) @@ -26,7 +30,7 @@ def test_load_remote(): """Test loading remote files""" yaml_url = ( "https://raw.githubusercontent.com/PEtab-dev/petab_test_suite" - "/main/petabtests/cases/v2.0.0/sbml/0001/_0001.yaml" + "/update_v2/petabtests/cases/v2.0.0/sbml/0001/_0001.yaml" ) petab_problem = Problem.from_yaml(yaml_url) @@ -69,7 +73,7 @@ def test_problem_from_yaml_multiple_files(): for i in (1, 2): problem = Problem() - problem.add_condition(f"condition{i}") + problem.add_condition(f"condition{i}", parameter1=(VT_CONSTANT, i)) petab.write_condition_df( problem.condition_df, Path(tmpdir, f"conditions{i}.tsv") ) @@ -105,16 +109,17 @@ def test_problem_from_yaml_multiple_files(): def test_modify_problem(): """Test modifying a problem via the API.""" problem = Problem() - problem.add_condition("condition1", parameter1=1) - problem.add_condition("condition2", parameter2=2) + problem.add_condition("condition1", parameter1=(VT_CONSTANT, 1)) + problem.add_condition("condition2", parameter2=(VT_CONSTANT, 2)) exp_condition_df = pd.DataFrame( data={ CONDITION_ID: ["condition1", "condition2"], - "parameter1": [1.0, np.nan], - "parameter2": [np.nan, 2.0], + TARGET_ID: ["parameter1", "parameter2"], + VALUE_TYPE: [VT_CONSTANT, VT_CONSTANT], + TARGET_VALUE: [1.0, 2.0], } - 
).set_index([CONDITION_ID]) + ) assert_frame_equal( problem.condition_df, exp_condition_df, check_dtype=False ) diff --git a/tox.ini b/tox.ini index d57aa91d..7d0cdccc 100644 --- a/tox.ini +++ b/tox.ini @@ -19,7 +19,7 @@ deps= commands = python -m pip install sympy>=1.12.1 - python -m pytest --cov=petab --cov-report=xml --cov-append \ + python -m pytest --cov=petab --cov-report=xml --cov-append --durations=10 \ tests description = Basic tests From 6fffafb6108387d274d6cbf886b977589dbd660d Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 12 Dec 2024 10:40:43 +0100 Subject: [PATCH 015/141] Fix Prior.from_par_dict for missing priorParameters columns (#341) Previously, missing `*PriorParameters` would have resulted in a KeyError. --- petab/v1/priors.py | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/petab/v1/priors.py b/petab/v1/priors.py index f0f37f75..e1263946 100644 --- a/petab/v1/priors.py +++ b/petab/v1/priors.py @@ -224,10 +224,8 @@ def from_par_dict( dist_type = C.PARAMETER_SCALE_UNIFORM pscale = d.get(C.PARAMETER_SCALE, C.LIN) - if ( - pd.isna(d[f"{type_}PriorParameters"]) - and dist_type == C.PARAMETER_SCALE_UNIFORM - ): + params = d.get(f"{type_}PriorParameters", None) + if pd.isna(params) and dist_type == C.PARAMETER_SCALE_UNIFORM: params = ( scale(d[C.LOWER_BOUND], pscale), scale(d[C.UPPER_BOUND], pscale), @@ -236,7 +234,7 @@ def from_par_dict( params = tuple( map( float, - d[f"{type_}PriorParameters"].split(C.PARAMETER_SEPARATOR), + params.split(C.PARAMETER_SEPARATOR), ) ) return Prior( From 6d70b2079a99ea792d8ba070d4df81d5e2f93df0 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 16 Dec 2024 12:58:19 +0100 Subject: [PATCH 016/141] Fix petablint v2 warning (#342) Don't show `Support for PEtab2.0 and all of petab.v2 is experimental` warning when validating PEtab v1 problems. 
--- petab/petablint.py | 5 +++-- petab/versions.py | 27 ++++++++++++++++----------- 2 files changed, 19 insertions(+), 13 deletions(-) diff --git a/petab/petablint.py b/petab/petablint.py index f8228d42..43796c42 100755 --- a/petab/petablint.py +++ b/petab/petablint.py @@ -12,9 +12,8 @@ import petab.v1 as petab from petab.v1.C import FORMAT_VERSION -from petab.v2.lint import lint_problem +from petab.v1.yaml import validate from petab.versions import get_major_version -from petab.yaml import validate logger = logging.getLogger(__name__) @@ -178,6 +177,8 @@ def main(): ret = petab.lint.lint_problem(problem) sys.exit(ret) case 2: + from petab.v2.lint import lint_problem + validation_issues = lint_problem(args.yaml_file_name) if validation_issues: validation_issues.log(logger=logger) diff --git a/petab/versions.py b/petab/versions.py index 2b263aff..93f6a60a 100644 --- a/petab/versions.py +++ b/petab/versions.py @@ -3,10 +3,10 @@ from pathlib import Path +import petab from petab.v1 import Problem as V1Problem from petab.v1.C import FORMAT_VERSION from petab.v1.yaml import load_yaml -from petab.v2 import Problem as V2Problem __all__ = [ "get_major_version", @@ -14,22 +14,27 @@ def get_major_version( - problem: str | dict | Path | V1Problem | V2Problem, + problem: str | dict | Path | V1Problem | petab.v2.Problem, ) -> int: """Get the major version number of the given problem.""" - if isinstance(problem, V1Problem): - return 1 - - if isinstance(problem, V2Problem): - return 2 + version = None if isinstance(problem, str | Path): yaml_config = load_yaml(problem) version = yaml_config.get(FORMAT_VERSION) elif isinstance(problem, dict): version = problem.get(FORMAT_VERSION) - else: - raise ValueError(f"Unsupported argument type: {type(problem)}") - version = str(version) - return int(version.split(".")[0]) + if version is not None: + version = str(version) + return int(version.split(".")[0]) + + if isinstance(problem, V1Problem): + return 1 + + from . 
import v2 + + if isinstance(problem, v2.Problem): + return 2 + + raise ValueError(f"Unsupported argument type: {type(problem)}") From 1d3fda13a3e7ec213cf0d97ba3d8663d25fdcdcf Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 16 Dec 2024 13:17:52 +0100 Subject: [PATCH 017/141] Doc: fix deprecated petablint invocation (#343) Closes https://github.com/PEtab-dev/PEtab/issues/592. --- doc/example/example_petablint.ipynb | 79 ++++------------------------- 1 file changed, 10 insertions(+), 69 deletions(-) diff --git a/doc/example/example_petablint.ipynb b/doc/example/example_petablint.ipynb index ed20b0d9..6925a433 100644 --- a/doc/example/example_petablint.ipynb +++ b/doc/example/example_petablint.ipynb @@ -16,75 +16,26 @@ }, { "cell_type": "code", - "execution_count": 1, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "usage: petablint [-h] [-v] [-s SBML_FILE_NAME] [-m MEASUREMENT_FILE_NAME]\r\n", - " [-c CONDITION_FILE_NAME] [-p PARAMETER_FILE_NAME]\r\n", - " [-y YAML_FILE_NAME | -n MODEL_NAME] [-d DIRECTORY]\r\n", - "\r\n", - "Check if a set of files adheres to the PEtab format.\r\n", - "\r\n", - "optional arguments:\r\n", - " -h, --help show this help message and exit\r\n", - " -v, --verbose More verbose output\r\n", - " -s SBML_FILE_NAME, --sbml SBML_FILE_NAME\r\n", - " SBML model filename\r\n", - " -m MEASUREMENT_FILE_NAME, --measurements MEASUREMENT_FILE_NAME\r\n", - " Measurement table\r\n", - " -c CONDITION_FILE_NAME, --conditions CONDITION_FILE_NAME\r\n", - " Conditions table\r\n", - " -p PARAMETER_FILE_NAME, --parameters PARAMETER_FILE_NAME\r\n", - " Parameter table\r\n", - " -y YAML_FILE_NAME, --yaml YAML_FILE_NAME\r\n", - " PEtab YAML problem filename\r\n", - " -n MODEL_NAME, --model-name MODEL_NAME\r\n", - " Model name where all files are in the working\r\n", - " directory and follow PEtab naming convention.\r\n", - " Specifying -[smcp] will override defaults\r\n", - " -d DIRECTORY, --directory 
DIRECTORY\r\n" - ] - } - ], "source": [ "!petablint -h" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", "metadata": {}, - "source": [ - "Let's look at an example: In the example_Fujita folder, we have a PEtab configuration file `Fujita.yaml` telling which files belong to the Fujita model:" - ] + "source": "Let's look at an example: In the `example_Fujita/` directory, we have a PEtab problem configuration file `Fujita.yaml` telling which files belong to the \"Fujita\" problem:" }, { "cell_type": "code", - "execution_count": 2, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "parameter_file: Fujita_parameters.tsv\r\n", - "petab_version: 0.0.0a17\r\n", - "problems:\r\n", - "- condition_files:\r\n", - " - Fujita_experimentalCondition.tsv\r\n", - " measurement_files:\r\n", - " - Fujita_measurementData.tsv\r\n", - " sbml_files:\r\n", - " - Fujita_model.xml\r\n" - ] - } - ], "source": [ "!cat example_Fujita/Fujita.yaml" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -95,20 +46,10 @@ }, { "cell_type": "code", - "execution_count": 3, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[0m" - ] - } - ], - "source": [ - "!petablint -y example_Fujita/Fujita.yaml" - ] + "source": "!petablint example_Fujita/Fujita.yaml", + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", From 6a433e0606e56c36f8c82de8bd4a4f6b25fc65e2 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Wed, 18 Dec 2024 19:24:05 +0100 Subject: [PATCH 018/141] Support for petab v2 experiments (#332) Add basic support for PEtab version 2 experiments (see also https://github.com/PEtab-dev/PEtab/issues/586, and https://github.com/PEtab-dev/PEtab/pull/581). Follow-up to #334. Partially supersedes #263, which was started before petab.v1/petab.v2 were introduced and before https://github.com/PEtab-dev/PEtab/issues/586. 
* updates the required fields in the measurement table * updates some validation functions to not expect the old `simulationConditionId`s (but does not do full validation yet) * extends PEtab v1 up-conversion to create a new experiment table. --------- Co-authored-by: Dilan Pathirana <59329744+dilpath@users.noreply.github.com> --- petab/v1/calculate.py | 7 +++ petab/v1/problem.py | 8 +-- petab/v2/C.py | 17 ++---- petab/v2/__init__.py | 5 +- petab/v2/lint.py | 129 ++++++++++++++++++++++++++------------- petab/v2/petab1to2.py | 102 ++++++++++++++++++++++++++++--- petab/v2/problem.py | 27 +++----- tests/v2/test_problem.py | 4 +- 8 files changed, 207 insertions(+), 92 deletions(-) diff --git a/petab/v1/calculate.py b/petab/v1/calculate.py index 3cc86f73..32930807 100644 --- a/petab/v1/calculate.py +++ b/petab/v1/calculate.py @@ -97,6 +97,9 @@ def calculate_residuals_for_table( Calculate residuals for a single measurement table. For the arguments, see `calculate_residuals`. """ + # below, we rely on a unique index + measurement_df = measurement_df.reset_index(drop=True) + # create residual df as copy of measurement df, change column residual_df = measurement_df.copy(deep=True).rename( columns={MEASUREMENT: RESIDUAL} @@ -120,6 +123,10 @@ def calculate_residuals_for_table( for col in compared_cols ] mask = reduce(lambda x, y: x & y, masks) + if mask.sum() == 0: + raise ValueError( + f"Could not find simulation for measurement {row}." 
+ ) simulation = simulation_df.loc[mask][SIMULATION].iloc[0] if scale: # apply scaling diff --git a/petab/v1/problem.py b/petab/v1/problem.py index ea300258..6be96c68 100644 --- a/petab/v1/problem.py +++ b/petab/v1/problem.py @@ -1149,8 +1149,8 @@ def add_measurement( sim_cond_id: str, time: float, measurement: float, - observable_parameters: Sequence[str] = None, - noise_parameters: Sequence[str] = None, + observable_parameters: Sequence[str | float] = None, + noise_parameters: Sequence[str | float] = None, preeq_cond_id: str = None, ): """Add a measurement to the problem. @@ -1172,11 +1172,11 @@ def add_measurement( } if observable_parameters is not None: record[OBSERVABLE_PARAMETERS] = [ - PARAMETER_SEPARATOR.join(observable_parameters) + PARAMETER_SEPARATOR.join(map(str, observable_parameters)) ] if noise_parameters is not None: record[NOISE_PARAMETERS] = [ - PARAMETER_SEPARATOR.join(noise_parameters) + PARAMETER_SEPARATOR.join(map(str, noise_parameters)) ] if preeq_cond_id is not None: record[PREEQUILIBRATION_CONDITION_ID] = [preeq_cond_id] diff --git a/petab/v2/C.py b/petab/v2/C.py index cb095c68..1ab6f795 100644 --- a/petab/v2/C.py +++ b/petab/v2/C.py @@ -13,14 +13,6 @@ #: Experiment ID column in the measurement table EXPERIMENT_ID = "experimentId" -# TODO: remove -#: Preequilibration condition ID column in the measurement table -PREEQUILIBRATION_CONDITION_ID = "preequilibrationConditionId" - -# TODO: remove -#: Simulation condition ID column in the measurement table -SIMULATION_CONDITION_ID = "simulationConditionId" - #: Measurement value column in the measurement table MEASUREMENT = "measurement" @@ -30,6 +22,9 @@ #: Time value that indicates steady-state measurements TIME_STEADY_STATE = _math.inf +#: Time value that indicates pre-equilibration in the experiments table +TIME_PREEQUILIBRATION = -_math.inf + #: Observable parameters column in the measurement table OBSERVABLE_PARAMETERS = "observableParameters" @@ -45,17 +40,13 @@ #: Mandatory columns of 
measurement table MEASUREMENT_DF_REQUIRED_COLS = [ OBSERVABLE_ID, - # TODO: add - # EXPERIMENT_ID, - SIMULATION_CONDITION_ID, + EXPERIMENT_ID, MEASUREMENT, TIME, ] #: Optional columns of measurement table MEASUREMENT_DF_OPTIONAL_COLS = [ - # TODO: remove - PREEQUILIBRATION_CONDITION_ID, OBSERVABLE_PARAMETERS, NOISE_PARAMETERS, DATASET_ID, diff --git a/petab/v2/__init__.py b/petab/v2/__init__.py index 0525d66c..adeb0e84 100644 --- a/petab/v2/__init__.py +++ b/petab/v2/__init__.py @@ -27,7 +27,10 @@ # import after v1 from ..version import __version__ # noqa: F401, E402 -from . import models # noqa: F401, E402 +from . import ( # noqa: F401, E402 + C, # noqa: F401, E402 + models, # noqa: F401, E402 +) from .conditions import * # noqa: F403, F401, E402 from .experiments import ( # noqa: F401, E402 get_experiment_df, diff --git a/petab/v2/lint.py b/petab/v2/lint.py index 76f3cdb6..2473c74d 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -15,6 +15,9 @@ from .. import v2 from ..v1.lint import ( _check_df, + assert_measured_observables_defined, + assert_measurements_not_null, + assert_measurements_numeric, assert_model_parameters_in_condition_or_parameter_table, assert_no_leading_trailing_whitespace, assert_parameter_bounds_are_numeric, @@ -23,13 +26,16 @@ assert_parameter_prior_parameters_are_valid, assert_parameter_prior_type_is_valid, assert_parameter_scale_is_valid, + assert_unique_observable_ids, assert_unique_parameter_ids, check_ids, - check_measurement_df, check_observable_df, check_parameter_bounds, ) -from ..v1.measurements import split_parameter_replacement_list +from ..v1.measurements import ( + assert_overrides_match_parameter_count, + split_parameter_replacement_list, +) from ..v1.observables import get_output_parameters, get_placeholders from ..v1.visualize.lint import validate_visualization_df from ..v2.C import * @@ -102,6 +108,23 @@ class ValidationError(ValidationIssue): level: ValidationIssueSeverity = field( 
default=ValidationIssueSeverity.ERROR, init=False ) + task: str | None = None + + def __post_init__(self): + if self.task is None: + self.task = self._get_task_name() + + def _get_task_name(self): + """Get the name of the ValidationTask that raised this error.""" + import inspect + + # walk up the stack until we find the ValidationTask.run method + for frame_info in inspect.stack(): + frame = frame_info.frame + if "self" in frame.f_locals: + task = frame.f_locals["self"] + if isinstance(task, ValidationTask): + return task.__class__.__name__ class ValidationResultList(list[ValidationIssue]): @@ -237,8 +260,51 @@ def run(self, problem: Problem) -> ValidationIssue | None: if problem.measurement_df is None: return + df = problem.measurement_df try: - check_measurement_df(problem.measurement_df, problem.observable_df) + _check_df(df, MEASUREMENT_DF_REQUIRED_COLS, "measurement") + + for column_name in MEASUREMENT_DF_REQUIRED_COLS: + if not np.issubdtype(df[column_name].dtype, np.number): + assert_no_leading_trailing_whitespace( + df[column_name].values, column_name + ) + + for column_name in MEASUREMENT_DF_OPTIONAL_COLS: + if column_name in df and not np.issubdtype( + df[column_name].dtype, np.number + ): + assert_no_leading_trailing_whitespace( + df[column_name].values, column_name + ) + + if problem.observable_df is not None: + assert_measured_observables_defined(df, problem.observable_df) + assert_overrides_match_parameter_count( + df, problem.observable_df + ) + + if OBSERVABLE_TRANSFORMATION in problem.observable_df: + # Check for positivity of measurements in case of + # log-transformation + assert_unique_observable_ids(problem.observable_df) + # If the above is not checked, in the following loop + # trafo may become a pandas Series + for measurement, obs_id in zip( + df[MEASUREMENT], df[OBSERVABLE_ID], strict=True + ): + trafo = problem.observable_df.loc[ + obs_id, OBSERVABLE_TRANSFORMATION + ] + if measurement <= 0.0 and trafo in [LOG, LOG10]: + raise 
ValueError( + "Measurements with observable " + f"transformation {trafo} must be " + f"positive, but {measurement} <= 0." + ) + + assert_measurements_not_null(df) + assert_measurements_numeric(df) except AssertionError as e: return ValidationError(str(e)) @@ -247,46 +313,20 @@ def run(self, problem: Problem) -> ValidationIssue | None: # condition table should be an error if the measurement table refers # to conditions - # check that measured experiments/conditions exist - # TODO: fully switch to experiment table and remove this: - if SIMULATION_CONDITION_ID in problem.measurement_df: - if problem.condition_df is None: - return - used_conditions = set( - problem.measurement_df[SIMULATION_CONDITION_ID].dropna().values - ) - if PREEQUILIBRATION_CONDITION_ID in problem.measurement_df: - used_conditions |= set( - problem.measurement_df[PREEQUILIBRATION_CONDITION_ID] - .dropna() - .values - ) - available_conditions = set( - problem.condition_df[CONDITION_ID].unique() - ) - if missing_conditions := (used_conditions - available_conditions): - return ValidationError( - "Measurement table references conditions that " - "are not specified in the condition table: " - + str(missing_conditions) - ) - elif EXPERIMENT_ID in problem.measurement_df: - if problem.experiment_df is None: - return - used_experiments = set( - problem.measurement_df[EXPERIMENT_ID].values - ) - available_experiments = set( - problem.condition_df[CONDITION_ID].unique() + # check that measured experiments + if problem.experiment_df is None: + return + + used_experiments = set(problem.measurement_df[EXPERIMENT_ID].values) + available_experiments = set( + problem.experiment_df[EXPERIMENT_ID].unique() + ) + if missing_experiments := (used_experiments - available_experiments): + raise AssertionError( + "Measurement table references experiments that " + "are not specified in the experiments table: " + + str(missing_experiments) ) - if missing_experiments := ( - used_experiments - available_experiments - ): - 
raise AssertionError( - "Measurement table references experiments that " - "are not specified in the experiments table: " - + str(missing_experiments) - ) class CheckConditionTable(ValidationTask): @@ -486,7 +526,7 @@ def run(self, problem: Problem) -> ValidationIssue | None: ) required_conditions = problem.experiment_df[CONDITION_ID].unique() - existing_conditions = problem.condition_df.index + existing_conditions = problem.condition_df[CONDITION_ID].unique() missing_conditions = set(required_conditions) - set( existing_conditions @@ -771,7 +811,8 @@ def append_overrides(overrides): ) # parameters that are overridden via the condition table are not allowed - parameter_ids -= set(problem.condition_df[TARGET_ID].unique()) + if problem.condition_df is not None: + parameter_ids -= set(problem.condition_df[TARGET_ID].unique()) return parameter_ids diff --git a/petab/v2/petab1to2.py b/petab/v2/petab1to2.py index d5d06229..78304328 100644 --- a/petab/v2/petab1to2.py +++ b/petab/v2/petab1to2.py @@ -4,8 +4,8 @@ from itertools import chain from pathlib import Path from urllib.parse import urlparse +from uuid import uuid4 -import numpy as np import pandas as pd from pandas.io.common import get_handle, is_url @@ -98,10 +98,81 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None): condition_df = v1v2_condition_df(condition_df, petab_problem.model) v2.write_condition_df(condition_df, get_dest_path(condition_file)) + # records for the experiment table to be created + experiments = [] + + def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str: + if not sim_cond_id and not preeq_cond_id: + return "" + if preeq_cond_id: + preeq_cond_id = f"{preeq_cond_id}_" + exp_id = f"experiment__{preeq_cond_id}__{sim_cond_id}" + if exp_id in experiments: # noqa: B023 + i = 1 + while f"{exp_id}_{i}" in experiments: # noqa: B023 + i += 1 + exp_id = f"{exp_id}_{i}" + return exp_id + + measured_experiments = ( + 
petab_problem.get_simulation_conditions_from_measurement_df() + ) + for ( + _, + row, + ) in measured_experiments.iterrows(): + # generate a new experiment for each simulation / pre-eq condition + # combination + sim_cond_id = row[v1.C.SIMULATION_CONDITION_ID] + preeq_cond_id = row.get(v1.C.PREEQUILIBRATION_CONDITION_ID, "") + exp_id = create_experiment_id(sim_cond_id, preeq_cond_id) + if preeq_cond_id: + experiments.append( + { + v2.C.EXPERIMENT_ID: exp_id, + v2.C.CONDITION_ID: preeq_cond_id, + v2.C.TIME: v2.C.TIME_PREEQUILIBRATION, + } + ) + experiments.append( + { + v2.C.EXPERIMENT_ID: exp_id, + v2.C.CONDITION_ID: sim_cond_id, + v2.C.TIME: 0, + } + ) + if experiments: + exp_table_path = output_dir / "experiments.tsv" + if exp_table_path.exists(): + raise ValueError( + f"Experiment table file {exp_table_path} already exists." + ) + problem_config[v2.C.EXPERIMENT_FILES] = [exp_table_path.name] + v2.write_experiment_df( + v2.get_experiment_df(pd.DataFrame(experiments)), exp_table_path + ) + for measurement_file in problem_config.get(v2.C.MEASUREMENT_FILES, []): measurement_df = v1.get_measurement_df( get_src_path(measurement_file) ) + # if there is already an experiment ID column, we rename it + if v2.C.EXPERIMENT_ID in measurement_df.columns: + measurement_df.rename( + columns={v2.C.EXPERIMENT_ID: f"experiment_id_{uuid4()}"}, + inplace=True, + ) + # add pre-eq condition id if not present or convert to string + # for simplicity + if v1.C.PREEQUILIBRATION_CONDITION_ID in measurement_df.columns: + measurement_df[ + v1.C.PREEQUILIBRATION_CONDITION_ID + ] = measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID].astype( + str + ) + else: + measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID] = "" + if ( petab_problem.condition_df is not None and len( @@ -110,20 +181,33 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None): ) == 0 ): - # can't have "empty" conditions with no overrides in v2 - # TODO: this needs to be done condition wise - 
measurement_df[v2.C.SIMULATION_CONDITION_ID] = np.nan + # we can't have "empty" conditions with no overrides in v2, + # therefore, we drop the respective condition ID completely + # TODO: or can we? + # TODO: this needs to be checked condition-wise, not globally + measurement_df[v1.C.SIMULATION_CONDITION_ID] = "" if ( v1.C.PREEQUILIBRATION_CONDITION_ID in measurement_df.columns ): - measurement_df[v2.C.PREEQUILIBRATION_CONDITION_ID] = np.nan + measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID] = "" + # condition IDs to experiment IDs + measurement_df.insert( + 0, + v2.C.EXPERIMENT_ID, + measurement_df.apply( + lambda row: create_experiment_id( + row[v1.C.SIMULATION_CONDITION_ID], + row.get(v1.C.PREEQUILIBRATION_CONDITION_ID, ""), + ), + axis=1, + ), + ) + del measurement_df[v1.C.SIMULATION_CONDITION_ID] + del measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID] v2.write_measurement_df( measurement_df, get_dest_path(measurement_file) ) - # TODO: Measurements: preequilibration to experiments/timecourses once - # finalized - ... # validate updated Problem validation_issues = v2.lint_problem(new_yaml_file) @@ -189,7 +273,7 @@ def v1v2_condition_df( """Convert condition table from petab v1 to v2.""" condition_df = condition_df.copy().reset_index() with suppress(KeyError): - # TODO: are condition names still supported in v2? 
+ # conditionName was dropped in PEtab v2 condition_df.drop(columns=[v2.C.CONDITION_NAME], inplace=True) condition_df = condition_df.melt( diff --git a/petab/v2/problem.py b/petab/v2/problem.py index 1df2c677..f8dad754 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -5,7 +5,6 @@ import os import tempfile import traceback -import warnings from collections.abc import Sequence from math import nan from numbers import Number @@ -92,12 +91,6 @@ def __init__( ValidationTask ] = default_validation_tasks.copy() self.config = config - if self.experiment_df is not None: - warnings.warn( - "The experiment table is not yet supported and " - "will be ignored.", - stacklevel=2, - ) def __str__(self): model = f"with model ({self.model})" if self.model else "without model" @@ -908,47 +901,43 @@ def add_parameter( def add_measurement( self, obs_id: str, - sim_cond_id: str, + experiment_id: str, time: float, measurement: float, - observable_parameters: Sequence[str] = None, - noise_parameters: Sequence[str] = None, - preeq_cond_id: str = None, + observable_parameters: Sequence[str | float] = None, + noise_parameters: Sequence[str | float] = None, ): """Add a measurement to the problem. 
Arguments: obs_id: The observable ID - sim_cond_id: The simulation condition ID + experiment_id: The experiment ID time: The measurement time measurement: The measurement value observable_parameters: The observable parameters noise_parameters: The noise parameters - preeq_cond_id: The pre-equilibration condition ID """ record = { OBSERVABLE_ID: [obs_id], - SIMULATION_CONDITION_ID: [sim_cond_id], + EXPERIMENT_ID: [experiment_id], TIME: [time], MEASUREMENT: [measurement], } if observable_parameters is not None: record[OBSERVABLE_PARAMETERS] = [ - PARAMETER_SEPARATOR.join(observable_parameters) + PARAMETER_SEPARATOR.join(map(str, observable_parameters)) ] if noise_parameters is not None: record[NOISE_PARAMETERS] = [ - PARAMETER_SEPARATOR.join(noise_parameters) + PARAMETER_SEPARATOR.join(map(str, noise_parameters)) ] - if preeq_cond_id is not None: - record[PREEQUILIBRATION_CONDITION_ID] = [preeq_cond_id] tmp_df = pd.DataFrame(record) self.measurement_df = ( pd.concat([self.measurement_df, tmp_df]) if self.measurement_df is not None else tmp_df - ) + ).reset_index(drop=True) def add_mapping(self, petab_id: str, model_id: str): """Add a mapping table entry to the problem. 
diff --git a/tests/v2/test_problem.py b/tests/v2/test_problem.py index ba210af0..dadc3a7c 100644 --- a/tests/v2/test_problem.py +++ b/tests/v2/test_problem.py @@ -30,7 +30,7 @@ def test_load_remote(): """Test loading remote files""" yaml_url = ( "https://raw.githubusercontent.com/PEtab-dev/petab_test_suite" - "/update_v2/petabtests/cases/v2.0.0/sbml/0001/_0001.yaml" + "/update_v2/petabtests/cases/v2.0.0/sbml/0010/_0010.yaml" ) petab_problem = Problem.from_yaml(yaml_url) @@ -83,7 +83,7 @@ def test_problem_from_yaml_multiple_files(): problem.experiment_df, Path(tmpdir, f"experiments{i}.tsv") ) - problem.add_measurement(f"observable{i}", f"condition{i}", 1, 1) + problem.add_measurement(f"observable{i}", f"experiment{i}", 1, 1) petab.write_measurement_df( problem.measurement_df, Path(tmpdir, f"measurements{i}.tsv") ) From cc33da21fe42723c585fabf78fa28ba54f3f7ac2 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Wed, 18 Dec 2024 22:56:16 +0100 Subject: [PATCH 019/141] Fix v2 import Deprecation warning (#346) Fixes this nonsensical warning: > E DeprecationWarning: Accessing `petab.v2` is deprecated and will be removed in the next major release. Please use `petab.v1.v2` instead. --- petab/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/petab/__init__.py b/petab/__init__.py index 81b58729..36110069 100644 --- a/petab/__init__.py +++ b/petab/__init__.py @@ -23,6 +23,8 @@ def __getattr__(name): return attr if name == "v1": return importlib.import_module("petab.v1") + if name == "v2": + return importlib.import_module("petab.v2") if name != "__path__": warn( f"Accessing `petab.{name}` is deprecated and will be removed in " From e7edebf3cd5b1495b7a31f40ef34e4da2268c318 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Wed, 18 Dec 2024 23:19:52 +0100 Subject: [PATCH 020/141] petab.calculate: compare all common columns (#347) For computing residuals, ... from measurement + simulation tables, we need to match the corresponding rows. 
Previously, this was done using a subset of PEtab measurement table columns and checking whether all values in these columns match. This changes it to using the full set of overlapping columns, not only the known measurement columns. With that, the same functions can be used for PEtab v2 measurement/simulation tables. --- petab/v1/calculate.py | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) diff --git a/petab/v1/calculate.py b/petab/v1/calculate.py index 32930807..4c129b88 100644 --- a/petab/v1/calculate.py +++ b/petab/v1/calculate.py @@ -106,10 +106,7 @@ def calculate_residuals_for_table( ) residual_df[RESIDUAL] = residual_df[RESIDUAL].astype("float64") # matching columns - compared_cols = set(MEASUREMENT_DF_COLS) - compared_cols -= {MEASUREMENT} - compared_cols &= set(measurement_df.columns) - compared_cols &= set(simulation_df.columns) + compared_cols = set(measurement_df.columns) & set(simulation_df.columns) # compute noise formulas for observables noise_formulas = get_symbolic_noise_formulas(observable_df) @@ -127,6 +124,16 @@ def calculate_residuals_for_table( raise ValueError( f"Could not find simulation for measurement {row}." 
) + # if we have multiple matches, check that the rows are all identical + elif ( + mask.sum() > 1 + and simulation_df.loc[mask].drop_duplicates().shape[0] > 1 + ): + raise ValueError( + f"Multiple different simulations found for measurement " + f"{row}:\n{simulation_df.loc[mask]}" + ) + simulation = simulation_df.loc[mask][SIMULATION].iloc[0] if scale: # apply scaling @@ -343,10 +350,7 @@ def calculate_llh_for_table( llhs = [] # matching columns - compared_cols = set(MEASUREMENT_DF_COLS) - compared_cols -= {MEASUREMENT} - compared_cols &= set(measurement_df.columns) - compared_cols &= set(simulation_df.columns) + compared_cols = set(measurement_df.columns) & set(simulation_df.columns) # compute noise formulas for observables noise_formulas = get_symbolic_noise_formulas(observable_df) From 9f11e7319b44b991fb0dc3315f1f3441ae9ac5fd Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 19 Dec 2024 08:58:31 +0100 Subject: [PATCH 021/141] Problem.add_parameter: Rename kwarg estimated->estimate (#348) Change the `estimated` argument in Problem.add_parameter to `estimate` to match the column name. 
--------- Co-authored-by: Dilan Pathirana <59329744+dilpath@users.noreply.github.com> --- petab/v1/problem.py | 14 +++++--------- petab/v2/problem.py | 14 +++++--------- 2 files changed, 10 insertions(+), 18 deletions(-) diff --git a/petab/v1/problem.py b/petab/v1/problem.py index 6be96c68..5c0dcf61 100644 --- a/petab/v1/problem.py +++ b/petab/v1/problem.py @@ -1074,8 +1074,8 @@ def add_observable( def add_parameter( self, id_: str, - estimated: bool | str | int = True, - nominal_value=None, + estimate: bool | str | int = True, + nominal_value: Number | None = None, scale: str = None, lb: Number = None, ub: Number = None, @@ -1089,7 +1089,7 @@ def add_parameter( Arguments: id_: The parameter id - estimated: Whether the parameter is estimated + estimate: Whether the parameter is estimated nominal_value: The nominal value of the parameter scale: The parameter scale lb: The lower bound of the parameter @@ -1104,12 +1104,8 @@ def add_parameter( record = { PARAMETER_ID: [id_], } - if estimated is not None: - record[ESTIMATE] = [ - int(estimated) - if isinstance(estimated, bool | int) - else estimated - ] + if estimate is not None: + record[ESTIMATE] = [int(estimate)] if nominal_value is not None: record[NOMINAL_VALUE] = [nominal_value] if scale is not None: diff --git a/petab/v2/problem.py b/petab/v2/problem.py index f8dad754..79bb6196 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -829,8 +829,8 @@ def add_observable( def add_parameter( self, id_: str, - estimated: bool | str | int = True, - nominal_value=None, + estimate: bool | str | int = True, + nominal_value: Number | None = None, scale: str = None, lb: Number = None, ub: Number = None, @@ -844,7 +844,7 @@ def add_parameter( Arguments: id_: The parameter id - estimated: Whether the parameter is estimated + estimate: Whether the parameter is estimated nominal_value: The nominal value of the parameter scale: The parameter scale lb: The lower bound of the parameter @@ -859,12 +859,8 @@ def 
add_parameter( record = { PARAMETER_ID: [id_], } - if estimated is not None: - record[ESTIMATE] = [ - int(estimated) - if isinstance(estimated, bool | int) - else estimated - ] + if estimate is not None: + record[ESTIMATE] = [int(estimate)] if nominal_value is not None: record[NOMINAL_VALUE] = [nominal_value] if scale is not None: From 6a9ecd0088a59e842206d800f5013fe8d9e00f1c Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Fri, 20 Dec 2024 12:13:35 +0100 Subject: [PATCH 022/141] Fix version checks (#350) Add function for parsing version numbers. Update checks. Fixes #349. --- petab/v1/problem.py | 12 ++++++------ petab/v1/yaml.py | 16 ++++++++++------ petab/v2/problem.py | 10 ++++++---- petab/versions.py | 41 ++++++++++++++++++++++++++++++++++------- tests/test_version.py | 13 +++++++++++++ 5 files changed, 69 insertions(+), 23 deletions(-) create mode 100644 tests/test_version.py diff --git a/petab/v1/problem.py b/petab/v1/problem.py index 5c0dcf61..373b6b47 100644 --- a/petab/v1/problem.py +++ b/petab/v1/problem.py @@ -13,10 +13,10 @@ import pandas as pd from pydantic import AnyUrl, BaseModel, Field, RootModel +from ..versions import get_major_version from . import ( conditions, core, - format_version, mapping, measurements, observables, @@ -290,13 +290,13 @@ def get_path(filename): "petab.CompositeProblem.from_yaml() instead." ) - if yaml_config[FORMAT_VERSION] not in {"1", 1, "1.0.0", "2.0.0"}: + major_version = get_major_version(yaml_config) + if major_version not in {1, 2}: raise ValueError( "Provided PEtab files are of unsupported version " - f"{yaml_config[FORMAT_VERSION]}. Expected " - f"{format_version.__format_version__}." + f"{yaml_config[FORMAT_VERSION]}." ) - if yaml_config[FORMAT_VERSION] == "2.0.0": + if major_version == 2: warn("Support for PEtab2.0 is experimental!", stacklevel=2) warn( "Using petab.v1.Problem with PEtab2.0 is deprecated. 
" @@ -321,7 +321,7 @@ def get_path(filename): if config.parameter_file else None ) - if config.format_version.root in [1, "1", "1.0.0"]: + if major_version == 1: if len(problem0.sbml_files) > 1: # TODO https://github.com/PEtab-dev/libpetab-python/issues/6 raise NotImplementedError( diff --git a/petab/v1/yaml.py b/petab/v1/yaml.py index ecffc48e..51159453 100644 --- a/petab/v1/yaml.py +++ b/petab/v1/yaml.py @@ -12,15 +12,15 @@ import yaml from pandas.io.common import get_handle +from ..versions import parse_version from .C import * # noqa: F403 # directory with PEtab yaml schema files SCHEMA_DIR = Path(__file__).parent.parent / "schemas" # map of version number to validation schema SCHEMAS = { - "1": SCHEMA_DIR / "petab_schema.v1.0.0.yaml", - "1.0.0": SCHEMA_DIR / "petab_schema.v1.0.0.yaml", - "2.0.0": SCHEMA_DIR / "petab_schema.v2.0.0.yaml", + (1, 0): SCHEMA_DIR / "petab_schema.v1.0.0.yaml", + (2, 0): SCHEMA_DIR / "petab_schema.v2.0.0.yaml", } __all__ = [ @@ -71,14 +71,18 @@ def validate_yaml_syntax( yaml_config = load_yaml(yaml_config) if schema is None: - # try get PEtab version from yaml file + # try to get PEtab version from the yaml file # if this is not the available, the file is not valid anyways, # but let's still use the latest PEtab schema for full validation + version = yaml_config.get(FORMAT_VERSION, None) version = ( - yaml_config.get(FORMAT_VERSION, None) or list(SCHEMAS.values())[-1] + parse_version(version)[:2] + if version + else list(SCHEMAS.values())[-1] ) + try: - schema = SCHEMAS[str(version)] + schema = SCHEMAS[version] except KeyError as e: raise ValueError( "Unknown PEtab version given in problem " diff --git a/petab/v2/problem.py b/petab/v2/problem.py index 79bb6196..bcf38768 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -28,6 +28,7 @@ from ..v1.problem import ListOfFiles, VersionNumber from ..v1.yaml import get_path_prefix from ..v2.C import * # noqa: F403 +from ..versions import parse_version from . 
import conditions, experiments if TYPE_CHECKING: @@ -161,14 +162,15 @@ def get_path(filename): return filename return f"{base_path}/{filename}" - if yaml_config[FORMAT_VERSION] not in {"2.0.0"}: + if (format_version := parse_version(yaml_config[FORMAT_VERSION]))[ + 0 + ] != 2: # If we got a path to a v1 yaml file, try to auto-upgrade from tempfile import TemporaryDirectory - from ..versions import get_major_version from .petab1to2 import petab1to2 - if get_major_version(yaml_config) == 1 and yaml_file: + if format_version[0] == 1 and yaml_file: logging.debug( "Auto-upgrading problem from PEtab 1.0 to PEtab 2.0" ) @@ -185,7 +187,7 @@ def get_path(filename): ) raise ValueError( "Provided PEtab files are of unsupported version " - f"{yaml_config[FORMAT_VERSION]}. Expected 2.0.0." + f"{yaml_config[FORMAT_VERSION]}." ) if yaml.is_composite_problem(yaml_config): diff --git a/petab/versions.py b/petab/versions.py index 93f6a60a..e19d0cd0 100644 --- a/petab/versions.py +++ b/petab/versions.py @@ -1,35 +1,62 @@ """Handling of PEtab version numbers.""" from __future__ import annotations +import re from pathlib import Path import petab -from petab.v1 import Problem as V1Problem -from petab.v1.C import FORMAT_VERSION -from petab.v1.yaml import load_yaml __all__ = [ "get_major_version", + "parse_version", ] +from . import v1 + +# version regex pattern +_version_pattern = ( + r"(?P<major>\d+)(?:\.(?P<minor>\d+))?" + r"(?:\.(?P<patch>\d+))?(?P<suffix>[\w.]+)?" 
+) +_version_re = re.compile(_version_pattern) + + +def parse_version(version: str | int) -> tuple[int, int, int, str]: + """Parse a version string into a tuple of integers and suffix.""" + if isinstance(version, int): + return version, 0, 0, "" + + version = str(version) + match = _version_re.match(version) + if match is None: + raise ValueError(f"Invalid version string: {version}") + + major = int(match.group("major")) + minor = int(match.group("minor") or 0) + patch = int(match.group("patch") or 0) + suffix = match.group("suffix") or "" + + return major, minor, patch, suffix def get_major_version( - problem: str | dict | Path | V1Problem | petab.v2.Problem, + problem: str | dict | Path | petab.v1.Problem | petab.v2.Problem, ) -> int: """Get the major version number of the given problem.""" version = None if isinstance(problem, str | Path): + from petab.v1.yaml import load_yaml + yaml_config = load_yaml(problem) - version = yaml_config.get(FORMAT_VERSION) + version = yaml_config.get(v1.C.FORMAT_VERSION) elif isinstance(problem, dict): - version = problem.get(FORMAT_VERSION) + version = problem.get(v1.C.FORMAT_VERSION) if version is not None: version = str(version) return int(version.split(".")[0]) - if isinstance(problem, V1Problem): + if isinstance(problem, petab.v1.Problem): return 1 from . 
import v2 diff --git a/tests/test_version.py b/tests/test_version.py new file mode 100644 index 00000000..757a9b50 --- /dev/null +++ b/tests/test_version.py @@ -0,0 +1,13 @@ +"""Tests related to petab.versions""" + +from petab.versions import * + + +def test_parse_version(): + assert parse_version("1.2.3") == (1, 2, 3, "") + assert parse_version("1.2.3a") == (1, 2, 3, "a") + assert parse_version("1.2") == (1, 2, 0, "") + assert parse_version("1") == (1, 0, 0, "") + assert parse_version(1) == (1, 0, 0, "") + assert parse_version("1.2.3.a") == (1, 2, 3, ".a") + assert parse_version("1.2.3.4") == (1, 2, 3, ".4") From 6e762c6ca0d1dbaca0cf3614b797204f9767f73e Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Fri, 20 Dec 2024 15:03:56 +0100 Subject: [PATCH 023/141] Create output directories in write_* (#352) --- petab/v1/conditions.py | 4 +++- petab/v1/core.py | 4 +++- petab/v1/mapping.py | 4 +++- petab/v1/measurements.py | 4 +++- petab/v1/observables.py | 4 +++- petab/v1/parameters.py | 4 +++- petab/v1/yaml.py | 4 +++- petab/v2/experiments.py | 4 +++- 8 files changed, 24 insertions(+), 8 deletions(-) diff --git a/petab/v1/conditions.py b/petab/v1/conditions.py index 4e691d62..5dc46565 100644 --- a/petab/v1/conditions.py +++ b/petab/v1/conditions.py @@ -60,9 +60,11 @@ def write_condition_df(df: pd.DataFrame, filename: str | Path) -> None: Arguments: df: PEtab condition table - filename: Destination file name + filename: Destination file name. The parent directory will be created + if necessary. """ df = get_condition_df(df) + Path(filename).parent.mkdir(parents=True, exist_ok=True) df.to_csv(filename, sep="\t", index=True) diff --git a/petab/v1/core.py b/petab/v1/core.py index 10274b8c..3b4a4082 100644 --- a/petab/v1/core.py +++ b/petab/v1/core.py @@ -60,8 +60,10 @@ def write_simulation_df(df: pd.DataFrame, filename: str | Path) -> None: Arguments: df: PEtab simulation table - filename: Destination file name + filename: Destination file name. 
The parent directory will be created + if necessary. """ + Path(filename).parent.mkdir(parents=True, exist_ok=True) df.to_csv(filename, sep="\t", index=False) diff --git a/petab/v1/mapping.py b/petab/v1/mapping.py index bae9d5fb..813cf7d3 100644 --- a/petab/v1/mapping.py +++ b/petab/v1/mapping.py @@ -60,9 +60,11 @@ def write_mapping_df(df: pd.DataFrame, filename: str | Path) -> None: Arguments: df: PEtab mapping table - filename: Destination file name + filename: Destination file name. The parent directory will be created + if necessary. """ df = get_mapping_df(df) + Path(filename).parent.mkdir(parents=True, exist_ok=True) df.to_csv(filename, sep="\t", index=True) diff --git a/petab/v1/measurements.py b/petab/v1/measurements.py index 757ce9ce..f78511df 100644 --- a/petab/v1/measurements.py +++ b/petab/v1/measurements.py @@ -58,9 +58,11 @@ def write_measurement_df(df: pd.DataFrame, filename: str | Path) -> None: Arguments: df: PEtab measurement table - filename: Destination file name + filename: Destination file name. The parent directory will be created + if necessary. """ df = get_measurement_df(df) + Path(filename).parent.mkdir(parents=True, exist_ok=True) df.to_csv(filename, sep="\t", index=False) diff --git a/petab/v1/observables.py b/petab/v1/observables.py index 1485302d..411c2a4c 100644 --- a/petab/v1/observables.py +++ b/petab/v1/observables.py @@ -67,9 +67,11 @@ def write_observable_df(df: pd.DataFrame, filename: str | Path) -> None: Arguments: df: PEtab observable table - filename: Destination file name + filename: Destination file name. The parent directory will be created + if necessary. 
""" df = get_observable_df(df) + Path(filename).parent.mkdir(parents=True, exist_ok=True) df.to_csv(filename, sep="\t", index=True) diff --git a/petab/v1/parameters.py b/petab/v1/parameters.py index 20457dcb..c2f37f62 100644 --- a/petab/v1/parameters.py +++ b/petab/v1/parameters.py @@ -112,9 +112,11 @@ def write_parameter_df(df: pd.DataFrame, filename: str | Path) -> None: Arguments: df: PEtab parameter table - filename: Destination file name + filename: Destination file name. The parent directory will be created + if necessary. """ df = get_parameter_df(df) + Path(filename).parent.mkdir(parents=True, exist_ok=True) df.to_csv(filename, sep="\t", index=True) diff --git a/petab/v1/yaml.py b/petab/v1/yaml.py index 51159453..ac134d5b 100644 --- a/petab/v1/yaml.py +++ b/petab/v1/yaml.py @@ -238,8 +238,10 @@ def write_yaml(yaml_config: dict[str, Any], filename: str | Path) -> None: Arguments: yaml_config: Data to write - filename: File to create + filename: Destination file name. The parent directory will be created + if necessary. """ + Path(filename).parent.mkdir(parents=True, exist_ok=True) with open(filename, "w") as outfile: yaml.dump( yaml_config, outfile, default_flow_style=False, sort_keys=False diff --git a/petab/v2/experiments.py b/petab/v2/experiments.py index 7833fa1f..17137b5c 100644 --- a/petab/v2/experiments.py +++ b/petab/v2/experiments.py @@ -34,7 +34,9 @@ def write_experiment_df(df: pd.DataFrame, filename: str | Path) -> None: Arguments: df: PEtab experiments table - filename: Destination file name + filename: Destination file name. The parent directory will be created + if necessary. 
""" df = get_experiment_df(df) + Path(filename).parent.mkdir(parents=True, exist_ok=True) df.to_csv(filename, sep="\t", index=False) From ec584630c30392f422079e6c0e7323d6269b0fe2 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Fri, 20 Dec 2024 15:12:12 +0100 Subject: [PATCH 024/141] v2: fixes to validation and upconversion (#351) * don't create unnecessary experiment tables * write yaml file *after* updating the config dict * check for missing experiments * handle anonymous experiments --- petab/v2/lint.py | 38 +++++++++++++++++++++++----------- petab/v2/petab1to2.py | 48 ++++++++++++++++++++++++++++--------------- 2 files changed, 57 insertions(+), 29 deletions(-) diff --git a/petab/v2/lint.py b/petab/v2/lint.py index 2473c74d..c5cf5eb9 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -228,11 +228,16 @@ class CheckValidPetabIdColumn(ValidationTask): """A task to check that a given column contains only valid PEtab IDs.""" def __init__( - self, table_name: str, column_name: str, required_column: bool = True + self, + table_name: str, + column_name: str, + required_column: bool = True, + ignore_nan: bool = False, ): self.table_name = table_name self.column_name = column_name self.required_column = required_column + self.ignore_nan = ignore_nan def run(self, problem: Problem) -> ValidationIssue | None: df = getattr(problem, f"{self.table_name}_df") @@ -248,7 +253,10 @@ def run(self, problem: Problem) -> ValidationIssue | None: return try: - check_ids(df[self.column_name].values, kind=self.column_name) + ids = df[self.column_name].values + if self.ignore_nan: + ids = ids[~pd.isna(ids)] + check_ids(ids, kind=self.column_name) except ValueError as e: return ValidationError(str(e)) @@ -308,21 +316,26 @@ def run(self, problem: Problem) -> ValidationIssue | None: except AssertionError as e: return ValidationError(str(e)) - # TODO: introduce some option for validation partial vs full + # TODO: introduce some option for validation of partial vs full # problem. 
if this is supposed to be a complete problem, a missing # condition table should be an error if the measurement table refers - # to conditions - - # check that measured experiments - if problem.experiment_df is None: - return - + # to conditions, otherwise it should maximally be a warning used_experiments = set(problem.measurement_df[EXPERIMENT_ID].values) - available_experiments = set( - problem.experiment_df[EXPERIMENT_ID].unique() + # handle default-experiment + used_experiments = set( + filter( + lambda x: not pd.isna(x), + used_experiments, + ) + ) + # check that measured experiments exist + available_experiments = ( + set(problem.experiment_df[EXPERIMENT_ID].unique()) + if problem.experiment_df is not None + else set() ) if missing_experiments := (used_experiments - available_experiments): - raise AssertionError( + return ValidationError( "Measurement table references experiments that " "are not specified in the experiments table: " + str(missing_experiments) @@ -826,6 +839,7 @@ def append_overrides(overrides): CheckMeasurementTable(), CheckConditionTable(), CheckExperimentTable(), + CheckValidPetabIdColumn("measurement", EXPERIMENT_ID, ignore_nan=True), CheckValidPetabIdColumn("experiment", EXPERIMENT_ID), CheckValidPetabIdColumn("experiment", CONDITION_ID), CheckExperimentConditionsExist(), diff --git a/petab/v2/petab1to2.py b/petab/v2/petab1to2.py index 78304328..a5690882 100644 --- a/petab/v2/petab1to2.py +++ b/petab/v2/petab1to2.py @@ -9,14 +9,11 @@ import pandas as pd from pandas.io.common import get_handle, is_url -import petab.v1.C -from petab.models import MODEL_TYPE_SBML -from petab.v1 import Problem as ProblemV1 -from petab.yaml import get_path_prefix - from .. 
import v1, v2 -from ..v1.yaml import load_yaml, validate, write_yaml +from ..v1 import Problem as ProblemV1 +from ..v1.yaml import get_path_prefix, load_yaml, validate, write_yaml from ..versions import get_major_version +from .models import MODEL_TYPE_SBML __all__ = ["petab1to2"] @@ -63,18 +60,18 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None): if get_major_version(yaml_config) != 1: raise ValueError("PEtab problem is not version 1.") petab_problem = ProblemV1.from_yaml(yaml_file or yaml_config) + # get rid of conditionName column if present (unsupported in v2) + petab_problem.condition_df = petab_problem.condition_df.drop( + columns=[v1.C.CONDITION_NAME], errors="ignore" + ) if v1.lint_problem(petab_problem): raise ValueError("Provided PEtab problem does not pass linting.") + output_dir = Path(output_dir) + # Update YAML file new_yaml_config = _update_yaml(yaml_config) - # Write new YAML file - output_dir = Path(output_dir) - output_dir.mkdir(parents=True, exist_ok=True) - new_yaml_file = output_dir / Path(yaml_file).name - write_yaml(new_yaml_config, new_yaml_file) - # Update tables # condition tables, observable tables, SBML files, parameter table: # no changes - just copy @@ -104,6 +101,19 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None): def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str: if not sim_cond_id and not preeq_cond_id: return "" + # check whether the conditions will exist in the v2 condition table + sim_cond_exists = ( + petab_problem.condition_df.loc[sim_cond_id].notna().any() + ) + preeq_cond_exists = ( + preeq_cond_id + and petab_problem.condition_df.loc[preeq_cond_id].notna().any() + ) + if not sim_cond_exists and not preeq_cond_exists: + # if we have only all-NaN conditions, we don't create a new + # experiment + return "" + if preeq_cond_id: preeq_cond_id = f"{preeq_cond_id}_" exp_id = f"experiment__{preeq_cond_id}__{sim_cond_id}" @@ -126,6 +136,8 @@ def 
create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str: sim_cond_id = row[v1.C.SIMULATION_CONDITION_ID] preeq_cond_id = row.get(v1.C.PREEQUILIBRATION_CONDITION_ID, "") exp_id = create_experiment_id(sim_cond_id, preeq_cond_id) + if not exp_id: + continue if preeq_cond_id: experiments.append( { @@ -165,10 +177,8 @@ def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str: # add pre-eq condition id if not present or convert to string # for simplicity if v1.C.PREEQUILIBRATION_CONDITION_ID in measurement_df.columns: - measurement_df[ - v1.C.PREEQUILIBRATION_CONDITION_ID - ] = measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID].astype( - str + measurement_df.fillna( + {v1.C.PREEQUILIBRATION_CONDITION_ID: ""}, inplace=True ) else: measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID] = "" @@ -177,7 +187,7 @@ def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str: petab_problem.condition_df is not None and len( set(petab_problem.condition_df.columns) - - {petab.v1.C.CONDITION_NAME} + - {v1.C.CONDITION_NAME} ) == 0 ): @@ -209,6 +219,10 @@ def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str: measurement_df, get_dest_path(measurement_file) ) + # Write new YAML file + new_yaml_file = output_dir / Path(yaml_file).name + write_yaml(new_yaml_config, new_yaml_file) + # validate updated Problem validation_issues = v2.lint_problem(new_yaml_file) From 89004e4be134fedb54385a8d850e738392c4896c Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 10 Mar 2025 14:40:49 +0100 Subject: [PATCH 025/141] GHA: Test with Python3.13 --- .github/workflows/ci_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci_tests.yml b/.github/workflows/ci_tests.yml index 620d12cd..a719b3ef 100644 --- a/.github/workflows/ci_tests.yml +++ b/.github/workflows/ci_tests.yml @@ -12,7 +12,7 @@ jobs: strategy: matrix: platform: [windows-latest, macos-latest, ubuntu-latest] - python-version: ["3.10", "3.12"] + 
python-version: ["3.10", "3.13"] runs-on: ${{ matrix.platform }} steps: From ab5ad9f7c88ccb0e1f0bae0b54eff8c3e70edcb9 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 10 Mar 2025 14:51:11 +0100 Subject: [PATCH 026/141] GHA: fixup PyPI URL --- .github/workflows/deploy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 2b00c08f..46d280d4 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -9,7 +9,7 @@ jobs: runs-on: ubuntu-latest environment: name: pypi - url: https://pypi.org/p/sbmlmath + url: https://pypi.org/p/petab permissions: id-token: write From cdaaaf27249e96597906055ef9c117b17c0d2d0d Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 10 Mar 2025 15:12:58 +0100 Subject: [PATCH 027/141] Update/use ProblemConfig (#353) * Update/use ProblemConfig * add to_yaml * refactor model, avoid extra elements in object tree * use ProblemConfig in petab1to2 * review * format_version_tuple --- petab/v1/problem.py | 43 +++++++++++++++++-------------------------- petab/v2/petab1to2.py | 26 ++++++++++++-------------- petab/v2/problem.py | 32 ++++++++++++++++++++++++-------- 3 files changed, 53 insertions(+), 48 deletions(-) diff --git a/petab/v1/problem.py b/petab/v1/problem.py index 373b6b47..91bbcd64 100644 --- a/petab/v1/problem.py +++ b/petab/v1/problem.py @@ -11,7 +11,7 @@ from warnings import warn import pandas as pd -from pydantic import AnyUrl, BaseModel, Field, RootModel +from pydantic import AnyUrl, BaseModel, Field from ..versions import get_major_version from . 
import ( @@ -1185,33 +1185,14 @@ def add_measurement( ) -class VersionNumber(RootModel): - root: str | int - - -class ListOfFiles(RootModel): - """List of files.""" - - root: list[str | AnyUrl] = Field(..., description="List of files.") - - def __iter__(self): - return iter(self.root) - - def __len__(self): - return len(self.root) - - def __getitem__(self, index): - return self.root[index] - - class SubProblem(BaseModel): """A `problems` object in the PEtab problem configuration.""" - sbml_files: ListOfFiles = [] - measurement_files: ListOfFiles = [] - condition_files: ListOfFiles = [] - observable_files: ListOfFiles = [] - visualization_files: ListOfFiles = [] + sbml_files: list[str | AnyUrl] = [] + measurement_files: list[str | AnyUrl] = [] + condition_files: list[str | AnyUrl] = [] + observable_files: list[str | AnyUrl] = [] + visualization_files: list[str | AnyUrl] = [] class ProblemConfig(BaseModel): @@ -1227,6 +1208,16 @@ class ProblemConfig(BaseModel): description="The base path to resolve relative paths.", exclude=True, ) - format_version: VersionNumber = 1 + format_version: str | int = 1 parameter_file: str | AnyUrl | None = None problems: list[SubProblem] = [] + + def to_yaml(self, filename: str | Path): + """Write the configuration to a YAML file. + + :param filename: Destination file name. The parent directory will be + created if necessary. + """ + from .yaml import write_yaml + + write_yaml(self.model_dump(), filename) diff --git a/petab/v2/petab1to2.py b/petab/v2/petab1to2.py index a5690882..0d48d9cf 100644 --- a/petab/v2/petab1to2.py +++ b/petab/v2/petab1to2.py @@ -10,10 +10,10 @@ from pandas.io.common import get_handle, is_url from .. 
import v1, v2 -from ..v1 import Problem as ProblemV1 -from ..v1.yaml import get_path_prefix, load_yaml, validate, write_yaml +from ..v1.yaml import get_path_prefix, load_yaml, validate from ..versions import get_major_version from .models import MODEL_TYPE_SBML +from .problem import ProblemConfig __all__ = ["petab1to2"] @@ -59,7 +59,7 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None): validate(yaml_config, path_prefix=path_prefix) if get_major_version(yaml_config) != 1: raise ValueError("PEtab problem is not version 1.") - petab_problem = ProblemV1.from_yaml(yaml_file or yaml_config) + petab_problem = v1.Problem.from_yaml(yaml_file or yaml_config) # get rid of conditionName column if present (unsupported in v2) petab_problem.condition_df = petab_problem.condition_df.drop( columns=[v1.C.CONDITION_NAME], errors="ignore" @@ -71,6 +71,7 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None): # Update YAML file new_yaml_config = _update_yaml(yaml_config) + new_yaml_config = ProblemConfig(**new_yaml_config) # Update tables # condition tables, observable tables, SBML files, parameter table: @@ -78,19 +79,16 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None): file = yaml_config[v2.C.PARAMETER_FILE] _copy_file(get_src_path(file), Path(get_dest_path(file))) - for problem_config in yaml_config[v2.C.PROBLEMS]: + for problem_config in new_yaml_config.problems: for file in chain( - problem_config.get(v2.C.OBSERVABLE_FILES, []), - ( - model[v2.C.MODEL_LOCATION] - for model in problem_config.get(v2.C.MODEL_FILES, {}).values() - ), - problem_config.get(v2.C.VISUALIZATION_FILES, []), + problem_config.observable_files, + (model.location for model in problem_config.model_files.values()), + problem_config.visualization_files, ): _copy_file(get_src_path(file), Path(get_dest_path(file))) # Update condition table - for condition_file in problem_config.get(v2.C.CONDITION_FILES, []): + for condition_file in 
problem_config.condition_files: condition_df = v1.get_condition_df(get_src_path(condition_file)) condition_df = v1v2_condition_df(condition_df, petab_problem.model) v2.write_condition_df(condition_df, get_dest_path(condition_file)) @@ -159,12 +157,12 @@ def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str: raise ValueError( f"Experiment table file {exp_table_path} already exists." ) - problem_config[v2.C.EXPERIMENT_FILES] = [exp_table_path.name] + problem_config.experiment_files.append("experiments.tsv") v2.write_experiment_df( v2.get_experiment_df(pd.DataFrame(experiments)), exp_table_path ) - for measurement_file in problem_config.get(v2.C.MEASUREMENT_FILES, []): + for measurement_file in problem_config.measurement_files: measurement_df = v1.get_measurement_df( get_src_path(measurement_file) ) @@ -221,7 +219,7 @@ def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str: # Write new YAML file new_yaml_file = output_dir / Path(yaml_file).name - write_yaml(new_yaml_config, new_yaml_file) + new_yaml_config.to_yaml(new_yaml_file) # validate updated Problem validation_issues = v2.lint_problem(new_yaml_file) diff --git a/petab/v2/problem.py b/petab/v2/problem.py index bcf38768..d07c7f2e 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -25,7 +25,6 @@ yaml, ) from ..v1.models.model import Model, model_factory -from ..v1.problem import ListOfFiles, VersionNumber from ..v1.yaml import get_path_prefix from ..v2.C import * # noqa: F403 from ..versions import parse_version @@ -994,13 +993,14 @@ class ModelFile(BaseModel): class SubProblem(BaseModel): """A `problems` object in the PEtab problem configuration.""" + # TODO: consider changing str to Path model_files: dict[str, ModelFile] | None = {} - measurement_files: ListOfFiles = [] - condition_files: ListOfFiles = [] - experiment_files: ListOfFiles = [] - observable_files: ListOfFiles = [] - visualization_files: ListOfFiles = [] - mapping_files: ListOfFiles = [] + measurement_files: 
list[str | AnyUrl] = [] + condition_files: list[str | AnyUrl] = [] + experiment_files: list[str | AnyUrl] = [] + observable_files: list[str | AnyUrl] = [] + visualization_files: list[str | AnyUrl] = [] + mapping_files: list[str | AnyUrl] = [] class ExtensionConfig(BaseModel): @@ -1024,7 +1024,23 @@ class ProblemConfig(BaseModel): description="The base path to resolve relative paths.", exclude=True, ) - format_version: VersionNumber = "2.0.0" + format_version: str = "2.0.0" parameter_file: str | AnyUrl | None = None problems: list[SubProblem] = [] extensions: list[ExtensionConfig] = [] + + def to_yaml(self, filename: str | Path): + """Write the configuration to a YAML file. + + :param filename: Destination file name. The parent directory will be + created if necessary. + """ + from ..v1.yaml import write_yaml + + write_yaml(self.model_dump(), filename) + + @property + def format_version_tuple(self) -> tuple[int, int, int, str]: + """The format version as a tuple of major/minor/patch `int`s and a + suffix.""" + return parse_version(self.format_version) From 98b011502671f66edff043f5eb187c8e2198b240 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Wed, 12 Mar 2025 13:48:53 +0100 Subject: [PATCH 028/141] pre-commit update, re-ruff (#357) --- .pre-commit-config.yaml | 4 +- doc/example/distributions.ipynb | 150 ++++++++++-------- petab/__init__.py | 1 + petab/petablint.py | 6 +- petab/v1/C.py | 1 + petab/v1/composite_problem.py | 1 + petab/v1/core.py | 1 + petab/v1/distributions.py | 1 + petab/v1/format_version.py | 1 + petab/v1/lint.py | 12 +- petab/v1/mapping.py | 1 + petab/v1/math/SympyVisitor.py | 1 + petab/v1/math/__init__.py | 1 + petab/v1/models/__init__.py | 1 + petab/v1/models/model.py | 10 +- petab/v1/models/pysb_model.py | 4 +- petab/v1/models/sbml_model.py | 1 + petab/v1/parameter_mapping.py | 6 +- petab/v1/parameters.py | 2 +- petab/v1/priors.py | 17 +- petab/v1/problem.py | 5 +- petab/v1/sbml.py | 6 +- petab/v1/simplify.py | 1 + petab/v1/simulate.py | 1 + 
petab/v1/visualize/__init__.py | 1 + petab/v1/visualize/cli.py | 1 + petab/v1/visualize/helper_functions.py | 1 - petab/v1/visualize/lint.py | 1 + .../v1/visualize/plot_data_and_simulation.py | 5 +- petab/v1/visualize/plot_residuals.py | 1 + petab/v1/visualize/plotter.py | 1 + petab/v1/visualize/plotting.py | 15 +- petab/v1/yaml.py | 1 + petab/v2/C.py | 1 + petab/v2/__init__.py | 1 + petab/v2/_helpers.py | 1 + petab/v2/conditions.py | 1 + petab/v2/experiments.py | 1 + petab/v2/lint.py | 1 + petab/v2/models/__init__.py | 1 + petab/v2/models/model.py | 1 + petab/v2/models/pysb_model.py | 1 + petab/v2/models/sbml_model.py | 1 + petab/v2/petab1to2.py | 1 + petab/v2/problem.py | 7 +- petab/version.py | 1 + petab/versions.py | 1 + tests/v1/math/test_math.py | 12 +- tests/v1/test_calculate.py | 2 +- tests/v1/test_combine.py | 1 + tests/v1/test_conditions.py | 1 + tests/v1/test_deprecated.py | 1 + tests/v1/test_lint.py | 6 +- tests/v1/test_measurements.py | 1 + tests/v1/test_model_pysb.py | 1 + tests/v1/test_observables.py | 1 + tests/v1/test_parameters.py | 1 + tests/v1/test_petab.py | 6 +- tests/v1/test_sbml.py | 6 +- tests/v1/test_simplify.py | 1 + tests/v1/test_simulate.py | 1 + tests/v1/test_yaml.py | 1 + tests/v2/test_experiments.py | 1 + tests/v2/test_mapping.py | 1 + 64 files changed, 191 insertions(+), 135 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index d32e5b68..4ba9c6fb 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -2,7 +2,7 @@ # See https://pre-commit.com/hooks.html for more hooks repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.4.0 + rev: v5.0.0 hooks: - id: check-added-large-files - id: check-merge-conflict @@ -12,7 +12,7 @@ repos: - id: trailing-whitespace - repo: https://github.com/astral-sh/ruff-pre-commit # Ruff version. - rev: v0.1.11 + rev: v0.9.10 hooks: # Run the linter. 
- id: ruff diff --git a/doc/example/distributions.ipynb b/doc/example/distributions.ipynb index 86235fe1..7776ef8d 100644 --- a/doc/example/distributions.ipynb +++ b/doc/example/distributions.ipynb @@ -1,8 +1,9 @@ { "cells": [ { - "metadata": {}, "cell_type": "markdown", + "id": "372289411a2aa7b3", + "metadata": {}, "source": [ "# Prior distributions in PEtab\n", "\n", @@ -18,14 +19,16 @@ "* *Initialization priors* can be used as a hint for the optimization algorithm. They will not enter the objective function. They are specified in the `initializationPriorType` and `initializationPriorParameters` columns of the parameter table.\n", "\n", "\n" - ], - "id": "372289411a2aa7b3" + ] }, { + "cell_type": "code", + "execution_count": null, + "id": "initial_id", "metadata": { "collapsed": true }, - "cell_type": "code", + "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import numpy as np\n", @@ -45,143 +48,156 @@ " sample = prior.sample(10000)\n", "\n", " # pdf\n", - " xmin = min(sample.min(), prior.lb_scaled if prior.bounds is not None else sample.min())\n", - " xmax = max(sample.max(), prior.ub_scaled if prior.bounds is not None else sample.max())\n", + " xmin = min(\n", + " sample.min(),\n", + " prior.lb_scaled if prior.bounds is not None else sample.min(),\n", + " )\n", + " xmax = max(\n", + " sample.max(),\n", + " prior.ub_scaled if prior.bounds is not None else sample.max(),\n", + " )\n", " x = np.linspace(xmin, xmax, 500)\n", " y = prior.pdf(x)\n", - " ax.plot(x, y, color='red', label='pdf')\n", + " ax.plot(x, y, color=\"red\", label=\"pdf\")\n", "\n", - " sns.histplot(sample, stat='density', ax=ax, label=\"sample\")\n", + " sns.histplot(sample, stat=\"density\", ax=ax, label=\"sample\")\n", "\n", " # bounds\n", " if prior.bounds is not None:\n", " for bound in (prior.lb_scaled, prior.ub_scaled):\n", " if bound is not None and np.isfinite(bound):\n", - " ax.axvline(bound, color='black', linestyle='--', label='bound')\n", + " ax.axvline(bound, 
color=\"black\", linestyle=\"--\", label=\"bound\")\n", "\n", " ax.set_title(str(prior))\n", - " ax.set_xlabel('Parameter value on the parameter scale')\n", + " ax.set_xlabel(\"Parameter value on the parameter scale\")\n", " ax.grid(False)\n", " handles, labels = ax.get_legend_handles_labels()\n", - " unique_labels = dict(zip(labels, handles))\n", + " unique_labels = dict(zip(labels, handles, strict=False))\n", " ax.legend(unique_labels.values(), unique_labels.keys())\n", " plt.show()" - ], - "id": "initial_id", - "outputs": [], - "execution_count": null + ] }, { - "metadata": {}, "cell_type": "markdown", - "source": "The basic distributions are the uniform, normal, Laplace, log-normal, and log-laplace distributions:\n", - "id": "db36a4a93622ccb8" + "id": "db36a4a93622ccb8", + "metadata": {}, + "source": "The basic distributions are the uniform, normal, Laplace, log-normal, and log-laplace distributions:\n" }, { - "metadata": {}, "cell_type": "code", + "execution_count": null, + "id": "4f09e50a3db06d9f", + "metadata": {}, + "outputs": [], "source": [ "plot(Prior(UNIFORM, (0, 1)))\n", "plot(Prior(NORMAL, (0, 1)))\n", "plot(Prior(LAPLACE, (0, 1)))\n", "plot(Prior(LOG_NORMAL, (0, 1)))\n", "plot(Prior(LOG_LAPLACE, (1, 0.5)))" - ], - "id": "4f09e50a3db06d9f", - "outputs": [], - "execution_count": null + ] }, { - "metadata": {}, "cell_type": "markdown", - "source": "If a parameter scale is specified (`parameterScale=lin|log|log10` not a `parameterScale*`-type distribution), the sample is transformed accordingly (but not the distribution parameters):\n", - "id": "dab4b2d1e0f312d8" + "id": "dab4b2d1e0f312d8", + "metadata": {}, + "source": "If a parameter scale is specified (`parameterScale=lin|log|log10` not a `parameterScale*`-type distribution), the sample is transformed accordingly (but not the distribution parameters):\n" }, { - "metadata": {}, "cell_type": "code", + "execution_count": null, + "id": "f6192c226f179ef9", + "metadata": {}, + "outputs": [], "source": [ 
"plot(Prior(NORMAL, (10, 2), transformation=LIN))\n", "plot(Prior(NORMAL, (10, 2), transformation=LOG))\n", "\n", - "# Note that the log-normal distribution is different from a log-transformed normal distribution:\n", + "# Note that the log-normal distribution is different\n", + "# from a log-transformed normal distribution:\n", "plot(Prior(LOG_NORMAL, (10, 2), transformation=LIN))" - ], - "id": "f6192c226f179ef9", - "outputs": [], - "execution_count": null + ] }, { - "metadata": {}, "cell_type": "markdown", - "source": "On the log-transformed parameter scale, `Log*` and `parameterScale*` distributions are equivalent:", - "id": "4281ed48859e6431" + "id": "4281ed48859e6431", + "metadata": {}, + "source": "On the log-transformed parameter scale, `Log*` and `parameterScale*` distributions are equivalent:" }, { - "metadata": {}, "cell_type": "code", + "execution_count": null, + "id": "34c95268e8921070", + "metadata": {}, + "outputs": [], "source": [ "plot(Prior(LOG_NORMAL, (10, 2), transformation=LOG))\n", "plot(Prior(PARAMETER_SCALE_NORMAL, (10, 2)))" - ], - "id": "34c95268e8921070", - "outputs": [], - "execution_count": null + ] }, { - "metadata": {}, "cell_type": "markdown", - "source": "Prior distributions can also be defined on the parameter scale by using the types `parameterScaleUniform`, `parameterScaleNormal` or `parameterScaleLaplace`. In these cases, 1) the distribution parameter are interpreted on the transformed parameter scale, and 2) a sample from the given distribution is used directly, without applying any transformation according to `parameterScale` (this implies, that for `parameterScale=lin`, there is no difference between `parameterScaleUniform` and `uniform`):", - "id": "263c9fd31156a4d5" + "id": "263c9fd31156a4d5", + "metadata": {}, + "source": "Prior distributions can also be defined on the parameter scale by using the types `parameterScaleUniform`, `parameterScaleNormal` or `parameterScaleLaplace`. 
In these cases, 1) the distribution parameter are interpreted on the transformed parameter scale, and 2) a sample from the given distribution is used directly, without applying any transformation according to `parameterScale` (this implies, that for `parameterScale=lin`, there is no difference between `parameterScaleUniform` and `uniform`):" }, { - "metadata": {}, "cell_type": "code", + "execution_count": null, + "id": "5ca940bc24312fc6", + "metadata": {}, + "outputs": [], "source": [ "plot(Prior(UNIFORM, (0.01, 2), transformation=LOG10))\n", "plot(Prior(PARAMETER_SCALE_UNIFORM, (0.01, 2), transformation=LOG10))\n", "\n", "plot(Prior(UNIFORM, (0.01, 2), transformation=LIN))\n", - "plot(Prior(PARAMETER_SCALE_UNIFORM, (0.01, 2), transformation=LIN))\n" - ], - "id": "5ca940bc24312fc6", - "outputs": [], - "execution_count": null + "plot(Prior(PARAMETER_SCALE_UNIFORM, (0.01, 2), transformation=LIN))" + ] }, { - "metadata": {}, "cell_type": "markdown", - "source": "To prevent the sampled parameters from exceeding the bounds, the sampled parameters are clipped to the bounds. The bounds are defined in the parameter table. Note that the current implementation does not support sampling from a truncated distribution. Instead, the samples are clipped to the bounds. This may introduce unwanted bias, and thus, should only be used with caution (i.e., the bounds should be chosen wide enough):", - "id": "b1a8b17d765db826" + "id": "b1a8b17d765db826", + "metadata": {}, + "source": "To prevent the sampled parameters from exceeding the bounds, the sampled parameters are clipped to the bounds. The bounds are defined in the parameter table. Note that the current implementation does not support sampling from a truncated distribution. Instead, the samples are clipped to the bounds. 
This may introduce unwanted bias, and thus, should only be used with caution (i.e., the bounds should be chosen wide enough):" }, { - "metadata": {}, "cell_type": "code", - "source": [ - "plot(Prior(NORMAL, (0, 1), bounds=(-4, 4))) # negligible clipping-bias at 4 sigma\n", - "plot(Prior(UNIFORM, (0, 1), bounds=(0.1, 0.9))) # significant clipping-bias" - ], + "execution_count": null, "id": "4ac42b1eed759bdd", + "metadata": {}, "outputs": [], - "execution_count": null + "source": [ + "plot(\n", + " Prior(NORMAL, (0, 1), bounds=(-4, 4))\n", + ") # negligible clipping-bias at 4 sigma\n", + "plot(Prior(UNIFORM, (0, 1), bounds=(0.1, 0.9))) # significant clipping-bias" + ] }, { - "metadata": {}, "cell_type": "markdown", - "source": "Further distribution examples:", - "id": "45ffce1341483f24" + "id": "45ffce1341483f24", + "metadata": {}, + "source": "Further distribution examples:" }, { - "metadata": {}, "cell_type": "code", + "execution_count": null, + "id": "581e1ac431860419", + "metadata": {}, + "outputs": [], "source": [ "plot(Prior(NORMAL, (10, 1), bounds=(6, 14), transformation=\"log10\"))\n", - "plot(Prior(PARAMETER_SCALE_NORMAL, (10, 1), bounds=(10**6, 10**14), transformation=\"log10\"))\n", + "plot(\n", + " Prior(\n", + " PARAMETER_SCALE_NORMAL,\n", + " (10, 1),\n", + " bounds=(10**6, 10**14),\n", + " transformation=\"log10\",\n", + " )\n", + ")\n", "plot(Prior(LAPLACE, (10, 2), bounds=(6, 14)))" - ], - "id": "581e1ac431860419", - "outputs": [], - "execution_count": null + ] } ], "metadata": { diff --git a/petab/__init__.py b/petab/__init__.py index 36110069..dd30d186 100644 --- a/petab/__init__.py +++ b/petab/__init__.py @@ -8,6 +8,7 @@ PEtab should use for operations that can be performed in parallel. By default, all operations are performed sequentially. 
""" + import importlib import sys from functools import partial diff --git a/petab/petablint.py b/petab/petablint.py index 43796c42..244b7536 100755 --- a/petab/petablint.py +++ b/petab/petablint.py @@ -161,7 +161,7 @@ def main(): validate(args.yaml_file_name) except SchemaValidationError as e: logger.error( - "Provided YAML file does not adhere to PEtab " f"schema: {e}" + f"Provided YAML file does not adhere to PEtab schema: {e}" ) sys.exit(1) @@ -205,9 +205,7 @@ def main(): if args.parameter_file_name: logger.debug(f"\tParameter table: {args.parameter_file_name}") if args.visualization_file_name: - logger.debug( - "\tVisualization table: " f"{args.visualization_file_name}" - ) + logger.debug(f"\tVisualization table: {args.visualization_file_name}") try: problem = petab.Problem.from_files( diff --git a/petab/v1/C.py b/petab/v1/C.py index be044a5c..0c9310b2 100644 --- a/petab/v1/C.py +++ b/petab/v1/C.py @@ -2,6 +2,7 @@ """ This file contains constant definitions. """ + import math as _math import sys diff --git a/petab/v1/composite_problem.py b/petab/v1/composite_problem.py index 5f07d523..f887ec03 100644 --- a/petab/v1/composite_problem.py +++ b/petab/v1/composite_problem.py @@ -1,4 +1,5 @@ """PEtab problems consisting of multiple models""" + import os import pandas as pd diff --git a/petab/v1/core.py b/petab/v1/core.py index 3b4a4082..1149c67e 100644 --- a/petab/v1/core.py +++ b/petab/v1/core.py @@ -1,4 +1,5 @@ """PEtab core functions (or functions that don't fit anywhere else)""" + import logging import os import re diff --git a/petab/v1/distributions.py b/petab/v1/distributions.py index 418f5b44..23deb423 100644 --- a/petab/v1/distributions.py +++ b/petab/v1/distributions.py @@ -1,4 +1,5 @@ """Probability distributions used by PEtab.""" + from __future__ import annotations import abc diff --git a/petab/v1/format_version.py b/petab/v1/format_version.py index a8d63484..f303237e 100644 --- a/petab/v1/format_version.py +++ b/petab/v1/format_version.py @@ -1,2 +1,3 
@@ """PEtab file format version""" + __format_version__ = 1 diff --git a/petab/v1/lint.py b/petab/v1/lint.py index e970bfde..b2260b83 100644 --- a/petab/v1/lint.py +++ b/petab/v1/lint.py @@ -556,7 +556,7 @@ def check_parameter_bounds(parameter_df: pd.DataFrame) -> None: ] in [LOG, LOG10]: raise AssertionError( f"Bounds for {row[PARAMETER_SCALE]} scaled parameter " - f"{ row.name} must be positive." + f"{row.name} must be positive." ) if ( row.get(PARAMETER_SCALE, LIN) in [LOG, LOG10] @@ -586,8 +586,7 @@ def assert_parameter_prior_type_is_valid(parameter_df: pd.DataFrame) -> None: for _, row in parameter_df.iterrows(): if row[col] not in PRIOR_TYPES and not core.is_empty(row[col]): raise AssertionError( - f"{col} must be one of {PRIOR_TYPES} but is " - f"'{row[col]}'." + f"{col} must be one of {PRIOR_TYPES} but is '{row[col]}'." ) @@ -945,7 +944,7 @@ def lint_problem(problem: "petab.Problem") -> bool: for obs_id in problem.observable_df.index: if problem.model.has_entity_with_id(obs_id): logger.error( - f"Observable ID {obs_id} shadows model " "entity." + f"Observable ID {obs_id} shadows model entity." ) errors_occurred = True else: @@ -1002,8 +1001,7 @@ def lint_problem(problem: "petab.Problem") -> bool: or problem.observable_df is None ): logger.warning( - "Not all files of the PEtab problem definition could " - "be checked." + "Not all files of the PEtab problem definition could be checked." 
) else: logger.info("PEtab format check completed successfully.") @@ -1213,7 +1211,7 @@ def check_ids(ids: Iterable[str], kind: str = "") -> None: offset = 2 error_output = "\n".join( [ - f"Line {index+offset}: " + f"Line {index + offset}: " + ("Missing ID" if pd.isna(_id) else _id) for index, _id in invalids ] diff --git a/petab/v1/mapping.py b/petab/v1/mapping.py index 813cf7d3..81f77017 100644 --- a/petab/v1/mapping.py +++ b/petab/v1/mapping.py @@ -1,4 +1,5 @@ """Functionality related to the PEtab entity mapping table""" + # TODO: Move to petab.v2.mapping from pathlib import Path diff --git a/petab/v1/math/SympyVisitor.py b/petab/v1/math/SympyVisitor.py index 016e872c..b8154301 100644 --- a/petab/v1/math/SympyVisitor.py +++ b/petab/v1/math/SympyVisitor.py @@ -1,4 +1,5 @@ """PEtab-math to sympy conversion.""" + import sympy as sp from sympy.logic.boolalg import Boolean, BooleanFalse, BooleanTrue diff --git a/petab/v1/math/__init__.py b/petab/v1/math/__init__.py index 27ebacd2..b9a4f59b 100644 --- a/petab/v1/math/__init__.py +++ b/petab/v1/math/__init__.py @@ -1,2 +1,3 @@ """Functions for parsing and evaluating mathematical expressions.""" + from .sympify import sympify_petab # noqa: F401 diff --git a/petab/v1/models/__init__.py b/petab/v1/models/__init__.py index 938f55fb..a35ad432 100644 --- a/petab/v1/models/__init__.py +++ b/petab/v1/models/__init__.py @@ -1,4 +1,5 @@ """Handling of different model types supported by PEtab.""" + #: SBML model type as used in a PEtab v2 yaml file as `language`. MODEL_TYPE_SBML = "sbml" #: PySB model type as used in a PEtab v2 yaml file as `language`. 
diff --git a/petab/v1/models/model.py b/petab/v1/models/model.py index 795c7f0b..e25ca0b2 100644 --- a/petab/v1/models/model.py +++ b/petab/v1/models/model.py @@ -1,4 +1,5 @@ """PEtab model abstraction""" + from __future__ import annotations import abc @@ -13,8 +14,7 @@ class Model(abc.ABC): """Base class for wrappers for any PEtab-supported model type""" @abc.abstractmethod - def __init__(self): - ... + def __init__(self): ... def __repr__(self): return f"<{self.__class__.__name__} {self.model_id!r}>" @@ -41,13 +41,11 @@ def to_file(self, filename: [str, Path]): @classmethod @property @abc.abstractmethod - def type_id(cls): - ... + def type_id(cls): ... @property @abc.abstractmethod - def model_id(self): - ... + def model_id(self): ... @abc.abstractmethod def get_parameter_value(self, id_: str) -> float: diff --git a/petab/v1/models/pysb_model.py b/petab/v1/models/pysb_model.py index f0147990..0b69d797 100644 --- a/petab/v1/models/pysb_model.py +++ b/petab/v1/models/pysb_model.py @@ -192,7 +192,7 @@ def parse_species_name( match = complex_constituent_pattern.match(complex_constituent) if not match: raise ValueError( - f"Invalid species name: '{name}' " f"('{complex_constituent}')" + f"Invalid species name: '{name}' ('{complex_constituent}')" ) monomer = match.groupdict()["monomer"] site_config_str = match.groupdict()["site_config"] @@ -208,7 +208,7 @@ def parse_species_name( elif config.startswith("'"): if not config.endswith("'"): raise ValueError( - f"Invalid species name: '{name}' " f"('{config}')" + f"Invalid species name: '{name}' ('{config}')" ) # strip quotes config = config[1:-1] diff --git a/petab/v1/models/sbml_model.py b/petab/v1/models/sbml_model.py index 55cd7b4d..8e8cf498 100644 --- a/petab/v1/models/sbml_model.py +++ b/petab/v1/models/sbml_model.py @@ -1,4 +1,5 @@ """Functions for handling SBML models""" + from __future__ import annotations import itertools diff --git a/petab/v1/parameter_mapping.py b/petab/v1/parameter_mapping.py index 
014b4a8e..06e31fe4 100644 --- a/petab/v1/parameter_mapping.py +++ b/petab/v1/parameter_mapping.py @@ -133,7 +133,7 @@ def get_optimization_to_simulation_parameter_mapping( if model: raise ValueError( - "Arguments `model` and `sbml_model` are " "mutually exclusive." + "Arguments `model` and `sbml_model` are mutually exclusive." ) model = SbmlModel(sbml_model=sbml_model) @@ -383,7 +383,7 @@ def get_parameter_mapping_for_condition( if model: raise ValueError( - "Arguments `model` and `sbml_model` are " "mutually exclusive." + "Arguments `model` and `sbml_model` are mutually exclusive." ) model = SbmlModel(sbml_model=sbml_model) @@ -495,7 +495,7 @@ def _apply_overrides_for_observable( overrides: list of overrides for noise or observable parameters """ for i, override in enumerate(overrides): - overridee_id = f"{override_type}Parameter{i+1}_{observable_id}" + overridee_id = f"{override_type}Parameter{i + 1}_{observable_id}" mapping[overridee_id] = override diff --git a/petab/v1/parameters.py b/petab/v1/parameters.py index c2f37f62..82e23669 100644 --- a/petab/v1/parameters.py +++ b/petab/v1/parameters.py @@ -201,7 +201,7 @@ def create_parameter_df( if model: raise ValueError( - "Arguments `model` and `sbml_model` are " "mutually exclusive." + "Arguments `model` and `sbml_model` are mutually exclusive." 
) model = SbmlModel(sbml_model=sbml_model) if include_optional: diff --git a/petab/v1/priors.py b/petab/v1/priors.py index e1263946..1d2b9802 100644 --- a/petab/v1/priors.py +++ b/petab/v1/priors.py @@ -1,4 +1,5 @@ """Functions related to prior handling.""" + from __future__ import annotations import copy @@ -384,17 +385,17 @@ def scaled_observable_formula(parameter_id, parameter_scale): ].iloc[0], } if PREEQUILIBRATION_CONDITION_ID in new_problem.measurement_df: - new_measurement[ - PREEQUILIBRATION_CONDITION_ID - ] = new_problem.measurement_df[PREEQUILIBRATION_CONDITION_ID].iloc[ - 0 - ] + new_measurement[PREEQUILIBRATION_CONDITION_ID] = ( + new_problem.measurement_df[PREEQUILIBRATION_CONDITION_ID].iloc[ + 0 + ] + ) new_measurement_dicts.append(new_measurement) # remove prior from parameter table - new_problem.parameter_df.loc[ - parameter_id, OBJECTIVE_PRIOR_TYPE - ] = np.nan + new_problem.parameter_df.loc[parameter_id, OBJECTIVE_PRIOR_TYPE] = ( + np.nan + ) new_problem.parameter_df.loc[ parameter_id, OBJECTIVE_PRIOR_PARAMETERS ] = np.nan diff --git a/petab/v1/problem.py b/petab/v1/problem.py index 91bbcd64..2405f5c0 100644 --- a/petab/v1/problem.py +++ b/petab/v1/problem.py @@ -1,4 +1,5 @@ """PEtab Problem class""" + from __future__ import annotations import os @@ -123,7 +124,7 @@ def __getattr__(self, name): if name in {"sbml_model", "sbml_reader", "sbml_document"}: return getattr(self.model, name) if self.model else None raise AttributeError( - f"'{self.__class__.__name__}' object has no " f"attribute '{name}'" + f"'{self.__class__.__name__}' object has no attribute '{name}'" ) def __setattr__(self, name, value): @@ -486,7 +487,7 @@ def to_files_generic( if self.model: if not isinstance(self.model, SbmlModel): raise NotImplementedError( - "Saving non-SBML models is " "currently not supported." + "Saving non-SBML models is currently not supported." 
) filenames["model_file"] = "model.xml" diff --git a/petab/v1/sbml.py b/petab/v1/sbml.py index 6395e41b..9e5549d2 100644 --- a/petab/v1/sbml.py +++ b/petab/v1/sbml.py @@ -258,9 +258,9 @@ def get_model_for_condition( condition_dict = {petab.SIMULATION_CONDITION_ID: sim_condition_id} if preeq_condition_id: - condition_dict[ - petab.PREEQUILIBRATION_CONDITION_ID - ] = preeq_condition_id + condition_dict[petab.PREEQUILIBRATION_CONDITION_ID] = ( + preeq_condition_id + ) cur_measurement_df = petab.measurements.get_rows_for_condition( measurement_df=petab_problem.measurement_df, condition=condition_dict, diff --git a/petab/v1/simplify.py b/petab/v1/simplify.py index c4cdeb91..78c039b1 100644 --- a/petab/v1/simplify.py +++ b/petab/v1/simplify.py @@ -1,4 +1,5 @@ """Functionality for simplifying PEtab problems""" + from math import nan import pandas as pd diff --git a/petab/v1/simulate.py b/petab/v1/simulate.py index 682c470f..334929ad 100644 --- a/petab/v1/simulate.py +++ b/petab/v1/simulate.py @@ -1,4 +1,5 @@ """PEtab simulator base class and related functions.""" + from __future__ import annotations import abc diff --git a/petab/v1/visualize/__init__.py b/petab/v1/visualize/__init__.py index 924be86a..15385697 100644 --- a/petab/v1/visualize/__init__.py +++ b/petab/v1/visualize/__init__.py @@ -6,6 +6,7 @@ ``import petab.visualize``. """ + # ruff: noqa: F401 import importlib.util diff --git a/petab/v1/visualize/cli.py b/petab/v1/visualize/cli.py index 72074936..1416cae0 100644 --- a/petab/v1/visualize/cli.py +++ b/petab/v1/visualize/cli.py @@ -1,4 +1,5 @@ """Command-line interface for visualization.""" + import argparse from pathlib import Path diff --git a/petab/v1/visualize/helper_functions.py b/petab/v1/visualize/helper_functions.py index b1a6f1b1..85b5d936 100644 --- a/petab/v1/visualize/helper_functions.py +++ b/petab/v1/visualize/helper_functions.py @@ -4,7 +4,6 @@ hence not be directly visible/usable when using `import petab.visualize`. 
""" - import pandas as pd from ..C import * diff --git a/petab/v1/visualize/lint.py b/petab/v1/visualize/lint.py index b5de74bc..29ea4f7d 100644 --- a/petab/v1/visualize/lint.py +++ b/petab/v1/visualize/lint.py @@ -1,4 +1,5 @@ """Validation of PEtab visualization files""" + from __future__ import annotations import logging diff --git a/petab/v1/visualize/plot_data_and_simulation.py b/petab/v1/visualize/plot_data_and_simulation.py index c76bcd43..5ca8c6fb 100644 --- a/petab/v1/visualize/plot_data_and_simulation.py +++ b/petab/v1/visualize/plot_data_and_simulation.py @@ -2,7 +2,6 @@ the same format. """ - import matplotlib.pyplot as plt import pandas as pd @@ -73,7 +72,7 @@ def plot_with_vis_spec( plotter = MPLPlotter(figure, dataprovider) else: raise NotImplementedError( - "Currently, only visualization with " "matplotlib is possible." + "Currently, only visualization with matplotlib is possible." ) return plotter.generate_figure(subplot_dir, format_=format_) @@ -150,7 +149,7 @@ def plot_without_vis_spec( plotter = MPLPlotter(figure, dataprovider) else: raise NotImplementedError( - "Currently, only visualization with " "matplotlib is possible." + "Currently, only visualization with matplotlib is possible." ) return plotter.generate_figure(subplot_dir, format_=format_) diff --git a/petab/v1/visualize/plot_residuals.py b/petab/v1/visualize/plot_residuals.py index 90298154..a45fcde3 100644 --- a/petab/v1/visualize/plot_residuals.py +++ b/petab/v1/visualize/plot_residuals.py @@ -1,6 +1,7 @@ """ Functions for plotting residuals. 
""" + from pathlib import Path import matplotlib diff --git a/petab/v1/visualize/plotter.py b/petab/v1/visualize/plotter.py index 2a1eaaa9..14af5650 100644 --- a/petab/v1/visualize/plotter.py +++ b/petab/v1/visualize/plotter.py @@ -1,4 +1,5 @@ """PEtab visualization plotter classes""" + import os from abc import ABC, abstractmethod diff --git a/petab/v1/visualize/plotting.py b/petab/v1/visualize/plotting.py index b607350b..e474c4c8 100644 --- a/petab/v1/visualize/plotting.py +++ b/petab/v1/visualize/plotting.py @@ -1,4 +1,5 @@ """PEtab visualization data selection and visualization settings classes""" + import warnings from numbers import Number, Real from pathlib import Path @@ -609,9 +610,9 @@ def get_data_series( isinstance(tmp_noise, Number) or tmp_noise.dtype == "float64" ): - measurements_to_plot.at[ - var_cond_id, "noise_model" - ] = tmp_noise + measurements_to_plot.at[var_cond_id, "noise_model"] = ( + tmp_noise + ) # standard error of mean measurements_to_plot.at[var_cond_id, "sem"] = np.std( @@ -619,9 +620,9 @@ def get_data_series( ) / np.sqrt(len(data_measurements)) # single replicates - measurements_to_plot.at[ - var_cond_id, "repl" - ] = data_measurements.values + measurements_to_plot.at[var_cond_id, "repl"] = ( + data_measurements.values + ) data_series = DataSeries(conditions_, measurements_to_plot) data_series.add_offsets(dataplot.xOffset, dataplot.yOffset) @@ -964,7 +965,7 @@ def _get_vis_spec_dependent_columns_dict( # get number of plots and create plotId-lists plot_id_column = [ - "plot%s" % str(ind + 1) + f"plot{ind + 1}" for ind, inner_list in enumerate(dataset_id_list) for _ in inner_list ] diff --git a/petab/v1/yaml.py b/petab/v1/yaml.py index ac134d5b..b8330028 100644 --- a/petab/v1/yaml.py +++ b/petab/v1/yaml.py @@ -1,4 +1,5 @@ """Code regarding the PEtab YAML config files""" + from __future__ import annotations import os diff --git a/petab/v2/C.py b/petab/v2/C.py index 1ab6f795..0a406fac 100644 --- a/petab/v2/C.py +++ b/petab/v2/C.py @@ 
-2,6 +2,7 @@ """ This file contains constant definitions. """ + import math as _math import sys diff --git a/petab/v2/__init__.py b/petab/v2/__init__.py index adeb0e84..4d147828 100644 --- a/petab/v2/__init__.py +++ b/petab/v2/__init__.py @@ -2,6 +2,7 @@ Contains all functionality related to handling PEtab 2.0 problems. """ + from warnings import warn # TODO: remove v1 star imports diff --git a/petab/v2/_helpers.py b/petab/v2/_helpers.py index a7522f35..3201769a 100644 --- a/petab/v2/_helpers.py +++ b/petab/v2/_helpers.py @@ -1,2 +1,3 @@ """Various internal helper functions.""" + from ..v1.core import to_float_if_float # noqa: F401, E402 diff --git a/petab/v2/conditions.py b/petab/v2/conditions.py index 7bb6d262..8d5a3067 100644 --- a/petab/v2/conditions.py +++ b/petab/v2/conditions.py @@ -1,4 +1,5 @@ """Functions operating on the PEtab condition table""" + from __future__ import annotations from pathlib import Path diff --git a/petab/v2/experiments.py b/petab/v2/experiments.py index 17137b5c..9837b953 100644 --- a/petab/v2/experiments.py +++ b/petab/v2/experiments.py @@ -1,4 +1,5 @@ """Functions operating on the PEtab experiments table.""" + from pathlib import Path import pandas as pd diff --git a/petab/v2/lint.py b/petab/v2/lint.py index c5cf5eb9..2deb0ebd 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -1,4 +1,5 @@ """Validation of PEtab problems""" + from __future__ import annotations import logging diff --git a/petab/v2/models/__init__.py b/petab/v2/models/__init__.py index a387c27b..79ec7639 100644 --- a/petab/v2/models/__init__.py +++ b/petab/v2/models/__init__.py @@ -1,2 +1,3 @@ """Handling of different model types supported by PEtab.""" + from ...v1.models import * # noqa: F401, F403 diff --git a/petab/v2/models/model.py b/petab/v2/models/model.py index 403a03e2..345247eb 100644 --- a/petab/v2/models/model.py +++ b/petab/v2/models/model.py @@ -1,2 +1,3 @@ """PEtab model abstraction""" + from ...v1.models.model import * # noqa: F401, F403 diff 
--git a/petab/v2/models/pysb_model.py b/petab/v2/models/pysb_model.py index 111c9864..4da866e7 100644 --- a/petab/v2/models/pysb_model.py +++ b/petab/v2/models/pysb_model.py @@ -1,2 +1,3 @@ """Functions for handling PySB models""" + from ...v1.models.pysb_model import * # noqa: F401, F403 diff --git a/petab/v2/models/sbml_model.py b/petab/v2/models/sbml_model.py index 2a0eadc7..b696ce31 100644 --- a/petab/v2/models/sbml_model.py +++ b/petab/v2/models/sbml_model.py @@ -1,2 +1,3 @@ """Functions for handling SBML models""" + from ...v1.models.sbml_model import * # noqa: F401, F403 diff --git a/petab/v2/petab1to2.py b/petab/v2/petab1to2.py index 0d48d9cf..dc1b2b8c 100644 --- a/petab/v2/petab1to2.py +++ b/petab/v2/petab1to2.py @@ -1,4 +1,5 @@ """Convert PEtab version 1 problems to version 2.""" + import shutil from contextlib import suppress from itertools import chain diff --git a/petab/v2/problem.py b/petab/v2/problem.py index d07c7f2e..32684d0b 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -1,4 +1,5 @@ """PEtab v2 problems.""" + from __future__ import annotations import logging @@ -87,9 +88,9 @@ def __init__( self.mapping_df: pd.DataFrame | None = mapping_df self.model: Model | None = model self.extensions_config = extensions_config or {} - self.validation_tasks: list[ - ValidationTask - ] = default_validation_tasks.copy() + self.validation_tasks: list[ValidationTask] = ( + default_validation_tasks.copy() + ) self.config = config def __str__(self): diff --git a/petab/version.py b/petab/version.py index c59cab99..ca57250b 100644 --- a/petab/version.py +++ b/petab/version.py @@ -1,2 +1,3 @@ """PEtab library version""" + __version__ = "0.5.0" diff --git a/petab/versions.py b/petab/versions.py index e19d0cd0..b1fdecf4 100644 --- a/petab/versions.py +++ b/petab/versions.py @@ -1,4 +1,5 @@ """Handling of PEtab version numbers.""" + from __future__ import annotations import re diff --git a/tests/v1/math/test_math.py b/tests/v1/math/test_math.py index 
4b350d4e..828aac88 100644 --- a/tests/v1/math/test_math.py +++ b/tests/v1/math/test_math.py @@ -61,13 +61,13 @@ def test_parse_cases(expr_str, expected): else: try: result = float(result.evalf()) - assert np.isclose( - result, expected - ), f"{expr_str}: Expected {expected}, got {result}" + assert np.isclose(result, expected), ( + f"{expr_str}: Expected {expected}, got {result}" + ) except TypeError: - assert ( - result == expected - ), f"{expr_str}: Expected {expected}, got {result}" + assert result == expected, ( + f"{expr_str}: Expected {expected}, got {result}" + ) def test_ids(): diff --git a/tests/v1/test_calculate.py b/tests/v1/test_calculate.py index ca93c33a..526ea7c9 100644 --- a/tests/v1/test_calculate.py +++ b/tests/v1/test_calculate.py @@ -15,7 +15,7 @@ def model_simple(): - "Simple model." "" + "Simple model." measurement_df = pd.DataFrame( data={ OBSERVABLE_ID: ["obs_a", "obs_a", "obs_b", "obs_b"], diff --git a/tests/v1/test_combine.py b/tests/v1/test_combine.py index 4fca4105..e685bf2b 100644 --- a/tests/v1/test_combine.py +++ b/tests/v1/test_combine.py @@ -1,4 +1,5 @@ """Test COMBINE archive""" + import tempfile from pathlib import Path diff --git a/tests/v1/test_conditions.py b/tests/v1/test_conditions.py index b240241d..45059ba1 100644 --- a/tests/v1/test_conditions.py +++ b/tests/v1/test_conditions.py @@ -1,4 +1,5 @@ """Tests related to petab.conditions""" + import os import tempfile from pathlib import Path diff --git a/tests/v1/test_deprecated.py b/tests/v1/test_deprecated.py index b78e7856..ef96f2e9 100644 --- a/tests/v1/test_deprecated.py +++ b/tests/v1/test_deprecated.py @@ -1,4 +1,5 @@ """Check that deprecated functionality raises but still works.""" + import tempfile from pathlib import Path diff --git a/tests/v1/test_lint.py b/tests/v1/test_lint.py index d75bdcea..4ad2e9b1 100644 --- a/tests/v1/test_lint.py +++ b/tests/v1/test_lint.py @@ -202,9 +202,9 @@ def test_assert_overrides_match_parameter_count(): # 3 observable parameters given, 
2 expected measurement_df = measurement_df_orig.copy() - measurement_df.loc[ - 1, OBSERVABLE_PARAMETERS - ] = "override1;override2;oneTooMuch" + measurement_df.loc[1, OBSERVABLE_PARAMETERS] = ( + "override1;override2;oneTooMuch" + ) with pytest.raises(AssertionError): petab.assert_overrides_match_parameter_count( measurement_df, observable_df diff --git a/tests/v1/test_measurements.py b/tests/v1/test_measurements.py index ac3e59a3..10f5ba98 100644 --- a/tests/v1/test_measurements.py +++ b/tests/v1/test_measurements.py @@ -1,4 +1,5 @@ """Tests related to petab.measurements""" + import tempfile from pathlib import Path diff --git a/tests/v1/test_model_pysb.py b/tests/v1/test_model_pysb.py index 922dab2f..57371c79 100644 --- a/tests/v1/test_model_pysb.py +++ b/tests/v1/test_model_pysb.py @@ -1,4 +1,5 @@ """Test related to petab.models.model_pysb""" + import pysb import pytest diff --git a/tests/v1/test_observables.py b/tests/v1/test_observables.py index e870ac12..c9932b0d 100644 --- a/tests/v1/test_observables.py +++ b/tests/v1/test_observables.py @@ -1,4 +1,5 @@ """Tests for petab.observables""" + import tempfile from pathlib import Path diff --git a/tests/v1/test_parameters.py b/tests/v1/test_parameters.py index c28528fe..33e7c97d 100644 --- a/tests/v1/test_parameters.py +++ b/tests/v1/test_parameters.py @@ -1,4 +1,5 @@ """Tests for petab/parameters.py""" + import tempfile from pathlib import Path diff --git a/tests/v1/test_petab.py b/tests/v1/test_petab.py index ff9621fa..564dcb7f 100644 --- a/tests/v1/test_petab.py +++ b/tests/v1/test_petab.py @@ -331,9 +331,9 @@ def test_create_parameter_df( assert parameter_df.index.values.tolist() == expected # test with condition parameter override: - condition_df_2_conditions.loc[ - "condition2", "fixedParameter1" - ] = "overrider" + condition_df_2_conditions.loc["condition2", "fixedParameter1"] = ( + "overrider" + ) expected = ["p3", "p4", "p1", "p2", "p5", "overrider"] parameter_df = petab.create_parameter_df( diff --git 
a/tests/v1/test_sbml.py b/tests/v1/test_sbml.py index 5c262d43..b29f1ea7 100644 --- a/tests/v1/test_sbml.py +++ b/tests/v1/test_sbml.py @@ -85,9 +85,9 @@ def check_model(condition_model): condition_model.getSpecies("species_4").getInitialConcentration() == 3.25 ) - assert ( - len(condition_model.getListOfInitialAssignments()) == 0 - ), "InitialAssignment not removed" + assert len(condition_model.getListOfInitialAssignments()) == 0, ( + "InitialAssignment not removed" + ) assert condition_model.getCompartment("compartment_1").getSize() == 2.0 assert condition_model.getParameter("parameter_1").getValue() == 1.25 assert condition_model.getParameter("parameter_2").getValue() == 2.25 diff --git a/tests/v1/test_simplify.py b/tests/v1/test_simplify.py index 9aa25f8f..1724f8bb 100644 --- a/tests/v1/test_simplify.py +++ b/tests/v1/test_simplify.py @@ -1,4 +1,5 @@ """Tests for petab.simplify.*""" + from math import nan import pandas as pd diff --git a/tests/v1/test_simulate.py b/tests/v1/test_simulate.py index e23b63cb..7945b1bb 100644 --- a/tests/v1/test_simulate.py +++ b/tests/v1/test_simulate.py @@ -1,4 +1,5 @@ """Tests for petab/simulate.py.""" + import functools from collections.abc import Callable from pathlib import Path diff --git a/tests/v1/test_yaml.py b/tests/v1/test_yaml.py index 82ab242c..168d7697 100644 --- a/tests/v1/test_yaml.py +++ b/tests/v1/test_yaml.py @@ -1,4 +1,5 @@ """Test for petab.yaml""" + import tempfile from pathlib import Path diff --git a/tests/v2/test_experiments.py b/tests/v2/test_experiments.py index 234552f2..205f200d 100644 --- a/tests/v2/test_experiments.py +++ b/tests/v2/test_experiments.py @@ -1,4 +1,5 @@ """Tests related to ``petab.v2.experiments``.""" + from tempfile import TemporaryDirectory import pandas as pd diff --git a/tests/v2/test_mapping.py b/tests/v2/test_mapping.py index 60ba6b49..e60e9082 100644 --- a/tests/v2/test_mapping.py +++ b/tests/v2/test_mapping.py @@ -1,4 +1,5 @@ """Tests related to petab.v2.mapping""" + import 
tempfile import pandas as pd From dbd7e04cb53634d38bebef5ef1bf6788851495db Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 13 Mar 2025 09:46:23 +0100 Subject: [PATCH 029/141] v2: Observable/conditions/experiments/... objects (#345) Implement an object model for the various PEtab entities as an alternative to working with the plain DataFrames. Related to #337. Object names are very likely to change. This is more of a proof of concept. Also adapt to recent changes in https://github.com/PEtab-dev/PEtab/pull/581 --- petab/v2/C.py | 27 +- petab/v2/core.py | 701 +++++++++++++++++++++++++++++++++++++++ petab/v2/petab1to2.py | 8 +- petab/v2/problem.py | 57 +++- tests/v2/test_core.py | 74 +++++ tests/v2/test_problem.py | 12 +- 6 files changed, 835 insertions(+), 44 deletions(-) create mode 100644 petab/v2/core.py create mode 100644 tests/v2/test_core.py diff --git a/petab/v2/C.py b/petab/v2/C.py index 0a406fac..617977c1 100644 --- a/petab/v2/C.py +++ b/petab/v2/C.py @@ -130,30 +130,23 @@ CONDITION_NAME = "conditionName" #: Column in the condition table with the ID of an entity that is changed TARGET_ID = "targetId" -#: Column in the condition table with the type of value that is changed -VALUE_TYPE = "valueType" +#: Column in the condition table with the operation type +OPERATION_TYPE = "operationType" #: Column in the condition table with the new value of the target entity TARGET_VALUE = "targetValue" -# value types: -VT_CONSTANT = "constant" -VT_INITIAL = "initial" -VT_RATE = "rate" -VT_ASSIGNMENT = "assignment" -VT_RELATIVE_RATE = "relativeRate" -VT_RELATIVE_ASSIGNMENT = "relativeAssignment" -VALUE_TYPES = [ - VT_CONSTANT, - VT_INITIAL, - VT_RATE, - VT_ASSIGNMENT, - VT_RELATIVE_RATE, - VT_RELATIVE_ASSIGNMENT, +# operation types: +OT_CUR_VAL = "setCurrentValue" +OT_NO_CHANGE = "noChange" + +OPERATION_TYPES = [ + OT_CUR_VAL, + OT_NO_CHANGE, ] CONDITION_DF_COLS = [ CONDITION_ID, TARGET_ID, - VALUE_TYPE, + OPERATION_TYPE, TARGET_VALUE, ] diff --git a/petab/v2/core.py 
b/petab/v2/core.py new file mode 100644 index 00000000..1f826788 --- /dev/null +++ b/petab/v2/core.py @@ -0,0 +1,701 @@ +"""Types around the PEtab object model.""" + +from __future__ import annotations + +from enum import Enum +from pathlib import Path + +import numpy as np +import pandas as pd +import sympy as sp +from pydantic import ( + BaseModel, + Field, + ValidationInfo, + field_validator, + model_validator, +) + +from ..v1.lint import is_valid_identifier +from ..v1.math import sympify_petab +from . import C + +__all__ = [ + "Observable", + "ObservablesTable", + "ObservableTransformation", + "NoiseDistribution", + "Change", + "ChangeSet", + "ConditionsTable", + "OperationType", + "ExperimentPeriod", + "Experiment", + "ExperimentsTable", + "Measurement", + "MeasurementTable", + "Mapping", + "MappingTable", + "Parameter", + "ParameterScale", + "ParameterTable", +] + + +class ObservableTransformation(str, Enum): + """Observable transformation types. + + Observable transformations as used in the PEtab observables table. + """ + + LIN = C.LIN + LOG = C.LOG + LOG10 = C.LOG10 + + +class ParameterScale(str, Enum): + """Parameter scales. + + Parameter scales as used in the PEtab parameters table. + """ + + LIN = C.LIN + LOG = C.LOG + LOG10 = C.LOG10 + + +class NoiseDistribution(str, Enum): + """Noise distribution types. + + Noise distributions as used in the PEtab observables table. + """ + + NORMAL = C.NORMAL + LAPLACE = C.LAPLACE + + +class PriorType(str, Enum): + """Prior types. + + Prior types as used in the PEtab parameters table. + """ + + NORMAL = C.NORMAL + LAPLACE = C.LAPLACE + UNIFORM = C.UNIFORM + LOG_NORMAL = C.LOG_NORMAL + LOG_LAPLACE = C.LOG_LAPLACE + PARAMETER_SCALE_NORMAL = C.PARAMETER_SCALE_NORMAL + PARAMETER_SCALE_LAPLACE = C.PARAMETER_SCALE_LAPLACE + PARAMETER_SCALE_UNIFORM = C.PARAMETER_SCALE_UNIFORM + + +#: Objective prior types as used in the PEtab parameters table. 
+ObjectivePriorType = PriorType +#: Initialization prior types as used in the PEtab parameters table. +InitializationPriorType = PriorType + +assert set(C.PRIOR_TYPES) == {e.value for e in ObjectivePriorType}, ( + "ObjectivePriorType enum does not match C.PRIOR_TYPES: " + f"{set(C.PRIOR_TYPES)} vs { {e.value for e in ObjectivePriorType} }" +) + + +class Observable(BaseModel): + """Observable definition.""" + + id: str = Field(alias=C.OBSERVABLE_ID) + name: str | None = Field(alias=C.OBSERVABLE_NAME, default=None) + formula: sp.Basic | None = Field(alias=C.OBSERVABLE_FORMULA, default=None) + transformation: ObservableTransformation = Field( + alias=C.OBSERVABLE_TRANSFORMATION, default=ObservableTransformation.LIN + ) + noise_formula: sp.Basic | None = Field(alias=C.NOISE_FORMULA, default=None) + noise_distribution: NoiseDistribution = Field( + alias=C.NOISE_DISTRIBUTION, default=NoiseDistribution.NORMAL + ) + + @field_validator("id") + @classmethod + def validate_id(cls, v): + if not v: + raise ValueError("ID must not be empty.") + if not is_valid_identifier(v): + raise ValueError(f"Invalid ID: {v}") + return v + + @field_validator( + "name", + "formula", + "noise_formula", + "noise_formula", + "noise_distribution", + "transformation", + mode="before", + ) + @classmethod + def convert_nan_to_default(cls, v, info: ValidationInfo): + if isinstance(v, float) and np.isnan(v): + return cls.model_fields[info.field_name].default + return v + + @field_validator("formula", "noise_formula", mode="before") + @classmethod + def sympify(cls, v): + if v is None or isinstance(v, sp.Basic): + return v + if isinstance(v, float) and np.isnan(v): + return None + + return sympify_petab(v) + + class Config: + populate_by_name = True + arbitrary_types_allowed = True + + +class ObservablesTable(BaseModel): + """PEtab observables table.""" + + observables: list[Observable] + + def __getitem__(self, observable_id: str) -> Observable: + """Get an observable by ID.""" + for observable in 
self.observables: + if observable.id == observable_id: + return observable + raise KeyError(f"Observable ID {observable_id} not found") + + @classmethod + def from_df(cls, df: pd.DataFrame) -> ObservablesTable: + if df is None: + return cls(observables=[]) + + observables = [ + Observable(**row.to_dict()) + for _, row in df.reset_index().iterrows() + ] + + return cls(observables=observables) + + def to_df(self) -> pd.DataFrame: + return pd.DataFrame(self.model_dump()["observables"]) + + @classmethod + def from_tsv(cls, file_path: str | Path) -> ObservablesTable: + df = pd.read_csv(file_path, sep="\t") + return cls.from_df(df) + + def to_tsv(self, file_path: str | Path) -> None: + df = self.to_df() + df.to_csv(file_path, sep="\t", index=False) + + def __add__(self, other: Observable) -> ObservablesTable: + """Add an observable to the table.""" + if not isinstance(other, Observable): + raise TypeError("Can only add Observable to ObservablesTable") + return ObservablesTable(observables=self.observables + [other]) + + def __iadd__(self, other: Observable) -> ObservablesTable: + """Add an observable to the table in place.""" + if not isinstance(other, Observable): + raise TypeError("Can only add Observable to ObservablesTable") + self.observables.append(other) + return self + + +class OperationType(str, Enum): + """Operation types for model changes in the PEtab conditions table.""" + + # TODO update names + SET_CURRENT_VALUE = "setCurrentValue" + NO_CHANGE = "noChange" + ... + + +class Change(BaseModel): + """A change to the model or model state. + + A change to the model or model state, corresponding to an individual + row of the PEtab conditions table. 
+ """ + + target_id: str | None = Field(alias=C.TARGET_ID, default=None) + operation_type: OperationType = Field(alias=C.OPERATION_TYPE) + target_value: sp.Basic | None = Field(alias=C.TARGET_VALUE, default=None) + + class Config: + populate_by_name = True + arbitrary_types_allowed = True + use_enum_values = True + + @model_validator(mode="before") + @classmethod + def validate_id(cls, data: dict): + if ( + data.get("operation_type", data.get(C.OPERATION_TYPE)) + != C.OT_NO_CHANGE + ): + target_id = data.get("target_id", data.get(C.TARGET_ID)) + + if not is_valid_identifier(target_id): + raise ValueError(f"Invalid ID: {target_id}") + return data + + @field_validator("target_value", mode="before") + @classmethod + def sympify(cls, v): + if v is None or isinstance(v, sp.Basic): + return v + if isinstance(v, float) and np.isnan(v): + return None + + return sympify_petab(v) + + +class ChangeSet(BaseModel): + """A set of changes to the model or model state. + + A set of simultaneously occurring changes to the model or model state, + corresponding to a perturbation of the underlying system. This corresponds + to all rows of the PEtab conditions table with the same condition ID. 
+ """ + + id: str = Field(alias=C.CONDITION_ID) + changes: list[Change] + + class Config: + populate_by_name = True + + @field_validator("id") + @classmethod + def validate_id(cls, v): + if not v: + raise ValueError("ID must not be empty.") + if not is_valid_identifier(v): + raise ValueError(f"Invalid ID: {v}") + return v + + def __add__(self, other: Change) -> ChangeSet: + """Add a change to the set.""" + if not isinstance(other, Change): + raise TypeError("Can only add Change to ChangeSet") + return ChangeSet(id=self.id, changes=self.changes + [other]) + + def __iadd__(self, other: Change) -> ChangeSet: + """Add a change to the set in place.""" + if not isinstance(other, Change): + raise TypeError("Can only add Change to ChangeSet") + self.changes.append(other) + return self + + +class ConditionsTable(BaseModel): + """PEtab conditions table.""" + + conditions: list[ChangeSet] = [] + + def __getitem__(self, condition_id: str) -> ChangeSet: + """Get a condition by ID.""" + for condition in self.conditions: + if condition.id == condition_id: + return condition + raise KeyError(f"Condition ID {condition_id} not found") + + @classmethod + def from_df(cls, df: pd.DataFrame) -> ConditionsTable: + if df is None: + return cls(conditions=[]) + + conditions = [] + for condition_id, sub_df in df.groupby(C.CONDITION_ID): + changes = [Change(**row.to_dict()) for _, row in sub_df.iterrows()] + conditions.append(ChangeSet(id=condition_id, changes=changes)) + + return cls(conditions=conditions) + + def to_df(self) -> pd.DataFrame: + records = [ + {C.CONDITION_ID: condition.id, **change.model_dump()} + for condition in self.conditions + for change in condition.changes + ] + return pd.DataFrame(records) + + @classmethod + def from_tsv(cls, file_path: str | Path) -> ConditionsTable: + df = pd.read_csv(file_path, sep="\t") + return cls.from_df(df) + + def to_tsv(self, file_path: str | Path) -> None: + df = self.to_df() + df.to_csv(file_path, sep="\t", index=False) + + def 
__add__(self, other: ChangeSet) -> ConditionsTable: + """Add a condition to the table.""" + if not isinstance(other, ChangeSet): + raise TypeError("Can only add ChangeSet to ConditionsTable") + return ConditionsTable(conditions=self.conditions + [other]) + + def __iadd__(self, other: ChangeSet) -> ConditionsTable: + """Add a condition to the table in place.""" + if not isinstance(other, ChangeSet): + raise TypeError("Can only add ChangeSet to ConditionsTable") + self.conditions.append(other) + return self + + +class ExperimentPeriod(BaseModel): + """A period of a timecourse defined by a start time and a set changes. + + This corresponds to a row of the PEtab experiments table. + """ + + start: float = Field(alias=C.TIME) + condition_id: str = Field(alias=C.CONDITION_ID) + + class Config: + populate_by_name = True + + @field_validator("condition_id") + @classmethod + def validate_id(cls, condition_id): + if not condition_id: + raise ValueError("ID must not be empty.") + if not is_valid_identifier(condition_id): + raise ValueError(f"Invalid ID: {condition_id}") + return condition_id + + +class Experiment(BaseModel): + """An experiment or a timecourse defined by an ID and a set of different + periods. + + Corresponds to a group of rows of the PEtab experiments table with the same + experiment ID. 
+ """ + + id: str = Field(alias=C.EXPERIMENT_ID) + periods: list[ExperimentPeriod] = [] + + class Config: + populate_by_name = True + arbitrary_types_allowed = True + + @field_validator("id") + @classmethod + def validate_id(cls, v): + if not v: + raise ValueError("ID must not be empty.") + if not is_valid_identifier(v): + raise ValueError(f"Invalid ID: {v}") + return v + + def __add__(self, other: ExperimentPeriod) -> Experiment: + """Add a period to the experiment.""" + if not isinstance(other, ExperimentPeriod): + raise TypeError("Can only add ExperimentPeriod to Experiment") + return Experiment(id=self.id, periods=self.periods + [other]) + + def __iadd__(self, other: ExperimentPeriod) -> Experiment: + """Add a period to the experiment in place.""" + if not isinstance(other, ExperimentPeriod): + raise TypeError("Can only add ExperimentPeriod to Experiment") + self.periods.append(other) + return self + + +class ExperimentsTable(BaseModel): + """PEtab experiments table.""" + + experiments: list[Experiment] + + @classmethod + def from_df(cls, df: pd.DataFrame) -> ExperimentsTable: + if df is None: + return cls(experiments=[]) + + experiments = [] + for experiment_id, cur_exp_df in df.groupby(C.EXPERIMENT_ID): + periods = [ + ExperimentPeriod( + start=row[C.TIME], condition_id=row[C.CONDITION_ID] + ) + for _, row in cur_exp_df.iterrows() + ] + experiments.append(Experiment(id=experiment_id, periods=periods)) + + return cls(experiments=experiments) + + def to_df(self) -> pd.DataFrame: + return pd.DataFrame(self.model_dump()["experiments"]) + + @classmethod + def from_tsv(cls, file_path: str | Path) -> ExperimentsTable: + df = pd.read_csv(file_path, sep="\t") + return cls.from_df(df) + + def to_tsv(self, file_path: str | Path) -> None: + df = self.to_df() + df.to_csv(file_path, sep="\t", index=False) + + def __add__(self, other: Experiment) -> ExperimentsTable: + """Add an experiment to the table.""" + if not isinstance(other, Experiment): + raise TypeError("Can only 
add Experiment to ExperimentsTable") + return ExperimentsTable(experiments=self.experiments + [other]) + + def __iadd__(self, other: Experiment) -> ExperimentsTable: + """Add an experiment to the table in place.""" + if not isinstance(other, Experiment): + raise TypeError("Can only add Experiment to ExperimentsTable") + self.experiments.append(other) + return self + + +class Measurement(BaseModel): + """A measurement. + + A measurement of an observable at a specific time point in a specific + experiment. + """ + + observable_id: str = Field(alias=C.OBSERVABLE_ID) + experiment_id: str | None = Field(alias=C.EXPERIMENT_ID, default=None) + time: float = Field(alias=C.TIME) + measurement: float = Field(alias=C.MEASUREMENT) + observable_parameters: list[sp.Basic] = Field( + alias=C.OBSERVABLE_PARAMETERS, default_factory=list + ) + noise_parameters: list[sp.Basic] = Field( + alias=C.NOISE_PARAMETERS, default_factory=list + ) + + class Config: + populate_by_name = True + arbitrary_types_allowed = True + + @field_validator( + "experiment_id", + "observable_parameters", + "noise_parameters", + mode="before", + ) + @classmethod + def convert_nan_to_none(cls, v, info: ValidationInfo): + if isinstance(v, float) and np.isnan(v): + return cls.model_fields[info.field_name].default + return v + + @field_validator("observable_id", "experiment_id") + @classmethod + def validate_id(cls, v, info: ValidationInfo): + if not v: + if info.field_name == "experiment_id": + return None + raise ValueError("ID must not be empty.") + if not is_valid_identifier(v): + raise ValueError(f"Invalid ID: {v}") + return v + + @field_validator( + "observable_parameters", "noise_parameters", mode="before" + ) + @classmethod + def sympify_list(cls, v): + if isinstance(v, float) and np.isnan(v): + return [] + if isinstance(v, str): + v = v.split(C.PARAMETER_SEPARATOR) + else: + v = [v] + return [sympify_petab(x) for x in v] + + +class MeasurementTable(BaseModel): + """PEtab measurement table.""" + + 
measurements: list[Measurement] + + @classmethod + def from_df( + cls, + df: pd.DataFrame, + ) -> MeasurementTable: + if df is None: + return cls(measurements=[]) + + measurements = [ + Measurement( + **row.to_dict(), + ) + for _, row in df.reset_index().iterrows() + ] + + return cls(measurements=measurements) + + def to_df(self) -> pd.DataFrame: + return pd.DataFrame(self.model_dump()["measurements"]) + + @classmethod + def from_tsv(cls, file_path: str | Path) -> MeasurementTable: + df = pd.read_csv(file_path, sep="\t") + return cls.from_df(df) + + def to_tsv(self, file_path: str | Path) -> None: + df = self.to_df() + df.to_csv(file_path, sep="\t", index=False) + + def __add__(self, other: Measurement) -> MeasurementTable: + """Add a measurement to the table.""" + if not isinstance(other, Measurement): + raise TypeError("Can only add Measurement to MeasurementTable") + return MeasurementTable(measurements=self.measurements + [other]) + + def __iadd__(self, other: Measurement) -> MeasurementTable: + """Add a measurement to the table in place.""" + if not isinstance(other, Measurement): + raise TypeError("Can only add Measurement to MeasurementTable") + self.measurements.append(other) + return self + + +class Mapping(BaseModel): + """Mapping PEtab entities to model entities.""" + + petab_id: str = Field(alias=C.PETAB_ENTITY_ID) + model_id: str = Field(alias=C.MODEL_ENTITY_ID) + + class Config: + populate_by_name = True + + @field_validator( + "petab_id", + ) + @classmethod + def validate_id(cls, v): + if not v: + raise ValueError("ID must not be empty.") + if not is_valid_identifier(v): + raise ValueError(f"Invalid ID: {v}") + return v + + +class MappingTable(BaseModel): + """PEtab mapping table.""" + + mappings: list[Mapping] + + @classmethod + def from_df(cls, df: pd.DataFrame) -> MappingTable: + if df is None: + return cls(mappings=[]) + + mappings = [ + Mapping(**row.to_dict()) for _, row in df.reset_index().iterrows() + ] + + return cls(mappings=mappings) + + 
def to_df(self) -> pd.DataFrame: + return pd.DataFrame(self.model_dump()["mappings"]) + + @classmethod + def from_tsv(cls, file_path: str | Path) -> MappingTable: + df = pd.read_csv(file_path, sep="\t") + return cls.from_df(df) + + def to_tsv(self, file_path: str | Path) -> None: + df = self.to_df() + df.to_csv(file_path, sep="\t", index=False) + + def __add__(self, other: Mapping) -> MappingTable: + """Add a mapping to the table.""" + if not isinstance(other, Mapping): + raise TypeError("Can only add Mapping to MappingTable") + return MappingTable(mappings=self.mappings + [other]) + + def __iadd__(self, other: Mapping) -> MappingTable: + """Add a mapping to the table in place.""" + if not isinstance(other, Mapping): + raise TypeError("Can only add Mapping to MappingTable") + self.mappings.append(other) + return self + + +class Parameter(BaseModel): + """Parameter definition.""" + + id: str = Field(alias=C.PARAMETER_ID) + lb: float | None = Field(alias=C.LOWER_BOUND, default=None) + ub: float | None = Field(alias=C.UPPER_BOUND, default=None) + nominal_value: float | None = Field(alias=C.NOMINAL_VALUE, default=None) + scale: ParameterScale = Field( + alias=C.PARAMETER_SCALE, default=ParameterScale.LIN + ) + estimate: bool = Field(alias=C.ESTIMATE, default=True) + # TODO priors + + class Config: + populate_by_name = True + arbitrary_types_allowed = True + use_enum_values = True + + @field_validator("id") + @classmethod + def validate_id(cls, v): + if not v: + raise ValueError("ID must not be empty.") + if not is_valid_identifier(v): + raise ValueError(f"Invalid ID: {v}") + return v + + @field_validator("lb", "ub", "nominal_value") + @classmethod + def convert_nan_to_none(cls, v): + if isinstance(v, float) and np.isnan(v): + return None + return v + + +class ParameterTable(BaseModel): + """PEtab parameter table.""" + + parameters: list[Parameter] + + @classmethod + def from_df(cls, df: pd.DataFrame) -> ParameterTable: + if df is None: + return cls(parameters=[]) + + 
parameters = [ + Parameter(**row.to_dict()) + for _, row in df.reset_index().iterrows() + ] + + return cls(parameters=parameters) + + def to_df(self) -> pd.DataFrame: + return pd.DataFrame(self.model_dump()["parameters"]) + + @classmethod + def from_tsv(cls, file_path: str | Path) -> ParameterTable: + df = pd.read_csv(file_path, sep="\t") + return cls.from_df(df) + + def to_tsv(self, file_path: str | Path) -> None: + df = self.to_df() + df.to_csv(file_path, sep="\t", index=False) + + def __add__(self, other: Parameter) -> ParameterTable: + """Add a parameter to the table.""" + if not isinstance(other, Parameter): + raise TypeError("Can only add Parameter to ParameterTable") + return ParameterTable(parameters=self.parameters + [other]) + + def __iadd__(self, other: Parameter) -> ParameterTable: + """Add a parameter to the table in place.""" + if not isinstance(other, Parameter): + raise TypeError("Can only add Parameter to ParameterTable") + self.parameters.append(other) + return self diff --git a/petab/v2/petab1to2.py b/petab/v2/petab1to2.py index dc1b2b8c..7f675db0 100644 --- a/petab/v2/petab1to2.py +++ b/petab/v2/petab1to2.py @@ -293,7 +293,7 @@ def v1v2_condition_df( id_vars=[v1.C.CONDITION_ID], var_name=v2.C.TARGET_ID, value_name=v2.C.TARGET_VALUE, - ) + ).dropna(subset=[v2.C.TARGET_VALUE]) if condition_df.empty: # This happens if there weren't any condition-specific changes @@ -301,7 +301,7 @@ def v1v2_condition_df( columns=[ v2.C.CONDITION_ID, v2.C.TARGET_ID, - v2.C.VALUE_TYPE, + v2.C.OPERATION_TYPE, v2.C.TARGET_VALUE, ] ) @@ -320,7 +320,5 @@ def v1v2_condition_df( f"Unable to determine value type {target} in the condition " "table." 
) - condition_df[v2.C.VALUE_TYPE] = condition_df[v2.C.TARGET_ID].apply( - lambda x: v2.C.VT_INITIAL if x in initial else v2.C.VT_CONSTANT - ) + condition_df[v2.C.OPERATION_TYPE] = v2.C.OT_CUR_VAL return condition_df diff --git a/petab/v2/problem.py b/petab/v2/problem.py index 32684d0b..86d82af4 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -93,6 +93,45 @@ def __init__( ) self.config = config + from .core import ( + ChangeSet, + ConditionsTable, + Experiment, + ExperimentsTable, + MappingTable, + MeasurementTable, + Observable, + ObservablesTable, + ParameterTable, + ) + + self.observables_table: ObservablesTable = ObservablesTable.from_df( + self.observable_df + ) + self.observables: list[Observable] = self.observables_table.observables + + self.conditions_table: ConditionsTable = ConditionsTable.from_df( + self.condition_df + ) + self.conditions: list[ChangeSet] = self.conditions_table.conditions + + self.experiments_table: ExperimentsTable = ExperimentsTable.from_df( + self.experiment_df + ) + self.experiments: list[Experiment] = self.experiments_table.experiments + + self.measurement_table: MeasurementTable = MeasurementTable.from_df( + self.measurement_df, + ) + + self.mapping_table: MappingTable = MappingTable.from_df( + self.mapping_df + ) + self.parameter_table: ParameterTable = ParameterTable.from_df( + self.parameter_df + ) + # TODO: visualization table + def __str__(self): model = f"with model ({self.model})" if self.model else "without model" @@ -599,20 +638,6 @@ def get_optimization_to_simulation_parameter_mapping(self, **kwargs): ) ) - def create_parameter_df(self, **kwargs) -> pd.DataFrame: - """Create a new PEtab parameter table - - See :py:func:`create_parameter_df`. 
- """ - return parameters.create_parameter_df( - model=self.model, - condition_df=self.condition_df, - observable_df=self.observable_df, - measurement_df=self.measurement_df, - mapping_df=self.mapping_df, - **kwargs, - ) - def sample_parameter_startpoints(self, n_starts: int = 100, **kwargs): """Create 2D array with starting points for optimization @@ -769,10 +794,10 @@ def add_condition( { CONDITION_ID: id_, TARGET_ID: target_id, - VALUE_TYPE: value_type, + OPERATION_TYPE: op_type, TARGET_VALUE: target_value, } - for target_id, (value_type, target_value) in kwargs.items() + for target_id, (op_type, target_value) in kwargs.items() ] # TODO: is the condition name supported in v2? if name is not None: diff --git a/tests/v2/test_core.py b/tests/v2/test_core.py new file mode 100644 index 00000000..a7eae851 --- /dev/null +++ b/tests/v2/test_core.py @@ -0,0 +1,74 @@ +import tempfile +from pathlib import Path + +from petab.v2.core import ( + Change, + ChangeSet, + ConditionsTable, + Experiment, + ExperimentPeriod, + ObservablesTable, + OperationType, +) +from petab.v2.petab1to2 import petab1to2 + +example_dir_fujita = Path(__file__).parents[2] / "doc/example/example_Fujita" + + +def test_observables_table_round_trip(): + file = example_dir_fujita / "Fujita_observables.tsv" + observables = ObservablesTable.from_tsv(file) + + with tempfile.TemporaryDirectory() as tmp_dir: + tmp_file = Path(tmp_dir) / "observables.tsv" + observables.to_tsv(tmp_file) + observables2 = ObservablesTable.from_tsv(tmp_file) + assert observables == observables2 + + +def test_conditions_table_round_trip(): + with tempfile.TemporaryDirectory() as tmp_dir: + petab1to2(example_dir_fujita / "Fujita.yaml", tmp_dir) + file = Path(tmp_dir, "Fujita_experimentalCondition.tsv") + conditions = ConditionsTable.from_tsv(file) + tmp_file = Path(tmp_dir) / "conditions.tsv" + conditions.to_tsv(tmp_file) + conditions2 = ConditionsTable.from_tsv(tmp_file) + assert conditions == conditions2 + + +def 
test_experiment_add_periods(): + """Test operators for Experiment""" + exp = Experiment(id="exp1") + assert exp.periods == [] + + p1 = ExperimentPeriod(start=0, condition_id="p1") + p2 = ExperimentPeriod(start=1, condition_id="p2") + p3 = ExperimentPeriod(start=2, condition_id="p3") + exp += p1 + exp += p2 + + assert exp.periods == [p1, p2] + + exp2 = exp + p3 + assert exp2.periods == [p1, p2, p3] + assert exp.periods == [p1, p2] + + +def test_conditions_table_add_changeset(): + conditions_table = ConditionsTable() + assert conditions_table.conditions == [] + + c1 = ChangeSet( + id="condition1", + changes=[Change(operation_type=OperationType.NO_CHANGE)], + ) + c2 = ChangeSet( + id="condition2", + changes=[Change(operation_type=OperationType.NO_CHANGE)], + ) + + conditions_table += c1 + conditions_table += c2 + + assert conditions_table.conditions == [c1, c2] diff --git a/tests/v2/test_problem.py b/tests/v2/test_problem.py index dadc3a7c..04e394ad 100644 --- a/tests/v2/test_problem.py +++ b/tests/v2/test_problem.py @@ -16,13 +16,13 @@ NOMINAL_VALUE, OBSERVABLE_FORMULA, OBSERVABLE_ID, + OPERATION_TYPE, + OT_CUR_VAL, PARAMETER_ID, PETAB_ENTITY_ID, TARGET_ID, TARGET_VALUE, UPPER_BOUND, - VALUE_TYPE, - VT_CONSTANT, ) @@ -73,7 +73,7 @@ def test_problem_from_yaml_multiple_files(): for i in (1, 2): problem = Problem() - problem.add_condition(f"condition{i}", parameter1=(VT_CONSTANT, i)) + problem.add_condition(f"condition{i}", parameter1=(OT_CUR_VAL, i)) petab.write_condition_df( problem.condition_df, Path(tmpdir, f"conditions{i}.tsv") ) @@ -109,14 +109,14 @@ def test_problem_from_yaml_multiple_files(): def test_modify_problem(): """Test modifying a problem via the API.""" problem = Problem() - problem.add_condition("condition1", parameter1=(VT_CONSTANT, 1)) - problem.add_condition("condition2", parameter2=(VT_CONSTANT, 2)) + problem.add_condition("condition1", parameter1=(OT_CUR_VAL, 1)) + problem.add_condition("condition2", parameter2=(OT_CUR_VAL, 2)) exp_condition_df = 
pd.DataFrame( data={ CONDITION_ID: ["condition1", "condition2"], TARGET_ID: ["parameter1", "parameter2"], - VALUE_TYPE: [VT_CONSTANT, VT_CONSTANT], + OPERATION_TYPE: [OT_CUR_VAL, OT_CUR_VAL], TARGET_VALUE: [1.0, 2.0], } ) From ff7784225e70553d10d67990b76406c8bd13c950 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 13 Mar 2025 10:02:13 +0100 Subject: [PATCH 030/141] Update developer's guide (#354) Update developer's guide --------- Co-authored-by: Dilan Pathirana <59329744+dilpath@users.noreply.github.com> --- doc/development.rst | 64 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 64 insertions(+) diff --git a/doc/development.rst b/doc/development.rst index df4edf55..181505a9 100644 --- a/doc/development.rst +++ b/doc/development.rst @@ -24,3 +24,67 @@ Python compatibility -------------------- We follow `numpy's Python support policy `_. + +Release process +--------------- + +1. Update the version number in ``petab/version.py``. + +2. Update the changelog in ``doc/CHANGELOG.md``. + The update content can be generated automatically: + draft a new dummy GitHub release with a dummy tag and the ``develop`` + branch, then click :guilabel:`Generate release notes`. + +3. Create a pull request with the to-be-released changes to the main branch + (usually from ``develop``). + +4. Once the pull request is merged, create a new release on GitHub. + Make sure to set the tag to the version number prefixed with 'v' + (e.g., ``v1.0.0``), and the release title to ``libpetab-python $RELEASE_TAG`` + (e.g., ``libpetab-python v1.0.0``). + +5. Check that the release is now available on PyPI. + The upload to PyPI is performed automatically by a GitHub Actions workflow, + which may take a few minutes to complete. + +6. Merge the main branch back into the `develop` branch. + +Style guide +----------- + +Code style +~~~~~~~~~~ + +We use pre-commit with ruff to enforce code style. To install pre-commit and +the pre-commit hooks, run: + +.. 
code-block:: bash + + pip install pre-commit + pre-commit install + +To run the pre-commit checks manually on all, not just the modified files, run: + +.. code-block:: bash + + pre-commit run --all-files + +Documentation style +~~~~~~~~~~~~~~~~~~~ + +We use `Sphinx `_ to generate the documentation. +The documentation is written in `reStructuredText `_. + +We use the `sphinx docstring-style `__ for new code. +The ``:param [ParamName]:`` and ``:return:`` statements are important when +applicable. +Manual type annotations (``:type [ParamName]:``) are redundant and should be +avoided. + +To build the documentation, run: + +.. code-block:: bash + + cd doc + make html + # then open `build/html/index.html` in a browser From 84f7875ac0d5fd0735f1d553b81487c2a7dd3e33 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 13 Mar 2025 10:10:31 +0100 Subject: [PATCH 031/141] doc: intersphinx, tox env (#358) * Simplify linking to the PEtab specs * Add tox env for building the documentation --- doc/conf.py | 2 ++ petab/v1/problem.py | 8 ++++++-- petab/v2/problem.py | 8 ++++++-- tox.ini | 15 ++++++++++++++- 4 files changed, 28 insertions(+), 5 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 4dbd3009..56f137e6 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -47,6 +47,7 @@ ] intersphinx_mapping = { + "petab": ("https://petab.readthedocs.io/en/latest/", None), "pandas": ("https://pandas.pydata.org/docs/", None), "numpy": ("https://numpy.org/devdocs/", None), "sympy": ("https://docs.sympy.org/latest/", None), @@ -62,6 +63,7 @@ exclude_patterns = [ "build/doctrees", "build/html", + "build/jupyter_execute", "**.ipynb_checkpoints", "logo/LICENSE.md", ] diff --git a/petab/v1/problem.py b/petab/v1/problem.py index 2405f5c0..4f7df659 100644 --- a/petab/v1/problem.py +++ b/petab/v1/problem.py @@ -42,7 +42,9 @@ class Problem: """ - PEtab parameter estimation problem as defined by + PEtab parameter estimation problem. 
+ + A PEtab problem as defined by: - model - condition table @@ -51,7 +53,9 @@ class Problem: - observables table - mapping table - Optionally it may contain visualization tables. + Optionally, it may contain visualization tables. + + See also :doc:`petab:v1/documentation_data_format`. Parameters: condition_df: PEtab condition table diff --git a/petab/v2/problem.py b/petab/v2/problem.py index 86d82af4..d18b4b7c 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -40,7 +40,9 @@ class Problem: """ - PEtab parameter estimation problem as defined by + PEtab parameter estimation problem + + A PEtab parameter estimation problem as defined by - model - condition table @@ -50,7 +52,9 @@ class Problem: - observables table - mapping table - Optionally it may contain visualization tables. + Optionally, it may contain visualization tables. + + See also :doc:`petab:v2/documentation_data_format`. Parameters: condition_df: PEtab condition table diff --git a/tox.ini b/tox.ini index 7d0cdccc..3f3bbe46 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = quality,unit +envlist = quality,unit,doc isolated_build = True [testenv] @@ -23,3 +23,16 @@ commands = tests description = Basic tests + +[testenv:doc] +description = Build the documentation +extras = doc,vis +deps= + # workaround for m2r2 issue with py3.13: No module named 'pkg_resources' + # see also: https://github.com/CrossNox/m2r2/issues/72 + setuptools +allowlist_externals = rm +commands = + rm -rf {tox_root}/doc/build + sphinx-build -W -b html . build/html +changedir = {tox_root}/doc From 0643389580f5208ffed5d9023f4ede31ae30329e Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 17 Mar 2025 07:48:07 +0100 Subject: [PATCH 032/141] Fix deprecated class Config in pydantic models (#359) Use ConfigDict instead. 
--- petab/v2/core.py | 46 +++++++++++++++++++++++----------------------- 1 file changed, 23 insertions(+), 23 deletions(-) diff --git a/petab/v2/core.py b/petab/v2/core.py index 1f826788..c1942b79 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -10,6 +10,7 @@ import sympy as sp from pydantic import ( BaseModel, + ConfigDict, Field, ValidationInfo, field_validator, @@ -149,9 +150,9 @@ def sympify(cls, v): return sympify_petab(v) - class Config: - populate_by_name = True - arbitrary_types_allowed = True + model_config = ConfigDict( + arbitrary_types_allowed=True, populate_by_name=True + ) class ObservablesTable(BaseModel): @@ -224,10 +225,11 @@ class Change(BaseModel): operation_type: OperationType = Field(alias=C.OPERATION_TYPE) target_value: sp.Basic | None = Field(alias=C.TARGET_VALUE, default=None) - class Config: - populate_by_name = True - arbitrary_types_allowed = True - use_enum_values = True + model_config = ConfigDict( + arbitrary_types_allowed=True, + populate_by_name=True, + use_enum_values=True, + ) @model_validator(mode="before") @classmethod @@ -264,8 +266,7 @@ class ChangeSet(BaseModel): id: str = Field(alias=C.CONDITION_ID) changes: list[Change] - class Config: - populate_by_name = True + model_config = ConfigDict(populate_by_name=True) @field_validator("id") @classmethod @@ -354,8 +355,7 @@ class ExperimentPeriod(BaseModel): start: float = Field(alias=C.TIME) condition_id: str = Field(alias=C.CONDITION_ID) - class Config: - populate_by_name = True + model_config = ConfigDict(populate_by_name=True) @field_validator("condition_id") @classmethod @@ -378,9 +378,9 @@ class Experiment(BaseModel): id: str = Field(alias=C.EXPERIMENT_ID) periods: list[ExperimentPeriod] = [] - class Config: - populate_by_name = True - arbitrary_types_allowed = True + model_config = ConfigDict( + arbitrary_types_allowed=True, populate_by_name=True + ) @field_validator("id") @classmethod @@ -471,9 +471,9 @@ class Measurement(BaseModel): alias=C.NOISE_PARAMETERS, 
default_factory=list ) - class Config: - populate_by_name = True - arbitrary_types_allowed = True + model_config = ConfigDict( + arbitrary_types_allowed=True, populate_by_name=True + ) @field_validator( "experiment_id", @@ -566,8 +566,7 @@ class Mapping(BaseModel): petab_id: str = Field(alias=C.PETAB_ENTITY_ID) model_id: str = Field(alias=C.MODEL_ENTITY_ID) - class Config: - populate_by_name = True + model_config = ConfigDict(populate_by_name=True) @field_validator( "petab_id", @@ -636,10 +635,11 @@ class Parameter(BaseModel): estimate: bool = Field(alias=C.ESTIMATE, default=True) # TODO priors - class Config: - populate_by_name = True - arbitrary_types_allowed = True - use_enum_values = True + model_config = ConfigDict( + arbitrary_types_allowed=True, + populate_by_name=True, + use_enum_values=True, + ) @field_validator("id") @classmethod From 5b47448abad9cf9373a2ee560d09f70f24fb878a Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 17 Mar 2025 16:00:28 +0100 Subject: [PATCH 033/141] doc: petab.v2.core (#360) * doc: petab.v2.core * inherited-members * undoc-members --- doc/conf.py | 6 +- doc/modules.rst | 1 + petab/v1/visualize/plotting.py | 7 +- petab/v2/core.py | 128 +++++++++++++++++++++++++++++---- pyproject.toml | 1 + pytest.ini | 1 + 6 files changed, 124 insertions(+), 20 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index 56f137e6..d9498efb 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -73,10 +73,10 @@ autosummary_generate = True autodoc_default_options = { - "members": None, + "members": True, "imported-members": ["petab"], - "inherited-members": None, - "show-inheritance": None, + "show-inheritance": True, + "undoc-members": True, } # For some reason causes sphinx import errors otherwise diff --git a/doc/modules.rst b/doc/modules.rst index e933c06f..cfc49e67 100644 --- a/doc/modules.rst +++ b/doc/modules.rst @@ -31,6 +31,7 @@ API Reference petab.v1.yaml petab.v2 petab.v2.C + petab.v2.core petab.v2.experiments petab.v2.lint petab.v2.models diff 
--git a/petab/v1/visualize/plotting.py b/petab/v1/visualize/plotting.py index e474c4c8..17db6d7d 100644 --- a/petab/v1/visualize/plotting.py +++ b/petab/v1/visualize/plotting.py @@ -833,9 +833,10 @@ def parse_from_id_list( :: - dataset_ids_per_plot = [['dataset_1', 'dataset_2'], - ['dataset_1', 'dataset_4', - 'dataset_5']] + dataset_ids_per_plot = [ + ["dataset_1", "dataset_2"], + ["dataset_1", "dataset_4", "dataset_5"], + ] or diff --git a/petab/v2/core.py b/petab/v2/core.py index c1942b79..3c23f652 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -49,8 +49,11 @@ class ObservableTransformation(str, Enum): Observable transformations as used in the PEtab observables table. """ + #: No transformation LIN = C.LIN + #: Logarithmic transformation (natural logarithm) LOG = C.LOG + #: Logarithmic transformation (base 10) LOG10 = C.LOG10 @@ -71,7 +74,9 @@ class NoiseDistribution(str, Enum): Noise distributions as used in the PEtab observables table. """ + #: Normal distribution NORMAL = C.NORMAL + #: Laplace distribution LAPLACE = C.LAPLACE @@ -81,10 +86,15 @@ class PriorType(str, Enum): Prior types as used in the PEtab parameters table. """ + #: Normal distribution. NORMAL = C.NORMAL + #: Laplace distribution. LAPLACE = C.LAPLACE + #: Uniform distribution. UNIFORM = C.UNIFORM + #: Log-normal distribution. LOG_NORMAL = C.LOG_NORMAL + #: Log-Laplace distribution LOG_LAPLACE = C.LOG_LAPLACE PARAMETER_SCALE_NORMAL = C.PARAMETER_SCALE_NORMAL PARAMETER_SCALE_LAPLACE = C.PARAMETER_SCALE_LAPLACE @@ -105,20 +115,26 @@ class PriorType(str, Enum): class Observable(BaseModel): """Observable definition.""" + #: Observable ID. id: str = Field(alias=C.OBSERVABLE_ID) + #: Observable name. name: str | None = Field(alias=C.OBSERVABLE_NAME, default=None) + #: Observable formula. formula: sp.Basic | None = Field(alias=C.OBSERVABLE_FORMULA, default=None) + #: Observable transformation. 
transformation: ObservableTransformation = Field( alias=C.OBSERVABLE_TRANSFORMATION, default=ObservableTransformation.LIN ) + #: Noise formula. noise_formula: sp.Basic | None = Field(alias=C.NOISE_FORMULA, default=None) + #: Noise distribution. noise_distribution: NoiseDistribution = Field( alias=C.NOISE_DISTRIBUTION, default=NoiseDistribution.NORMAL ) @field_validator("id") @classmethod - def validate_id(cls, v): + def _validate_id(cls, v): if not v: raise ValueError("ID must not be empty.") if not is_valid_identifier(v): @@ -135,14 +151,14 @@ def validate_id(cls, v): mode="before", ) @classmethod - def convert_nan_to_default(cls, v, info: ValidationInfo): + def _convert_nan_to_default(cls, v, info: ValidationInfo): if isinstance(v, float) and np.isnan(v): return cls.model_fields[info.field_name].default return v @field_validator("formula", "noise_formula", mode="before") @classmethod - def sympify(cls, v): + def _sympify(cls, v): if v is None or isinstance(v, sp.Basic): return v if isinstance(v, float) and np.isnan(v): @@ -150,6 +166,7 @@ def sympify(cls, v): return sympify_petab(v) + #: :meta private: model_config = ConfigDict( arbitrary_types_allowed=True, populate_by_name=True ) @@ -158,6 +175,7 @@ def sympify(cls, v): class ObservablesTable(BaseModel): """PEtab observables table.""" + #: List of observables. 
observables: list[Observable] def __getitem__(self, observable_id: str) -> Observable: @@ -169,6 +187,7 @@ def __getitem__(self, observable_id: str) -> Observable: @classmethod def from_df(cls, df: pd.DataFrame) -> ObservablesTable: + """Create an ObservablesTable from a DataFrame.""" if df is None: return cls(observables=[]) @@ -180,14 +199,17 @@ def from_df(cls, df: pd.DataFrame) -> ObservablesTable: return cls(observables=observables) def to_df(self) -> pd.DataFrame: + """Convert the ObservablesTable to a DataFrame.""" return pd.DataFrame(self.model_dump()["observables"]) @classmethod def from_tsv(cls, file_path: str | Path) -> ObservablesTable: + """Create an ObservablesTable from a TSV file.""" df = pd.read_csv(file_path, sep="\t") return cls.from_df(df) def to_tsv(self, file_path: str | Path) -> None: + """Write the ObservablesTable to a TSV file.""" df = self.to_df() df.to_csv(file_path, sep="\t", index=False) @@ -205,6 +227,7 @@ def __iadd__(self, other: Observable) -> ObservablesTable: return self +# TODO remove?! class OperationType(str, Enum): """Operation types for model changes in the PEtab conditions table.""" @@ -219,12 +242,24 @@ class Change(BaseModel): A change to the model or model state, corresponding to an individual row of the PEtab conditions table. + + >>> Change( + ... target_id="k1", + ... operation_type=OperationType.SET_CURRENT_VALUE, + ... target_value="10", + ... ) # doctest: +NORMALIZE_WHITESPACE + Change(target_id='k1', operation_type='setCurrentValue', + target_value=10.0000000000000) """ + #: The ID of the target entity to change. target_id: str | None = Field(alias=C.TARGET_ID, default=None) + # TODO: remove?! operation_type: OperationType = Field(alias=C.OPERATION_TYPE) + #: The value to set the target entity to. 
target_value: sp.Basic | None = Field(alias=C.TARGET_VALUE, default=None) + #: :meta private: model_config = ConfigDict( arbitrary_types_allowed=True, populate_by_name=True, @@ -233,7 +268,7 @@ class Change(BaseModel): @model_validator(mode="before") @classmethod - def validate_id(cls, data: dict): + def _validate_id(cls, data: dict): if ( data.get("operation_type", data.get(C.OPERATION_TYPE)) != C.OT_NO_CHANGE @@ -246,7 +281,7 @@ def validate_id(cls, data: dict): @field_validator("target_value", mode="before") @classmethod - def sympify(cls, v): + def _sympify(cls, v): if v is None or isinstance(v, sp.Basic): return v if isinstance(v, float) and np.isnan(v): @@ -261,16 +296,32 @@ class ChangeSet(BaseModel): A set of simultaneously occurring changes to the model or model state, corresponding to a perturbation of the underlying system. This corresponds to all rows of the PEtab conditions table with the same condition ID. + + >>> ChangeSet( + ... id="condition1", + ... changes=[ + ... Change( + ... target_id="k1", + ... operation_type=OperationType.SET_CURRENT_VALUE, + ... target_value="10", + ... ) + ... ], + ... ) # doctest: +NORMALIZE_WHITESPACE + ChangeSet(id='condition1', changes=[Change(target_id='k1', + operation_type='setCurrentValue', target_value=10.0000000000000)]) """ + #: The condition ID. id: str = Field(alias=C.CONDITION_ID) + #: The changes associated with this condition. changes: list[Change] + #: :meta private: model_config = ConfigDict(populate_by_name=True) @field_validator("id") @classmethod - def validate_id(cls, v): + def _validate_id(cls, v): if not v: raise ValueError("ID must not be empty.") if not is_valid_identifier(v): @@ -294,6 +345,7 @@ def __iadd__(self, other: Change) -> ChangeSet: class ConditionsTable(BaseModel): """PEtab conditions table.""" + #: List of conditions. 
conditions: list[ChangeSet] = [] def __getitem__(self, condition_id: str) -> ChangeSet: @@ -305,6 +357,7 @@ def __getitem__(self, condition_id: str) -> ChangeSet: @classmethod def from_df(cls, df: pd.DataFrame) -> ConditionsTable: + """Create a ConditionsTable from a DataFrame.""" if df is None: return cls(conditions=[]) @@ -316,6 +369,7 @@ def from_df(cls, df: pd.DataFrame) -> ConditionsTable: return cls(conditions=conditions) def to_df(self) -> pd.DataFrame: + """Convert the ConditionsTable to a DataFrame.""" records = [ {C.CONDITION_ID: condition.id, **change.model_dump()} for condition in self.conditions @@ -325,10 +379,12 @@ def to_df(self) -> pd.DataFrame: @classmethod def from_tsv(cls, file_path: str | Path) -> ConditionsTable: + """Create a ConditionsTable from a TSV file.""" df = pd.read_csv(file_path, sep="\t") return cls.from_df(df) def to_tsv(self, file_path: str | Path) -> None: + """Write the ConditionsTable to a TSV file.""" df = self.to_df() df.to_csv(file_path, sep="\t", index=False) @@ -347,19 +403,23 @@ def __iadd__(self, other: ChangeSet) -> ConditionsTable: class ExperimentPeriod(BaseModel): - """A period of a timecourse defined by a start time and a set changes. + """A period of a timecourse or experiment defined by a start time + and a condition ID. This corresponds to a row of the PEtab experiments table. """ + #: The start time of the period in time units as defined in the model. start: float = Field(alias=C.TIME) + #: The ID of the condition to be applied at the start time. condition_id: str = Field(alias=C.CONDITION_ID) + #: :meta private: model_config = ConfigDict(populate_by_name=True) @field_validator("condition_id") @classmethod - def validate_id(cls, condition_id): + def _validate_id(cls, condition_id): if not condition_id: raise ValueError("ID must not be empty.") if not is_valid_identifier(condition_id): @@ -375,16 +435,19 @@ class Experiment(BaseModel): experiment ID. """ + #: The experiment ID. 
id: str = Field(alias=C.EXPERIMENT_ID) + #: The periods of the experiment. periods: list[ExperimentPeriod] = [] + #: :meta private: model_config = ConfigDict( arbitrary_types_allowed=True, populate_by_name=True ) @field_validator("id") @classmethod - def validate_id(cls, v): + def _validate_id(cls, v): if not v: raise ValueError("ID must not be empty.") if not is_valid_identifier(v): @@ -408,10 +471,12 @@ def __iadd__(self, other: ExperimentPeriod) -> Experiment: class ExperimentsTable(BaseModel): """PEtab experiments table.""" + #: List of experiments. experiments: list[Experiment] @classmethod def from_df(cls, df: pd.DataFrame) -> ExperimentsTable: + """Create an ExperimentsTable from a DataFrame.""" if df is None: return cls(experiments=[]) @@ -428,14 +493,17 @@ def from_df(cls, df: pd.DataFrame) -> ExperimentsTable: return cls(experiments=experiments) def to_df(self) -> pd.DataFrame: + """Convert the ExperimentsTable to a DataFrame.""" return pd.DataFrame(self.model_dump()["experiments"]) @classmethod def from_tsv(cls, file_path: str | Path) -> ExperimentsTable: + """Create an ExperimentsTable from a TSV file.""" df = pd.read_csv(file_path, sep="\t") return cls.from_df(df) def to_tsv(self, file_path: str | Path) -> None: + """Write the ExperimentsTable to a TSV file.""" df = self.to_df() df.to_csv(file_path, sep="\t", index=False) @@ -460,17 +528,24 @@ class Measurement(BaseModel): experiment. """ + #: The observable ID. observable_id: str = Field(alias=C.OBSERVABLE_ID) + #: The experiment ID. experiment_id: str | None = Field(alias=C.EXPERIMENT_ID, default=None) + #: The time point of the measurement in time units as defined in the model. time: float = Field(alias=C.TIME) + #: The measurement value. measurement: float = Field(alias=C.MEASUREMENT) + #: Values for placeholder parameters in the observable formula. 
observable_parameters: list[sp.Basic] = Field( alias=C.OBSERVABLE_PARAMETERS, default_factory=list ) + #: Values for placeholder parameters in the noise formula. noise_parameters: list[sp.Basic] = Field( alias=C.NOISE_PARAMETERS, default_factory=list ) + #: :meta private: model_config = ConfigDict( arbitrary_types_allowed=True, populate_by_name=True ) @@ -489,7 +564,7 @@ def convert_nan_to_none(cls, v, info: ValidationInfo): @field_validator("observable_id", "experiment_id") @classmethod - def validate_id(cls, v, info: ValidationInfo): + def _validate_id(cls, v, info: ValidationInfo): if not v: if info.field_name == "experiment_id": return None @@ -502,7 +577,7 @@ def validate_id(cls, v, info: ValidationInfo): "observable_parameters", "noise_parameters", mode="before" ) @classmethod - def sympify_list(cls, v): + def _sympify_list(cls, v): if isinstance(v, float) and np.isnan(v): return [] if isinstance(v, str): @@ -515,6 +590,7 @@ def sympify_list(cls, v): class MeasurementTable(BaseModel): """PEtab measurement table.""" + #: List of measurements. 
measurements: list[Measurement] @classmethod @@ -522,6 +598,7 @@ def from_df( cls, df: pd.DataFrame, ) -> MeasurementTable: + """Create a MeasurementTable from a DataFrame.""" if df is None: return cls(measurements=[]) @@ -535,14 +612,17 @@ def from_df( return cls(measurements=measurements) def to_df(self) -> pd.DataFrame: + """Convert the MeasurementTable to a DataFrame.""" return pd.DataFrame(self.model_dump()["measurements"]) @classmethod def from_tsv(cls, file_path: str | Path) -> MeasurementTable: + """Create a MeasurementTable from a TSV file.""" df = pd.read_csv(file_path, sep="\t") return cls.from_df(df) def to_tsv(self, file_path: str | Path) -> None: + """Write the MeasurementTable to a TSV file.""" df = self.to_df() df.to_csv(file_path, sep="\t", index=False) @@ -563,16 +643,19 @@ def __iadd__(self, other: Measurement) -> MeasurementTable: class Mapping(BaseModel): """Mapping PEtab entities to model entities.""" + #: PEtab entity ID. petab_id: str = Field(alias=C.PETAB_ENTITY_ID) + #: Model entity ID. model_id: str = Field(alias=C.MODEL_ENTITY_ID) + #: :meta private: model_config = ConfigDict(populate_by_name=True) @field_validator( "petab_id", ) @classmethod - def validate_id(cls, v): + def _validate_id(cls, v): if not v: raise ValueError("ID must not be empty.") if not is_valid_identifier(v): @@ -583,10 +666,12 @@ def validate_id(cls, v): class MappingTable(BaseModel): """PEtab mapping table.""" + #: List of mappings. 
mappings: list[Mapping] @classmethod def from_df(cls, df: pd.DataFrame) -> MappingTable: + """Create a MappingTable from a DataFrame.""" if df is None: return cls(mappings=[]) @@ -597,14 +682,17 @@ def from_df(cls, df: pd.DataFrame) -> MappingTable: return cls(mappings=mappings) def to_df(self) -> pd.DataFrame: + """Convert the MappingTable to a DataFrame.""" return pd.DataFrame(self.model_dump()["mappings"]) @classmethod def from_tsv(cls, file_path: str | Path) -> MappingTable: + """Create a MappingTable from a TSV file.""" df = pd.read_csv(file_path, sep="\t") return cls.from_df(df) def to_tsv(self, file_path: str | Path) -> None: + """Write the MappingTable to a TSV file.""" df = self.to_df() df.to_csv(file_path, sep="\t", index=False) @@ -625,16 +713,23 @@ def __iadd__(self, other: Mapping) -> MappingTable: class Parameter(BaseModel): """Parameter definition.""" + #: Parameter ID. id: str = Field(alias=C.PARAMETER_ID) + #: Lower bound. lb: float | None = Field(alias=C.LOWER_BOUND, default=None) + #: Upper bound. ub: float | None = Field(alias=C.UPPER_BOUND, default=None) + #: Nominal value. nominal_value: float | None = Field(alias=C.NOMINAL_VALUE, default=None) + #: Parameter scale. scale: ParameterScale = Field( alias=C.PARAMETER_SCALE, default=ParameterScale.LIN ) + #: Is the parameter to be estimated? 
estimate: bool = Field(alias=C.ESTIMATE, default=True) # TODO priors + #: :meta private: model_config = ConfigDict( arbitrary_types_allowed=True, populate_by_name=True, @@ -643,7 +738,7 @@ class Parameter(BaseModel): @field_validator("id") @classmethod - def validate_id(cls, v): + def _validate_id(cls, v): if not v: raise ValueError("ID must not be empty.") if not is_valid_identifier(v): @@ -652,7 +747,7 @@ def validate_id(cls, v): @field_validator("lb", "ub", "nominal_value") @classmethod - def convert_nan_to_none(cls, v): + def _convert_nan_to_none(cls, v): if isinstance(v, float) and np.isnan(v): return None return v @@ -661,10 +756,12 @@ def convert_nan_to_none(cls, v): class ParameterTable(BaseModel): """PEtab parameter table.""" + #: List of parameters. parameters: list[Parameter] @classmethod def from_df(cls, df: pd.DataFrame) -> ParameterTable: + """Create a ParameterTable from a DataFrame.""" if df is None: return cls(parameters=[]) @@ -676,14 +773,17 @@ def from_df(cls, df: pd.DataFrame) -> ParameterTable: return cls(parameters=parameters) def to_df(self) -> pd.DataFrame: + """Convert the ParameterTable to a DataFrame.""" return pd.DataFrame(self.model_dump()["parameters"]) @classmethod def from_tsv(cls, file_path: str | Path) -> ParameterTable: + """Create a ParameterTable from a TSV file.""" df = pd.read_csv(file_path, sep="\t") return cls.from_df(df) def to_tsv(self, file_path: str | Path) -> None: + """Write the ParameterTable to a TSV file.""" df = self.to_df() df.to_csv(file_path, sep="\t", index=False) diff --git a/pyproject.toml b/pyproject.toml index 74a1aa1e..5168dbd0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -114,6 +114,7 @@ convention = "pep257" "tests/*" = ["T201"] [tool.ruff.format] +docstring-code-format = true exclude = [ "petab/math/_generated/*", # auto-generated ] diff --git a/pytest.ini b/pytest.ini index 1e9b4286..4aa44158 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,4 +1,5 @@ [pytest] +addopts = --doctest-modules 
filterwarnings = error # TODO: until tests are reorganized for petab.v1 From 101c79c6b6d90dfbc0e9f2186915706729c0cbe5 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 27 Mar 2025 14:26:18 +0100 Subject: [PATCH 034/141] Update petab.v2 (#361) * Adapt petab.v2 to the updated specs draft. * Make the new pydantic objects the primary objects and generate the DataFrames only on demand. Validation is still a bit rough. We'll need nicer error messages and pydantic settings are too lax for validating files. --- doc/modules.rst | 1 + petab/v1/lint.py | 16 +- petab/v1/math/sympify.py | 17 +- petab/v2/C.py | 33 +- petab/v2/__init__.py | 38 +- petab/v2/conditions.py | 22 +- petab/v2/core.py | 490 ++++++++++++------ petab/v2/lint.py | 979 ++++++++++++++++++++---------------- petab/v2/petab1to2.py | 83 +-- petab/v2/problem.py | 544 +++++++++++--------- pytest.ini | 7 +- tests/v1/math/test_math.py | 5 + tests/v2/test_conversion.py | 18 +- tests/v2/test_core.py | 260 ++++++++-- tests/v2/test_lint.py | 17 +- tests/v2/test_problem.py | 23 +- 16 files changed, 1569 insertions(+), 984 deletions(-) diff --git a/doc/modules.rst b/doc/modules.rst index cfc49e67..627ba9d8 100644 --- a/doc/modules.rst +++ b/doc/modules.rst @@ -16,6 +16,7 @@ API Reference petab.v1.core petab.v1.distributions petab.v1.lint + petab.v1.math petab.v1.measurements petab.v1.models petab.v1.observables diff --git a/petab/v1/lint.py b/petab/v1/lint.py index b2260b83..e14289fb 100644 --- a/petab/v1/lint.py +++ b/petab/v1/lint.py @@ -53,6 +53,9 @@ "observable_table_has_nontrivial_noise_formula", ] +#: Regular expression pattern for valid PEtab IDs +_petab_id_pattern = re.compile(r"^[a-zA-Z_]\w*$") + def _check_df(df: pd.DataFrame, req_cols: Iterable, name: str) -> None: """Check if given columns are present in DataFrame @@ -1041,10 +1044,13 @@ def assert_model_parameters_in_condition_or_parameter_table( mapping_df[MODEL_ENTITY_ID], strict=True, ) - # mapping table entities mapping to already allowed parameters 
- if to_id in allowed_in_condition_cols - # mapping table entities mapping to species - or model.is_state_variable(to_id) + if not pd.isna(to_id) + and ( + # mapping table entities mapping to already allowed parameters + to_id in allowed_in_condition_cols + # mapping table entities mapping to species + or model.is_state_variable(to_id) + ) } allowed_in_parameter_table = ( @@ -1186,7 +1192,7 @@ def is_valid_identifier(x: str) -> bool: if pd.isna(x): return False - return re.match(r"^[a-zA-Z_]\w*$", x) is not None + return _petab_id_pattern.match(x) is not None def check_ids(ids: Iterable[str], kind: str = "") -> None: diff --git a/petab/v1/math/sympify.py b/petab/v1/math/sympify.py index cc81a000..8ef1a129 100644 --- a/petab/v1/math/sympify.py +++ b/petab/v1/math/sympify.py @@ -15,6 +15,11 @@ def sympify_petab(expr: str | int | float) -> sp.Expr | sp.Basic: """Convert PEtab math expression to sympy expression. + .. note:: + + All symbols in the returned expression will have the `real=True` + assumption. + Args: expr: PEtab math expression. @@ -26,14 +31,22 @@ def sympify_petab(expr: str | int | float) -> sp.Expr | sp.Basic: The sympy expression corresponding to `expr`. Boolean values are converted to numeric values. 
""" + if isinstance(expr, sp.Expr): + # TODO: check if only PEtab-compatible symbols and functions are used + return expr + if isinstance(expr, int) or isinstance(expr, np.integer): return sp.Integer(expr) if isinstance(expr, float) or isinstance(expr, np.floating): return sp.Float(expr) - # Set error listeners - input_stream = InputStream(expr) + try: + input_stream = InputStream(expr) + except TypeError as e: + raise TypeError(f"Error parsing {expr!r}: {e.args[0]}") from e + lexer = PetabMathExprLexer(input_stream) + # Set error listeners lexer.removeErrorListeners() lexer.addErrorListener(MathErrorListener()) diff --git a/petab/v2/C.py b/petab/v2/C.py index 617977c1..c94a1d29 100644 --- a/petab/v2/C.py +++ b/petab/v2/C.py @@ -125,28 +125,14 @@ #: Condition ID column in the condition table CONDITION_ID = "conditionId" -# TODO: removed? -#: Condition name column in the condition table -CONDITION_NAME = "conditionName" #: Column in the condition table with the ID of an entity that is changed TARGET_ID = "targetId" -#: Column in the condition table with the operation type -OPERATION_TYPE = "operationType" #: Column in the condition table with the new value of the target entity TARGET_VALUE = "targetValue" -# operation types: -OT_CUR_VAL = "setCurrentValue" -OT_NO_CHANGE = "noChange" - -OPERATION_TYPES = [ - OT_CUR_VAL, - OT_NO_CHANGE, -] CONDITION_DF_COLS = [ CONDITION_ID, TARGET_ID, - OPERATION_TYPE, TARGET_VALUE, ] @@ -161,25 +147,25 @@ # OBSERVABLES -#: Observable name column in the observables table +#: Observable name column in the observable table OBSERVABLE_NAME = "observableName" -#: Observable formula column in the observables table +#: Observable formula column in the observable table OBSERVABLE_FORMULA = "observableFormula" -#: Noise formula column in the observables table +#: Noise formula column in the observable table NOISE_FORMULA = "noiseFormula" -#: Observable transformation column in the observables table +#: Observable transformation column in the 
observable table OBSERVABLE_TRANSFORMATION = "observableTransformation" -#: Noise distribution column in the observables table +#: Noise distribution column in the observable table NOISE_DISTRIBUTION = "noiseDistribution" -#: Mandatory columns of observables table +#: Mandatory columns of observable table OBSERVABLE_DF_REQUIRED_COLS = [ OBSERVABLE_ID, OBSERVABLE_FORMULA, NOISE_FORMULA, ] -#: Optional columns of observables table +#: Optional columns of observable table OBSERVABLE_DF_OPTIONAL_COLS = [ OBSERVABLE_NAME, OBSERVABLE_TRANSFORMATION, @@ -382,6 +368,9 @@ PETAB_ENTITY_ID = "petabEntityId" #: Model entity ID column in the mapping table MODEL_ENTITY_ID = "modelEntityId" +#: Arbitrary name +NAME = "name" + #: Required columns of the mapping table MAPPING_DF_REQUIRED_COLS = [PETAB_ENTITY_ID, MODEL_ENTITY_ID] @@ -389,7 +378,7 @@ #: Simulated value column in the simulation table SIMULATION = "simulation" -#: Residual value column in the residuals table +#: Residual value column in the residual table RESIDUAL = "residual" #: ??? 
NOISE_VALUE = "noiseValue" diff --git a/petab/v2/__init__.py b/petab/v2/__init__.py index 4d147828..4f8d28ea 100644 --- a/petab/v2/__init__.py +++ b/petab/v2/__init__.py @@ -5,27 +5,31 @@ from warnings import warn -# TODO: remove v1 star imports -from ..v1.calculate import * # noqa: F403, F401, E402 -from ..v1.composite_problem import * # noqa: F403, F401, E402 -from ..v1.core import * # noqa: F403, F401, E402 -from ..v1.format_version import __format_version__ # noqa: F401, E402 -from ..v1.mapping import * # noqa: F403, F401, E402 -from ..v1.measurements import * # noqa: F403, F401, E402 -from ..v1.observables import * # noqa: F403, F401, E402 -from ..v1.parameter_mapping import * # noqa: F403, F401, E402 -from ..v1.parameters import * # noqa: F403, F401, E402 -from ..v1.sampling import * # noqa: F403, F401, E402 -from ..v1.sbml import * # noqa: F403, F401, E402 -from ..v1.simulate import * # noqa: F403, F401, E402 -from ..v1.yaml import * # noqa: F403, F401, E402 - warn( "Support for PEtab2.0 and all of petab.v2 is experimental " "and subject to changes!", stacklevel=1, ) +# TODO: move this module to v2 +from petab.v1.mapping import ( # noqa: F403, F401, E402 + get_mapping_df, + write_mapping_df, +) +from petab.v1.measurements import ( # noqa: F401, E402 + get_measurement_df, + write_measurement_df, +) +from petab.v1.observables import ( # noqa: F401, E402 + get_observable_df, + write_observable_df, +) +from petab.v1.parameters import ( # noqa: F401, E402 + get_parameter_df, + write_parameter_df, +) +from petab.v1.yaml import load_yaml # noqa: F401, E402 + # import after v1 from ..version import __version__ # noqa: F401, E402 from . 
import ( # noqa: F401, E402 @@ -38,5 +42,5 @@ write_experiment_df, ) from .lint import lint_problem # noqa: F401, E402 -from .models import Model # noqa: F401, E402 -from .problem import Problem # noqa: F401, E402 +from .models import MODEL_TYPE_PYSB, MODEL_TYPE_SBML, Model # noqa: F401, E402 +from .problem import Problem, ProblemConfig # noqa: F401, E402 diff --git a/petab/v2/conditions.py b/petab/v2/conditions.py index 8d5a3067..deea1a0c 100644 --- a/petab/v2/conditions.py +++ b/petab/v2/conditions.py @@ -5,12 +5,8 @@ from pathlib import Path import pandas as pd -import sympy as sp -from .. import v2 -from ..v1.math import sympify_petab -from .C import * -from .lint import assert_no_leading_trailing_whitespace +from ..v1.lint import assert_no_leading_trailing_whitespace __all__ = [ "get_condition_df", @@ -50,19 +46,3 @@ def write_condition_df(df: pd.DataFrame, filename: str | Path) -> None: """ df = get_condition_df(df) df.to_csv(filename, sep="\t", index=False) - - -def get_condition_table_free_symbols(problem: v2.Problem) -> set[sp.Basic]: - """Free symbols from condition table assignments. - - Collects all free symbols from the condition table `targetValue` column. - - :returns: Set of free symbols. 
- """ - if problem.condition_df is None: - return set() - - free_symbols = set() - for target_value in problem.condition_df[TARGET_VALUE]: - free_symbols |= sympify_petab(target_value).free_symbols - return free_symbols diff --git a/petab/v2/core.py b/petab/v2/core.py index 3c23f652..10088b62 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -2,37 +2,43 @@ from __future__ import annotations +import re +from collections.abc import Sequence from enum import Enum +from itertools import chain from pathlib import Path +from typing import Annotated, Literal import numpy as np import pandas as pd import sympy as sp from pydantic import ( + AfterValidator, BaseModel, + BeforeValidator, ConfigDict, Field, ValidationInfo, field_validator, model_validator, ) +from typing_extensions import Self from ..v1.lint import is_valid_identifier from ..v1.math import sympify_petab -from . import C +from . import C, get_observable_df __all__ = [ "Observable", - "ObservablesTable", + "ObservableTable", "ObservableTransformation", "NoiseDistribution", "Change", - "ChangeSet", - "ConditionsTable", - "OperationType", + "Condition", + "ConditionTable", "ExperimentPeriod", "Experiment", - "ExperimentsTable", + "ExperimentTable", "Measurement", "MeasurementTable", "Mapping", @@ -43,6 +49,43 @@ ] +def _is_finite_or_neg_inf(v: float, info: ValidationInfo) -> float: + if not np.isfinite(v) and v != -np.inf: + raise ValueError( + f"{info.field_name} value must be finite or -inf but got {v}" + ) + return v + + +def _is_finite_or_pos_inf(v: float, info: ValidationInfo) -> float: + if not np.isfinite(v) and v != np.inf: + raise ValueError( + f"{info.field_name} value must be finite or inf but got {v}" + ) + return v + + +def _not_nan(v: float, info: ValidationInfo) -> float: + if np.isnan(v): + raise ValueError(f"{info.field_name} value must not be nan.") + return v + + +def _convert_nan_to_none(v): + if isinstance(v, float) and np.isnan(v): + return None + return v + + +def _valid_petab_id(v: 
str) -> str: + """Field validator for PEtab IDs.""" + if not v: + raise ValueError("ID must not be empty.") + if not is_valid_identifier(v): + raise ValueError(f"Invalid ID: {v}") + return v + + class ObservableTransformation(str, Enum): """Observable transformation types. @@ -116,7 +159,9 @@ class Observable(BaseModel): """Observable definition.""" #: Observable ID. - id: str = Field(alias=C.OBSERVABLE_ID) + id: Annotated[str, AfterValidator(_valid_petab_id)] = Field( + alias=C.OBSERVABLE_ID + ) #: Observable name. name: str | None = Field(alias=C.OBSERVABLE_NAME, default=None) #: Observable formula. @@ -132,14 +177,10 @@ class Observable(BaseModel): alias=C.NOISE_DISTRIBUTION, default=NoiseDistribution.NORMAL ) - @field_validator("id") - @classmethod - def _validate_id(cls, v): - if not v: - raise ValueError("ID must not be empty.") - if not is_valid_identifier(v): - raise ValueError(f"Invalid ID: {v}") - return v + #: :meta private: + model_config = ConfigDict( + arbitrary_types_allowed=True, populate_by_name=True, extra="allow" + ) @field_validator( "name", @@ -166,13 +207,40 @@ def _sympify(cls, v): return sympify_petab(v) - #: :meta private: - model_config = ConfigDict( - arbitrary_types_allowed=True, populate_by_name=True - ) - - -class ObservablesTable(BaseModel): + def _placeholders( + self, type_: Literal["observable", "noise"] + ) -> set[sp.Symbol]: + formula = ( + self.formula + if type_ == "observable" + else self.noise_formula + if type_ == "noise" + else None + ) + if formula is None or formula.is_number: + return set() + + if not (free_syms := formula.free_symbols): + return set() + + # TODO: add field validator to check for 1-based consecutive numbering + t = f"{re.escape(type_)}Parameter" + o = re.escape(self.id) + pattern = re.compile(rf"(?:^|\W)({t}\d+_{o})(?=\W|$)") + return {s for s in free_syms if pattern.match(str(s))} + + @property + def observable_placeholders(self) -> set[sp.Symbol]: + """Placeholder symbols for the observable formula.""" 
+ return self._placeholders("observable") + + @property + def noise_placeholders(self) -> set[sp.Symbol]: + """Placeholder symbols for the noise formula.""" + return self._placeholders("noise") + + +class ObservableTable(BaseModel): """PEtab observables table.""" #: List of observables. @@ -186,11 +254,12 @@ def __getitem__(self, observable_id: str) -> Observable: raise KeyError(f"Observable ID {observable_id} not found") @classmethod - def from_df(cls, df: pd.DataFrame) -> ObservablesTable: + def from_df(cls, df: pd.DataFrame) -> ObservableTable: """Create an ObservablesTable from a DataFrame.""" if df is None: return cls(observables=[]) + df = get_observable_df(df) observables = [ Observable(**row.to_dict()) for _, row in df.reset_index().iterrows() @@ -200,10 +269,31 @@ def from_df(cls, df: pd.DataFrame) -> ObservablesTable: def to_df(self) -> pd.DataFrame: """Convert the ObservablesTable to a DataFrame.""" - return pd.DataFrame(self.model_dump()["observables"]) + records = self.model_dump(by_alias=True)["observables"] + for record in records: + obs = record[C.OBSERVABLE_FORMULA] + noise = record[C.NOISE_FORMULA] + record[C.OBSERVABLE_FORMULA] = ( + None + if obs is None + # TODO: we need a custom printer for sympy expressions + # to avoid '**' + # https://github.com/PEtab-dev/libpetab-python/issues/362 + else str(obs) + if not obs.is_number + else float(obs) + ) + record[C.NOISE_FORMULA] = ( + None + if noise is None + else str(noise) + if not noise.is_number + else float(noise) + ) + return pd.DataFrame(records).set_index([C.OBSERVABLE_ID]) @classmethod - def from_tsv(cls, file_path: str | Path) -> ObservablesTable: + def from_tsv(cls, file_path: str | Path) -> ObservableTable: """Create an ObservablesTable from a TSV file.""" df = pd.read_csv(file_path, sep="\t") return cls.from_df(df) @@ -211,15 +301,15 @@ def from_tsv(cls, file_path: str | Path) -> ObservablesTable: def to_tsv(self, file_path: str | Path) -> None: """Write the ObservablesTable to a TSV 
file.""" df = self.to_df() - df.to_csv(file_path, sep="\t", index=False) + df.to_csv(file_path, sep="\t", index=True) - def __add__(self, other: Observable) -> ObservablesTable: + def __add__(self, other: Observable) -> ObservableTable: """Add an observable to the table.""" if not isinstance(other, Observable): raise TypeError("Can only add Observable to ObservablesTable") - return ObservablesTable(observables=self.observables + [other]) + return ObservableTable(observables=self.observables + [other]) - def __iadd__(self, other: Observable) -> ObservablesTable: + def __iadd__(self, other: Observable) -> ObservableTable: """Add an observable to the table in place.""" if not isinstance(other, Observable): raise TypeError("Can only add Observable to ObservablesTable") @@ -227,58 +317,34 @@ def __iadd__(self, other: Observable) -> ObservablesTable: return self -# TODO remove?! -class OperationType(str, Enum): - """Operation types for model changes in the PEtab conditions table.""" - - # TODO update names - SET_CURRENT_VALUE = "setCurrentValue" - NO_CHANGE = "noChange" - ... - - class Change(BaseModel): """A change to the model or model state. A change to the model or model state, corresponding to an individual - row of the PEtab conditions table. + row of the PEtab condition table. >>> Change( ... target_id="k1", - ... operation_type=OperationType.SET_CURRENT_VALUE, ... target_value="10", ... ) # doctest: +NORMALIZE_WHITESPACE - Change(target_id='k1', operation_type='setCurrentValue', - target_value=10.0000000000000) + Change(target_id='k1', target_value=10.0000000000000) """ #: The ID of the target entity to change. - target_id: str | None = Field(alias=C.TARGET_ID, default=None) - # TODO: remove?! - operation_type: OperationType = Field(alias=C.OPERATION_TYPE) + target_id: Annotated[str, AfterValidator(_valid_petab_id)] = Field( + alias=C.TARGET_ID + ) #: The value to set the target entity to. 
- target_value: sp.Basic | None = Field(alias=C.TARGET_VALUE, default=None) + target_value: sp.Basic = Field(alias=C.TARGET_VALUE) #: :meta private: model_config = ConfigDict( arbitrary_types_allowed=True, populate_by_name=True, use_enum_values=True, + extra="allow", ) - @model_validator(mode="before") - @classmethod - def _validate_id(cls, data: dict): - if ( - data.get("operation_type", data.get(C.OPERATION_TYPE)) - != C.OT_NO_CHANGE - ): - target_id = data.get("target_id", data.get(C.TARGET_ID)) - - if not is_valid_identifier(target_id): - raise ValueError(f"Invalid ID: {target_id}") - return data - @field_validator("target_value", mode="before") @classmethod def _sympify(cls, v): @@ -290,65 +356,57 @@ def _sympify(cls, v): return sympify_petab(v) -class ChangeSet(BaseModel): +class Condition(BaseModel): """A set of changes to the model or model state. A set of simultaneously occurring changes to the model or model state, corresponding to a perturbation of the underlying system. This corresponds to all rows of the PEtab conditions table with the same condition ID. - >>> ChangeSet( + >>> Condition( ... id="condition1", ... changes=[ ... Change( ... target_id="k1", - ... operation_type=OperationType.SET_CURRENT_VALUE, ... target_value="10", ... ) ... ], ... ) # doctest: +NORMALIZE_WHITESPACE - ChangeSet(id='condition1', changes=[Change(target_id='k1', - operation_type='setCurrentValue', target_value=10.0000000000000)]) + Condition(id='condition1', + changes=[Change(target_id='k1', target_value=10.0000000000000)]) """ #: The condition ID. - id: str = Field(alias=C.CONDITION_ID) + id: Annotated[str, AfterValidator(_valid_petab_id)] = Field( + alias=C.CONDITION_ID + ) #: The changes associated with this condition. 
changes: list[Change] #: :meta private: - model_config = ConfigDict(populate_by_name=True) + model_config = ConfigDict(populate_by_name=True, extra="allow") - @field_validator("id") - @classmethod - def _validate_id(cls, v): - if not v: - raise ValueError("ID must not be empty.") - if not is_valid_identifier(v): - raise ValueError(f"Invalid ID: {v}") - return v - - def __add__(self, other: Change) -> ChangeSet: + def __add__(self, other: Change) -> Condition: """Add a change to the set.""" if not isinstance(other, Change): - raise TypeError("Can only add Change to ChangeSet") - return ChangeSet(id=self.id, changes=self.changes + [other]) + raise TypeError("Can only add Change to Condition") + return Condition(id=self.id, changes=self.changes + [other]) - def __iadd__(self, other: Change) -> ChangeSet: + def __iadd__(self, other: Change) -> Condition: """Add a change to the set in place.""" if not isinstance(other, Change): - raise TypeError("Can only add Change to ChangeSet") + raise TypeError("Can only add Change to Condition") self.changes.append(other) return self -class ConditionsTable(BaseModel): +class ConditionTable(BaseModel): """PEtab conditions table.""" #: List of conditions. 
- conditions: list[ChangeSet] = [] + conditions: list[Condition] = [] - def __getitem__(self, condition_id: str) -> ChangeSet: + def __getitem__(self, condition_id: str) -> Condition: """Get a condition by ID.""" for condition in self.conditions: if condition.id == condition_id: @@ -356,29 +414,39 @@ def __getitem__(self, condition_id: str) -> ChangeSet: raise KeyError(f"Condition ID {condition_id} not found") @classmethod - def from_df(cls, df: pd.DataFrame) -> ConditionsTable: + def from_df(cls, df: pd.DataFrame) -> ConditionTable: """Create a ConditionsTable from a DataFrame.""" - if df is None: + if df is None or df.empty: return cls(conditions=[]) conditions = [] for condition_id, sub_df in df.groupby(C.CONDITION_ID): - changes = [Change(**row.to_dict()) for _, row in sub_df.iterrows()] - conditions.append(ChangeSet(id=condition_id, changes=changes)) + changes = [Change(**row) for row in sub_df.to_dict("records")] + conditions.append(Condition(id=condition_id, changes=changes)) return cls(conditions=conditions) def to_df(self) -> pd.DataFrame: """Convert the ConditionsTable to a DataFrame.""" records = [ - {C.CONDITION_ID: condition.id, **change.model_dump()} + {C.CONDITION_ID: condition.id, **change.model_dump(by_alias=True)} for condition in self.conditions for change in condition.changes ] - return pd.DataFrame(records) + for record in records: + record[C.TARGET_VALUE] = ( + float(record[C.TARGET_VALUE]) + if record[C.TARGET_VALUE].is_number + else str(record[C.TARGET_VALUE]) + ) + return ( + pd.DataFrame(records) + if records + else pd.DataFrame(columns=C.CONDITION_DF_REQUIRED_COLS) + ) @classmethod - def from_tsv(cls, file_path: str | Path) -> ConditionsTable: + def from_tsv(cls, file_path: str | Path) -> ConditionTable: """Create a ConditionsTable from a TSV file.""" df = pd.read_csv(file_path, sep="\t") return cls.from_df(df) @@ -388,19 +456,36 @@ def to_tsv(self, file_path: str | Path) -> None: df = self.to_df() df.to_csv(file_path, sep="\t", 
index=False) - def __add__(self, other: ChangeSet) -> ConditionsTable: + def __add__(self, other: Condition) -> ConditionTable: """Add a condition to the table.""" - if not isinstance(other, ChangeSet): - raise TypeError("Can only add ChangeSet to ConditionsTable") - return ConditionsTable(conditions=self.conditions + [other]) + if not isinstance(other, Condition): + raise TypeError("Can only add Conditions to ConditionsTable") + return ConditionTable(conditions=self.conditions + [other]) - def __iadd__(self, other: ChangeSet) -> ConditionsTable: + def __iadd__(self, other: Condition) -> ConditionTable: """Add a condition to the table in place.""" - if not isinstance(other, ChangeSet): - raise TypeError("Can only add ChangeSet to ConditionsTable") + if not isinstance(other, Condition): + raise TypeError("Can only add Conditions to ConditionsTable") self.conditions.append(other) return self + @property + def free_symbols(self) -> set[sp.Symbol]: + """Get all free symbols in the conditions table. + + This includes all free symbols in the target values of the changes, + independently of whether it is referenced by any experiment, or + (indirectly) by any measurement. + """ + return set( + chain.from_iterable( + change.target_value.free_symbols + for condition in self.conditions + for change in condition.changes + if change.target_value is not None + ) + ) + class ExperimentPeriod(BaseModel): """A period of a timecourse or experiment defined by a start time @@ -410,18 +495,20 @@ class ExperimentPeriod(BaseModel): """ #: The start time of the period in time units as defined in the model. - start: float = Field(alias=C.TIME) + time: Annotated[float, AfterValidator(_is_finite_or_neg_inf)] = Field( + alias=C.TIME + ) #: The ID of the condition to be applied at the start time. 
- condition_id: str = Field(alias=C.CONDITION_ID) + condition_id: str | None = Field(alias=C.CONDITION_ID, default=None) #: :meta private: - model_config = ConfigDict(populate_by_name=True) + model_config = ConfigDict(populate_by_name=True, extra="allow") - @field_validator("condition_id") + @field_validator("condition_id", mode="before") @classmethod def _validate_id(cls, condition_id): - if not condition_id: - raise ValueError("ID must not be empty.") + if pd.isna(condition_id) or not condition_id: + return None if not is_valid_identifier(condition_id): raise ValueError(f"Invalid ID: {condition_id}") return condition_id @@ -436,24 +523,17 @@ class Experiment(BaseModel): """ #: The experiment ID. - id: str = Field(alias=C.EXPERIMENT_ID) + id: Annotated[str, AfterValidator(_valid_petab_id)] = Field( + alias=C.EXPERIMENT_ID + ) #: The periods of the experiment. periods: list[ExperimentPeriod] = [] #: :meta private: model_config = ConfigDict( - arbitrary_types_allowed=True, populate_by_name=True + arbitrary_types_allowed=True, populate_by_name=True, extra="allow" ) - @field_validator("id") - @classmethod - def _validate_id(cls, v): - if not v: - raise ValueError("ID must not be empty.") - if not is_valid_identifier(v): - raise ValueError(f"Invalid ID: {v}") - return v - def __add__(self, other: ExperimentPeriod) -> Experiment: """Add a period to the experiment.""" if not isinstance(other, ExperimentPeriod): @@ -468,14 +548,14 @@ def __iadd__(self, other: ExperimentPeriod) -> Experiment: return self -class ExperimentsTable(BaseModel): +class ExperimentTable(BaseModel): """PEtab experiments table.""" #: List of experiments. 
experiments: list[Experiment] @classmethod - def from_df(cls, df: pd.DataFrame) -> ExperimentsTable: + def from_df(cls, df: pd.DataFrame) -> ExperimentTable: """Create an ExperimentsTable from a DataFrame.""" if df is None: return cls(experiments=[]) @@ -484,7 +564,7 @@ def from_df(cls, df: pd.DataFrame) -> ExperimentsTable: for experiment_id, cur_exp_df in df.groupby(C.EXPERIMENT_ID): periods = [ ExperimentPeriod( - start=row[C.TIME], condition_id=row[C.CONDITION_ID] + time=row[C.TIME], condition_id=row[C.CONDITION_ID] ) for _, row in cur_exp_df.iterrows() ] @@ -494,10 +574,22 @@ def from_df(cls, df: pd.DataFrame) -> ExperimentsTable: def to_df(self) -> pd.DataFrame: """Convert the ExperimentsTable to a DataFrame.""" - return pd.DataFrame(self.model_dump()["experiments"]) + records = [ + { + C.EXPERIMENT_ID: experiment.id, + **period.model_dump(by_alias=True), + } + for experiment in self.experiments + for period in experiment.periods + ] + return ( + pd.DataFrame(records) + if records + else pd.DataFrame(columns=C.EXPERIMENT_DF_REQUIRED_COLS) + ) @classmethod - def from_tsv(cls, file_path: str | Path) -> ExperimentsTable: + def from_tsv(cls, file_path: str | Path) -> ExperimentTable: """Create an ExperimentsTable from a TSV file.""" df = pd.read_csv(file_path, sep="\t") return cls.from_df(df) @@ -507,19 +599,26 @@ def to_tsv(self, file_path: str | Path) -> None: df = self.to_df() df.to_csv(file_path, sep="\t", index=False) - def __add__(self, other: Experiment) -> ExperimentsTable: + def __add__(self, other: Experiment) -> ExperimentTable: """Add an experiment to the table.""" if not isinstance(other, Experiment): raise TypeError("Can only add Experiment to ExperimentsTable") - return ExperimentsTable(experiments=self.experiments + [other]) + return ExperimentTable(experiments=self.experiments + [other]) - def __iadd__(self, other: Experiment) -> ExperimentsTable: + def __iadd__(self, other: Experiment) -> ExperimentTable: """Add an experiment to the table in 
place.""" if not isinstance(other, Experiment): raise TypeError("Can only add Experiment to ExperimentsTable") self.experiments.append(other) return self + def __getitem__(self, item): + """Get an experiment by ID.""" + for experiment in self.experiments: + if experiment.id == item: + return experiment + raise KeyError(f"Experiment ID {item} not found") + class Measurement(BaseModel): """A measurement. @@ -533,9 +632,13 @@ class Measurement(BaseModel): #: The experiment ID. experiment_id: str | None = Field(alias=C.EXPERIMENT_ID, default=None) #: The time point of the measurement in time units as defined in the model. - time: float = Field(alias=C.TIME) + time: Annotated[float, AfterValidator(_is_finite_or_pos_inf)] = Field( + alias=C.TIME + ) #: The measurement value. - measurement: float = Field(alias=C.MEASUREMENT) + measurement: Annotated[float, AfterValidator(_not_nan)] = Field( + alias=C.MEASUREMENT + ) #: Values for placeholder parameters in the observable formula. observable_parameters: list[sp.Basic] = Field( alias=C.OBSERVABLE_PARAMETERS, default_factory=list @@ -547,7 +650,7 @@ class Measurement(BaseModel): #: :meta private: model_config = ConfigDict( - arbitrary_types_allowed=True, populate_by_name=True + arbitrary_types_allowed=True, populate_by_name=True, extra="allow" ) @field_validator( @@ -578,12 +681,17 @@ def _validate_id(cls, v, info: ValidationInfo): ) @classmethod def _sympify_list(cls, v): + if v is None: + return [] + if isinstance(v, float) and np.isnan(v): return [] + if isinstance(v, str): v = v.split(C.PARAMETER_SEPARATOR) - else: + elif not isinstance(v, Sequence): v = [v] + return [sympify_petab(x) for x in v] @@ -613,7 +721,16 @@ def from_df( def to_df(self) -> pd.DataFrame: """Convert the MeasurementTable to a DataFrame.""" - return pd.DataFrame(self.model_dump()["measurements"]) + records = self.model_dump(by_alias=True)["measurements"] + for record in records: + record[C.OBSERVABLE_PARAMETERS] = C.PARAMETER_SEPARATOR.join( + 
map(str, record[C.OBSERVABLE_PARAMETERS]) + ) + record[C.NOISE_PARAMETERS] = C.PARAMETER_SEPARATOR.join( + map(str, record[C.NOISE_PARAMETERS]) + ) + + return pd.DataFrame(records) @classmethod def from_tsv(cls, file_path: str | Path) -> MeasurementTable: @@ -644,23 +761,20 @@ class Mapping(BaseModel): """Mapping PEtab entities to model entities.""" #: PEtab entity ID. - petab_id: str = Field(alias=C.PETAB_ENTITY_ID) + petab_id: Annotated[str, AfterValidator(_valid_petab_id)] = Field( + alias=C.PETAB_ENTITY_ID + ) #: Model entity ID. - model_id: str = Field(alias=C.MODEL_ENTITY_ID) + model_id: Annotated[str | None, BeforeValidator(_convert_nan_to_none)] = ( + Field(alias=C.MODEL_ENTITY_ID, default=None) + ) + #: Arbitrary name + name: Annotated[str | None, BeforeValidator(_convert_nan_to_none)] = Field( + alias=C.NAME, default=None + ) #: :meta private: - model_config = ConfigDict(populate_by_name=True) - - @field_validator( - "petab_id", - ) - @classmethod - def _validate_id(cls, v): - if not v: - raise ValueError("ID must not be empty.") - if not is_valid_identifier(v): - raise ValueError(f"Invalid ID: {v}") - return v + model_config = ConfigDict(populate_by_name=True, extra="allow") class MappingTable(BaseModel): @@ -683,7 +797,12 @@ def from_df(cls, df: pd.DataFrame) -> MappingTable: def to_df(self) -> pd.DataFrame: """Convert the MappingTable to a DataFrame.""" - return pd.DataFrame(self.model_dump()["mappings"]) + res = ( + pd.DataFrame(self.model_dump(by_alias=True)["mappings"]) + if self.mappings + else pd.DataFrame(columns=C.MAPPING_DF_REQUIRED_COLS) + ) + return res.set_index([C.PETAB_ENTITY_ID]) @classmethod def from_tsv(cls, file_path: str | Path) -> MappingTable: @@ -709,6 +828,20 @@ def __iadd__(self, other: Mapping) -> MappingTable: self.mappings.append(other) return self + def __getitem__(self, petab_id: str) -> Mapping: + """Get a mapping by PEtab ID.""" + for mapping in self.mappings: + if mapping.petab_id == petab_id: + return mapping + raise 
KeyError(f"PEtab ID {petab_id} not found") + + def get(self, petab_id, default=None): + """Get a mapping by PEtab ID or return a default value.""" + try: + return self[petab_id] + except KeyError: + return default + class Parameter(BaseModel): """Parameter definition.""" @@ -722,18 +855,24 @@ class Parameter(BaseModel): #: Nominal value. nominal_value: float | None = Field(alias=C.NOMINAL_VALUE, default=None) #: Parameter scale. + # TODO: keep or remove? scale: ParameterScale = Field( alias=C.PARAMETER_SCALE, default=ParameterScale.LIN ) + # TODO: change to bool in PEtab, or serialize as 0/1? + # https://github.com/PEtab-dev/PEtab/discussions/610 #: Is the parameter to be estimated? estimate: bool = Field(alias=C.ESTIMATE, default=True) + # TODO priors + # pydantic vs. petab.v1.priors.Prior #: :meta private: model_config = ConfigDict( arbitrary_types_allowed=True, populate_by_name=True, use_enum_values=True, + extra="allow", ) @field_validator("id") @@ -745,6 +884,28 @@ def _validate_id(cls, v): raise ValueError(f"Invalid ID: {v}") return v + @field_validator("estimate", mode="before") + @classmethod + def _validate_estimate_before(cls, v): + if isinstance(v, bool): + return v + + # FIXME: grace period for 0/1 values until the test suite was updated + if v in [0, 1, "0", "1"]: + return bool(int(v)) + + # TODO: clarify whether extra whitespace is allowed + if isinstance(v, str): + v = v.strip().lower() + if v == "true": + return True + if v == "false": + return False + + raise ValueError( + f"Invalid value for estimate: {v}. Must be `true` or `false`." 
+ ) + @field_validator("lb", "ub", "nominal_value") @classmethod def _convert_nan_to_none(cls, v): @@ -752,6 +913,33 @@ def _convert_nan_to_none(cls, v): return None return v + @model_validator(mode="after") + def _validate(self) -> Self: + if not self.estimate and self.nominal_value is None: + raise ValueError( + "Non-estimated parameter must have a nominal value" + ) + + if self.estimate and (self.lb is None or self.ub is None): + raise ValueError( + "Estimated parameter must have lower and upper bounds set" + ) + + # TODO: also if not estimated? + if ( + self.estimate + and self.lb is not None + and self.ub is not None + and self.lb >= self.ub + ): + raise ValueError("Lower bound must be less than upper bound.") + + # TODO parameterScale? + + # TODO priorType, priorParameters + + return self + class ParameterTable(BaseModel): """PEtab parameter table.""" @@ -774,7 +962,9 @@ def from_df(cls, df: pd.DataFrame) -> ParameterTable: def to_df(self) -> pd.DataFrame: """Convert the ParameterTable to a DataFrame.""" - return pd.DataFrame(self.model_dump()["parameters"]) + return pd.DataFrame( + self.model_dump(by_alias=True)["parameters"] + ).set_index([C.PARAMETER_ID]) @classmethod def from_tsv(cls, file_path: str | Path) -> ParameterTable: @@ -799,3 +989,15 @@ def __iadd__(self, other: Parameter) -> ParameterTable: raise TypeError("Can only add Parameter to ParameterTable") self.parameters.append(other) return self + + def __getitem__(self, item) -> Parameter: + """Get a parameter by ID.""" + for parameter in self.parameters: + if parameter.id == item: + return parameter + raise KeyError(f"Parameter ID {item} not found") + + @property + def n_estimated(self) -> int: + """Number of estimated parameters.""" + return sum(p.estimate for p in self.parameters) diff --git a/petab/v2/lint.py b/petab/v2/lint.py index 2deb0ebd..71d655dd 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -4,42 +4,15 @@ import logging from abc import ABC, abstractmethod -from collections 
import OrderedDict +from collections import Counter, OrderedDict from collections.abc import Set from dataclasses import dataclass, field from enum import IntEnum from pathlib import Path -import numpy as np import pandas as pd +import sympy as sp -from .. import v2 -from ..v1.lint import ( - _check_df, - assert_measured_observables_defined, - assert_measurements_not_null, - assert_measurements_numeric, - assert_model_parameters_in_condition_or_parameter_table, - assert_no_leading_trailing_whitespace, - assert_parameter_bounds_are_numeric, - assert_parameter_estimate_is_boolean, - assert_parameter_id_is_string, - assert_parameter_prior_parameters_are_valid, - assert_parameter_prior_type_is_valid, - assert_parameter_scale_is_valid, - assert_unique_observable_ids, - assert_unique_parameter_ids, - check_ids, - check_observable_df, - check_parameter_bounds, -) -from ..v1.measurements import ( - assert_overrides_match_parameter_count, - split_parameter_replacement_list, -) -from ..v1.observables import get_output_parameters, get_placeholders -from ..v1.visualize.lint import validate_visualization_df -from ..v2.C import * from .problem import Problem logger = logging.getLogger(__name__) @@ -51,17 +24,18 @@ "ValidationError", "ValidationTask", "CheckModel", - "CheckTableExists", - "CheckValidPetabIdColumn", - "CheckMeasurementTable", - "CheckConditionTable", - "CheckObservableTable", - "CheckParameterTable", + "CheckProblemConfig", + "CheckPosLogMeasurements", + "CheckValidConditionTargets", + "CheckUniquePrimaryKeys", "CheckExperimentTable", "CheckExperimentConditionsExist", "CheckAllParametersPresentInParameterTable", "CheckValidParameterInConditionOrParameterTable", "CheckVisualizationTable", + "CheckUnusedExperiments", + "CheckObservablesDoNotShadowModelEntities", + "CheckUnusedConditions", "lint_problem", "default_validation_tasks", ] @@ -91,6 +65,7 @@ class ValidationIssue: level: ValidationIssueSeverity message: str + task: str | None = None def 
__post_init__(self): if not isinstance(self.level, ValidationIssueSeverity): @@ -101,6 +76,18 @@ def __post_init__(self): def __str__(self): return f"{self.level.name}: {self.message}" + def _get_task_name(self): + """Get the name of the ValidationTask that raised this error.""" + import inspect + + # walk up the stack until we find the ValidationTask.run method + for frame_info in inspect.stack(): + frame = frame_info.frame + if "self" in frame.f_locals: + task = frame.f_locals["self"] + if isinstance(task, ValidationTask): + return task.__class__.__name__ + @dataclass class ValidationError(ValidationIssue): @@ -109,23 +96,23 @@ class ValidationError(ValidationIssue): level: ValidationIssueSeverity = field( default=ValidationIssueSeverity.ERROR, init=False ) - task: str | None = None def __post_init__(self): if self.task is None: self.task = self._get_task_name() - def _get_task_name(self): - """Get the name of the ValidationTask that raised this error.""" - import inspect - # walk up the stack until we find the ValidationTask.run method - for frame_info in inspect.stack(): - frame = frame_info.frame - if "self" in frame.f_locals: - task = frame.f_locals["self"] - if isinstance(task, ValidationTask): - return task.__class__.__name__ +@dataclass +class ValidationWarning(ValidationIssue): + """A validation result with level WARNING.""" + + level: ValidationIssueSeverity = field( + default=ValidationIssueSeverity.WARNING, init=False + ) + + def __post_init__(self): + if self.task is None: + self.task = self._get_task_name() class ValidationResultList(list[ValidationIssue]): @@ -139,17 +126,25 @@ def log( *, logger: logging.Logger = logger, min_level: ValidationIssueSeverity = ValidationIssueSeverity.INFO, + max_level: ValidationIssueSeverity = ValidationIssueSeverity.CRITICAL, ): - """Log the validation results.""" + """Log the validation results. + + :param logger: The logger to use for logging. + Defaults to the module logger. 
+ :param min_level: The minimum severity level to log. + :param max_level: The maximum severity level to log. + """ for result in self: - if result.level < min_level: + if result.level < min_level or result.level > max_level: continue + msg = f"{result.level.name}: {result.message} [{result.task}]" if result.level == ValidationIssueSeverity.INFO: - logger.info(result.message) + logger.info(msg) elif result.level == ValidationIssueSeverity.WARNING: - logger.warning(result.message) + logger.warning(msg) elif result.level >= ValidationIssueSeverity.ERROR: - logger.error(result.message) + logger.error(msg) if not self: logger.info("PEtab format check completed successfully.") @@ -167,7 +162,7 @@ def lint_problem(problem: Problem | str | Path) -> ValidationResultList: Arguments: problem: PEtab problem to check. Instance of :class:`Problem` or path - to a PEtab problem yaml file. + to a PEtab problem YAML file. Returns: A list of validation results. Empty if no issues were found. """ @@ -195,6 +190,38 @@ def __call__(self, *args, **kwargs): return self.run(*args, **kwargs) +class CheckProblemConfig(ValidationTask): + """A task to validate the configuration of a PEtab problem. + + This corresponds to checking the problem YAML file semantics. 
+ """ + + def run(self, problem: Problem) -> ValidationIssue | None: + if (config := problem.config) is None or config.base_path is None: + # This is allowed, so we can validate in-memory problems + # that don't have the list of files populated + return None + # TODO: decide when this should be emitted + # return ValidationWarning("Problem configuration is missing.") + + # TODO: we need some option for validating partial vs full problems + # check for unset but required files + missing_files = [] + if not config.parameter_file: + missing_files.append("parameters") + + if not [p.measurement_files for p in config.problems]: + missing_files.append("measurements") + + if not [p.observable_files for p in config.problems]: + missing_files.append("observables") + + if missing_files: + return ValidationError( + f"Missing files: {', '.join(missing_files)}" + ) + + class CheckModel(ValidationTask): """A task to validate the model of a PEtab problem.""" @@ -207,134 +234,133 @@ def run(self, problem: Problem) -> ValidationIssue | None: return ValidationError("Model is invalid.") -class CheckTableExists(ValidationTask): - """A task to check if a table exists in the PEtab problem.""" - - def __init__(self, table_name: str): - if table_name not in ["measurement", "observable", "parameter"]: - # all others are optional - raise ValueError( - f"Table name {table_name} is not supported. " - "Supported table names are 'measurement', 'observable', " - "'parameter'." 
- ) - self.table_name = table_name +class CheckMeasuredObservablesDefined(ValidationTask): + """A task to check that all observables referenced by the measurements + are defined.""" def run(self, problem: Problem) -> ValidationIssue | None: - if getattr(problem, f"{self.table_name}_df") is None: - return ValidationError(f"{self.table_name} table is missing.") - + used_observables = { + m.observable_id for m in problem.measurement_table.measurements + } + defined_observables = { + o.id for o in problem.observable_table.observables + } + if undefined_observables := (used_observables - defined_observables): + return ValidationError( + f"Observables {undefined_observables} used in " + "measurement table but not defined in observable table." + ) -class CheckValidPetabIdColumn(ValidationTask): - """A task to check that a given column contains only valid PEtab IDs.""" - def __init__( - self, - table_name: str, - column_name: str, - required_column: bool = True, - ignore_nan: bool = False, - ): - self.table_name = table_name - self.column_name = column_name - self.required_column = required_column - self.ignore_nan = ignore_nan +class CheckOverridesMatchPlaceholders(ValidationTask): + """A task to check that the number of observable/noise parameters + in the measurements match the number of placeholders in the observables.""" def run(self, problem: Problem) -> ValidationIssue | None: - df = getattr(problem, f"{self.table_name}_df") - if df is None: - return - - if self.column_name not in df.columns: - if self.required_column: - return ValidationError( - f"Column {self.column_name} is missing in " - f"{self.table_name} table." 
+ observable_parameters_count = { + o.id: len(o.observable_placeholders) + for o in problem.observable_table.observables + } + noise_parameters_count = { + o.id: len(o.noise_placeholders) + for o in problem.observable_table.observables + } + messages = [] + for m in problem.measurement_table.measurements: + # check observable parameters + try: + expected = observable_parameters_count[m.observable_id] + except KeyError: + messages.append( + f"Observable {m.observable_id} used in measurement " + f"table is not defined." ) - return + continue - try: - ids = df[self.column_name].values - if self.ignore_nan: - ids = ids[~pd.isna(ids)] - check_ids(ids, kind=self.column_name) - except ValueError as e: - return ValidationError(str(e)) + actual = len(m.observable_parameters) + if actual != expected: + formula = problem.observable_table[m.observable_id].formula + messages.append( + f"Mismatch of observable parameter overrides for " + f"{m.observable_id} ({formula})" + f"in:\n{m}\n" + f"Expected {expected} but got {actual}" + ) -class CheckMeasurementTable(ValidationTask): - """A task to validate the measurement table of a PEtab problem.""" + # check noise parameters + expected = noise_parameters_count[m.observable_id] + actual = len(m.noise_parameters) + if actual != expected: + # no overrides defined, but a numerical sigma can be provided + # anyway + if len(m.noise_parameters) != 1 or ( + len(m.noise_parameters) == 1 + and m.noise_parameters[0].is_number + ): + messages.append( + "No placeholders have been specified in the " + f"noise model for observable {m.observable_id}, " + "but a parameter ID " + "or multiple overrides were specified in the " + "noiseParameters column." 
+ ) + else: + formula = problem.observable_table[ + m.observable_id + ].noise_formula + messages.append( + f"Mismatch of noise parameter overrides for " + f"{m.observable_id} ({formula})" + f"in:\n{m}\n" + f"Expected {expected} but got {actual}" + ) - def run(self, problem: Problem) -> ValidationIssue | None: - if problem.measurement_df is None: - return + if messages: + return ValidationError("\n".join(messages)) - df = problem.measurement_df - try: - _check_df(df, MEASUREMENT_DF_REQUIRED_COLS, "measurement") - for column_name in MEASUREMENT_DF_REQUIRED_COLS: - if not np.issubdtype(df[column_name].dtype, np.number): - assert_no_leading_trailing_whitespace( - df[column_name].values, column_name - ) +class CheckPosLogMeasurements(ValidationTask): + """Check that measurements for observables with + log-transformation are positive.""" - for column_name in MEASUREMENT_DF_OPTIONAL_COLS: - if column_name in df and not np.issubdtype( - df[column_name].dtype, np.number - ): - assert_no_leading_trailing_whitespace( - df[column_name].values, column_name + def run(self, problem: Problem) -> ValidationIssue | None: + from .core import ObservableTransformation as ot + + log_observables = { + o.id + for o in problem.observable_table.observables + if o.transformation in [ot.LOG, ot.LOG10] + } + if log_observables: + for m in problem.measurement_table.measurements: + if m.measurement <= 0 and m.observable_id in log_observables: + return ValidationError( + "Measurements with observable " + f"log transformation must be " + f"positive, but {m.measurement} <= 0 for {m}" ) - if problem.observable_df is not None: - assert_measured_observables_defined(df, problem.observable_df) - assert_overrides_match_parameter_count( - df, problem.observable_df - ) - if OBSERVABLE_TRANSFORMATION in problem.observable_df: - # Check for positivity of measurements in case of - # log-transformation - assert_unique_observable_ids(problem.observable_df) - # If the above is not checked, in the following loop 
- # trafo may become a pandas Series - for measurement, obs_id in zip( - df[MEASUREMENT], df[OBSERVABLE_ID], strict=True - ): - trafo = problem.observable_df.loc[ - obs_id, OBSERVABLE_TRANSFORMATION - ] - if measurement <= 0.0 and trafo in [LOG, LOG10]: - raise ValueError( - "Measurements with observable " - f"transformation {trafo} must be " - f"positive, but {measurement} <= 0." - ) - - assert_measurements_not_null(df) - assert_measurements_numeric(df) - except AssertionError as e: - return ValidationError(str(e)) +class CheckMeasuredExperimentsDefined(ValidationTask): + """A task to check that all experiments referenced by measurements + are defined.""" + def run(self, problem: Problem) -> ValidationIssue | None: # TODO: introduce some option for validation of partial vs full # problem. if this is supposed to be a complete problem, a missing # condition table should be an error if the measurement table refers # to conditions, otherwise it should maximally be a warning - used_experiments = set(problem.measurement_df[EXPERIMENT_ID].values) - # handle default-experiment - used_experiments = set( - filter( - lambda x: not pd.isna(x), - used_experiments, - ) - ) + used_experiments = { + m.experiment_id + for m in problem.measurement_table.measurements + if m.experiment_id is not None + } + # check that measured experiments exist - available_experiments = ( - set(problem.experiment_df[EXPERIMENT_ID].unique()) - if problem.experiment_df is not None - else set() - ) + available_experiments = { + e.id for e in problem.experiment_table.experiments + } if missing_experiments := (used_experiments - available_experiments): return ValidationError( "Measurement table references experiments that " @@ -343,78 +369,85 @@ def run(self, problem: Problem) -> ValidationIssue | None: ) -class CheckConditionTable(ValidationTask): - """A task to validate the condition table of a PEtab problem.""" +class CheckValidConditionTargets(ValidationTask): + """Check that all condition table 
targets are valid.""" def run(self, problem: Problem) -> ValidationIssue | None: - if problem.condition_df is None: - return - - df = problem.condition_df - - try: - _check_df(df, CONDITION_DF_REQUIRED_COLS, "condition") - check_ids(df[CONDITION_ID], kind="condition") - check_ids(df[TARGET_ID], kind="target") - except AssertionError as e: - return ValidationError(str(e)) - - # TODO: check value types - - if problem.model is None: - return - - # check targets are valid allowed_targets = set( problem.model.get_valid_ids_for_condition_table() ) - if problem.observable_df is not None: - allowed_targets |= set( - get_output_parameters( - model=problem.model, - observable_df=problem.observable_df, - mapping_df=problem.mapping_df, - ) - ) - if problem.mapping_df is not None: - allowed_targets |= set(problem.mapping_df.index.values) - invalid = set(df[TARGET_ID].unique()) - allowed_targets - if invalid: + allowed_targets |= set(get_output_parameters(problem)) + allowed_targets |= { + m.petab_id + for m in problem.mapping_table.mappings + if m.model_id is not None + } + + used_targets = { + change.target_id + for cond in problem.condition_table.conditions + for change in cond.changes + } + + if invalid := (used_targets - allowed_targets): return ValidationError( f"Condition table contains invalid targets: {invalid}" ) - # TODO check that all value types are valid for the given targets +class CheckUniquePrimaryKeys(ValidationTask): + """Check that all primary keys are unique.""" -class CheckObservableTable(ValidationTask): - """A task to validate the observable table of a PEtab problem.""" + def run(self, problem: Problem) -> ValidationIssue | None: + # TODO: check that IDs are globally unique + # -- replaces CheckObservablesDoNotShadowModelEntities - def run(self, problem: Problem): - if problem.observable_df is None: - return + # check for uniqueness of all primary keys + counter = Counter(c.id for c in problem.condition_table.conditions) + duplicates = {id for id, count 
in counter.items() if count > 1} - try: - check_observable_df( - problem.observable_df, + if duplicates: + return ValidationError( + f"Condition table contains duplicate IDs: {duplicates}" ) - except AssertionError as e: - return ValidationIssue( - level=ValidationIssueSeverity.ERROR, message=str(e) + + counter = Counter(o.id for o in problem.observable_table.observables) + duplicates = {id for id, count in counter.items() if count > 1} + + if duplicates: + return ValidationError( + f"Observable table contains duplicate IDs: {duplicates}" + ) + + counter = Counter(e.id for e in problem.experiment_table.experiments) + duplicates = {id for id, count in counter.items() if count > 1} + + if duplicates: + return ValidationError( + f"Experiment table contains duplicate IDs: {duplicates}" + ) + + counter = Counter(p.id for p in problem.parameter_table.parameters) + duplicates = {id for id, count in counter.items() if count > 1} + + if duplicates: + return ValidationError( + f"Parameter table contains duplicate IDs: {duplicates}" ) class CheckObservablesDoNotShadowModelEntities(ValidationTask): """A task to check that observable IDs do not shadow model entities.""" + # TODO: all PEtab entity IDs must be disjoint from the model entity IDs def run(self, problem: Problem) -> ValidationIssue | None: - if problem.observable_df is None or problem.model is None: - return + if not problem.observable_table.observables or problem.model is None: + return None shadowed_entities = [ - obs_id - for obs_id in problem.observable_df.index - if problem.model.has_entity_with_id(obs_id) + o.id + for o in problem.observable_table.observables + if problem.model.has_entity_with_id(o.id) ] if shadowed_entities: return ValidationError( @@ -422,103 +455,23 @@ def run(self, problem: Problem) -> ValidationIssue | None: ) -class CheckParameterTable(ValidationTask): - """A task to validate the parameter table of a PEtab problem.""" - - def run(self, problem: Problem) -> ValidationIssue | None: - if 
problem.parameter_df is None: - return - - try: - df = problem.parameter_df - _check_df(df, PARAMETER_DF_REQUIRED_COLS[1:], "parameter") - - if df.index.name != PARAMETER_ID: - return ValidationError( - f"Parameter table has wrong index {df.index.name}." - f" Expected {PARAMETER_ID}.", - ) - - check_ids(df.index.values, kind="parameter") - - for column_name in PARAMETER_DF_REQUIRED_COLS[ - 1: - ]: # 0 is PARAMETER_ID - if not np.issubdtype(df[column_name].dtype, np.number): - assert_no_leading_trailing_whitespace( - df[column_name].values, column_name - ) - - # nominal value is required for non-estimated parameters - non_estimated_par_ids = list( - df.index[ - (df[ESTIMATE] != 1) - | ( - pd.api.types.is_string_dtype(df[ESTIMATE]) - and df[ESTIMATE] != "1" - ) - ] - ) - # TODO implement as validators - # `assert_has_fixed_parameter_nominal_values` - # and `assert_correct_table_dtypes` - if non_estimated_par_ids: - if NOMINAL_VALUE not in df: - return ValidationError( - "Parameter table contains parameters " - f"{non_estimated_par_ids} that are not " - "specified to be estimated, " - f"but column {NOMINAL_VALUE} is missing." - ) - try: - df.loc[non_estimated_par_ids, NOMINAL_VALUE].apply(float) - except ValueError: - return ValidationError( - f"Expected numeric values for `{NOMINAL_VALUE}` " - "in parameter table " - "for all non-estimated parameters." 
- ) - - assert_parameter_id_is_string(df) - assert_parameter_scale_is_valid(df) - assert_parameter_bounds_are_numeric(df) - assert_parameter_estimate_is_boolean(df) - assert_unique_parameter_ids(df) - check_parameter_bounds(df) - assert_parameter_prior_type_is_valid(df) - assert_parameter_prior_parameters_are_valid(df) - - except AssertionError as e: - return ValidationError(str(e)) - - class CheckExperimentTable(ValidationTask): """A task to validate the experiment table of a PEtab problem.""" def run(self, problem: Problem) -> ValidationIssue | None: - if problem.experiment_df is None: - return - - df = problem.experiment_df - - try: - _check_df(df, EXPERIMENT_DF_REQUIRED_COLS, "experiment") - except AssertionError as e: - return ValidationError(str(e)) + messages = [] + for experiment in problem.experiment_table.experiments: + # Check that there are no duplicate timepoints + counter = Counter(period.time for period in experiment.periods) + duplicates = {time for time, count in counter.items() if count > 1} + if duplicates: + messages.append( + f"Experiment {experiment.id} contains duplicate " + f"timepoints: {duplicates}" + ) - # valid timepoints - invalid = [] - for time in df[TIME].values: - try: - time = float(time) - if not np.isfinite(time) and time != -np.inf: - invalid.append(time) - except ValueError: - invalid.append(time) - if invalid: - return ValidationError( - f"Invalid timepoints in experiment table: {invalid}" - ) + if messages: + return ValidationError("\n".join(messages)) class CheckExperimentConditionsExist(ValidationTask): @@ -526,30 +479,24 @@ class CheckExperimentConditionsExist(ValidationTask): in the condition table.""" def run(self, problem: Problem) -> ValidationIssue | None: - if problem.experiment_df is None: - return - - if ( - problem.condition_df is None - and problem.experiment_df is not None - and not problem.experiment_df.empty - ): - return ValidationError( - "Experiment table is non-empty, " - "but condition table is missing." 
- ) + messages = [] + available_conditions = { + c.id for c in problem.condition_table.conditions + } + for experiment in problem.experiment_table.experiments: + missing_conditions = { + period.condition_id + for period in experiment.periods + if period.condition_id is not None + } - available_conditions + if missing_conditions: + messages.append( + f"Experiment {experiment.id} requires conditions that are " + f"not present in the condition table: {missing_conditions}" + ) - required_conditions = problem.experiment_df[CONDITION_ID].unique() - existing_conditions = problem.condition_df[CONDITION_ID].unique() - - missing_conditions = set(required_conditions) - set( - existing_conditions - ) - if missing_conditions: - return ValidationError( - f"Experiment table contains conditions that are not present " - f"in the condition table: {missing_conditions}" - ) + if messages: + return ValidationError("\n".join(messages)) class CheckAllParametersPresentInParameterTable(ValidationTask): @@ -557,34 +504,25 @@ class CheckAllParametersPresentInParameterTable(ValidationTask): with no additional ones.""" def run(self, problem: Problem) -> ValidationIssue | None: - if ( - problem.model is None - or problem.parameter_df is None - or problem.observable_df is None - or problem.measurement_df is None - ): - return + if problem.model is None: + return None required = get_required_parameters_for_parameter_table(problem) allowed = get_valid_parameters_for_parameter_table(problem) - actual = set(problem.parameter_df.index) + actual = {p.id for p in problem.parameter_table.parameters} missing = required - actual extraneous = actual - allowed # missing parameters might be present under a different name based on # the mapping table - if missing and problem.mapping_df is not None: + if missing: model_to_petab_mapping = {} - for map_from, map_to in zip( - problem.mapping_df.index.values, - problem.mapping_df[MODEL_ENTITY_ID], - strict=True, - ): - if map_to in model_to_petab_mapping: - 
model_to_petab_mapping[map_to].append(map_from) + for m in problem.mapping_table.mappings: + if m.model_id in model_to_petab_mapping: + model_to_petab_mapping[m.model_id].append(m.petab_id) else: - model_to_petab_mapping[map_to] = [map_from] + model_to_petab_mapping[m.model_id] = [m.petab_id] missing = { missing_id for missing_id in missing @@ -613,23 +551,116 @@ class CheckValidParameterInConditionOrParameterTable(ValidationTask): present in the condition or parameter table.""" def run(self, problem: Problem) -> ValidationIssue | None: - if ( - problem.model is None - or problem.condition_df is None - or problem.parameter_df is None - ): - return - - try: - assert_model_parameters_in_condition_or_parameter_table( - problem.model, - problem.condition_df, - problem.parameter_df, - problem.mapping_df, + if problem.model is None: + return None + + allowed_in_condition_cols = set( + problem.model.get_valid_ids_for_condition_table() + ) + allowed_in_condition_cols |= { + m.petab_id + for m in problem.mapping_table.mappings + if not pd.isna(m.model_id) + and ( + # mapping table entities mapping to already allowed parameters + m.model_id in allowed_in_condition_cols + # mapping table entities mapping to species + or problem.model.is_state_variable(m.model_id) ) - except AssertionError as e: - return ValidationIssue( - level=ValidationIssueSeverity.ERROR, message=str(e) + } + + allowed_in_parameter_table = get_valid_parameters_for_parameter_table( + problem + ) + + entities_in_condition_table = { + change.target_id + for cond in problem.condition_table.conditions + for change in cond.changes + } + entities_in_parameter_table = { + p.id for p in problem.parameter_table.parameters + } + + disallowed_in_condition = { + x + for x in (entities_in_condition_table - allowed_in_condition_cols) + # we only check model entities here, not output parameters + if problem.model.has_entity_with_id(x) + } + if disallowed_in_condition: + is_or_are = "is" if len(disallowed_in_condition) == 
1 else "are" + return ValidationError( + f"{disallowed_in_condition} {is_or_are} not " + "allowed to occur in condition table " + "columns." + ) + + disallowed_in_parameters = { + x + for x in (entities_in_parameter_table - allowed_in_parameter_table) + # we only check model entities here, not output parameters + if problem.model.has_entity_with_id(x) + } + + if disallowed_in_parameters: + is_or_are = "is" if len(disallowed_in_parameters) == 1 else "are" + return ValidationError( + f"{disallowed_in_parameters} {is_or_are} not " + "allowed to occur in the parameters table." + ) + + in_both = entities_in_condition_table & entities_in_parameter_table + if in_both: + is_or_are = "is" if len(in_both) == 1 else "are" + return ValidationError( + f"{in_both} {is_or_are} present in both " + "the condition table and the parameter table." + ) + + +class CheckUnusedExperiments(ValidationTask): + """A task to check for experiments that are not used in the measurements + table.""" + + def run(self, problem: Problem) -> ValidationIssue | None: + used_experiments = { + m.experiment_id + for m in problem.measurement_table.measurements + if m.experiment_id is not None + } + available_experiments = { + e.id for e in problem.experiment_table.experiments + } + + unused_experiments = available_experiments - used_experiments + if unused_experiments: + return ValidationWarning( + f"Experiments {unused_experiments} are not used in the " + "measurements table." 
+ ) + + +class CheckUnusedConditions(ValidationTask): + """A task to check for conditions that are not used in the experiments + table.""" + + def run(self, problem: Problem) -> ValidationIssue | None: + used_conditions = { + p.condition_id + for e in problem.experiment_table.experiments + for p in e.periods + if p.condition_id is not None + } + available_conditions = { + c.id for c in problem.condition_table.conditions + } + + unused_conditions = available_conditions - used_conditions + if unused_conditions: + return ValidationWarning( + f"Conditions {unused_conditions} are not used in the " + "experiments table." ) @@ -638,7 +669,9 @@ class CheckVisualizationTable(ValidationTask): def run(self, problem: Problem) -> ValidationIssue | None: if problem.visualization_df is None: - return + return None + + from ..v1.visualize.lint import validate_visualization_df if validate_visualization_df(problem): return ValidationIssue( @@ -651,14 +684,9 @@ def get_valid_parameters_for_parameter_table( problem: Problem, ) -> set[str]: """ - Get set of parameters which may be present inside the parameter table + Get the set of parameters which may be present inside the parameter table - Arguments: - model: PEtab model - condition_df: PEtab condition table - observable_df: PEtab observable table - measurement_df: PEtab measurement table - mapping_df: PEtab mapping table for additional checks + :param problem: The PEtab problem Returns: Set of parameter IDs which PEtab allows to be present in the @@ -672,71 +700,50 @@ def get_valid_parameters_for_parameter_table( # - remove parameters for which condition table columns exist # - remove placeholder parameters # (only partial overrides are not supported) - model = problem.model - condition_df = problem.condition_df - observable_df = problem.observable_df - measurement_df = problem.measurement_df - mapping_df = problem.mapping_df # must not go into parameter table - blackset = set() + invalid = set(get_placeholders(problem)) - if 
observable_df is not None: - placeholders = set(get_placeholders(observable_df)) - - # collect assignment targets - blackset |= placeholders - - if condition_df is not None: - blackset |= set(condition_df.columns.values) - {CONDITION_NAME} + # condition table targets + invalid |= { + change.target_id + for cond in problem.condition_table.conditions + for change in cond.changes + } # don't use sets here, to have deterministic ordering, - # e.g. for creating parameter tables + # e.g., for creating parameter tables parameter_ids = OrderedDict.fromkeys( p - for p in model.get_valid_parameters_for_parameter_table() - if p not in blackset + for p in problem.model.get_valid_parameters_for_parameter_table() + if p not in invalid ) - if mapping_df is not None: - for from_id, to_id in mapping_df[MODEL_ENTITY_ID].items(): - if to_id in parameter_ids.keys(): - parameter_ids[from_id] = None + for mapping in problem.mapping_table.mappings: + if mapping.model_id and mapping.model_id in parameter_ids.keys(): + parameter_ids[mapping.petab_id] = None - if observable_df is not None: - # add output parameters from observables table - output_parameters = get_output_parameters( - observable_df=observable_df, model=model - ) - for p in output_parameters: - if p not in blackset: - parameter_ids[p] = None + # add output parameters from observables table + output_parameters = get_output_parameters(problem) + for p in output_parameters: + if p not in invalid: + parameter_ids[p] = None # Append parameters from measurement table, unless they occur as condition # table columns def append_overrides(overrides): for p in overrides: - if isinstance(p, str) and p not in blackset: - parameter_ids[p] = None - - if measurement_df is not None: - for _, row in measurement_df.iterrows(): - # we trust that the number of overrides matches - append_overrides( - split_parameter_replacement_list( - row.get(OBSERVABLE_PARAMETERS, None) - ) - ) - append_overrides( - split_parameter_replacement_list( - 
row.get(NOISE_PARAMETERS, None) - ) - ) + if isinstance(p, sp.Symbol) and (str_p := str(p)) not in invalid: + parameter_ids[str_p] = None + + for measurement in problem.measurement_table.measurements: + # we trust that the number of overrides matches + append_overrides(measurement.observable_parameters) + append_overrides(measurement.noise_parameters) # Append parameter overrides from condition table - if condition_df is not None: - for p in v2.conditions.get_condition_table_free_symbols(problem): - parameter_ids[str(p)] = None + for p in problem.condition_table.free_symbols: + parameter_ids[str(p)] = None return set(parameter_ids.keys()) @@ -756,34 +763,30 @@ def get_required_parameters_for_parameter_table( that are not defined in the model. """ parameter_ids = set() + condition_targets = { + change.target_id + for cond in problem.condition_table.conditions + for change in cond.changes + } # Add parameters from measurement table, unless they are fixed parameters def append_overrides(overrides): parameter_ids.update( - p + str_p for p in overrides - if isinstance(p, str) - and ( - problem.condition_df is None - or p not in problem.condition_df[TARGET_ID] - ) + if isinstance(p, sp.Symbol) + and (str_p := str(p)) not in condition_targets ) - for _, row in problem.measurement_df.iterrows(): + for m in problem.measurement_table.measurements: # we trust that the number of overrides matches - append_overrides( - split_parameter_replacement_list( - row.get(OBSERVABLE_PARAMETERS, None) - ) - ) - append_overrides( - split_parameter_replacement_list(row.get(NOISE_PARAMETERS, None)) - ) + append_overrides(m.observable_parameters) + append_overrides(m.noise_parameters) - # remove `observable_ids` when - # `get_output_parameters` is updated for PEtab v2/v1.1, where - # observable IDs are allowed in observable formulae - observable_ids = set(problem.observable_df.index) + # TODO remove `observable_ids` when + # `get_output_parameters` is updated for PEtab v2/v1.1, where + # 
observable IDs are allowed in observable formulae + observable_ids = {o.id for o in problem.observable_table.observables} # Add output parameters except for placeholders for formula_type, placeholder_sources in ( @@ -801,13 +804,11 @@ def append_overrides(overrides): ), ): output_parameters = get_output_parameters( - problem.observable_df, - problem.model, - mapping_df=problem.mapping_df, + problem, **formula_type, ) placeholders = get_placeholders( - problem.observable_df, + problem, **placeholder_sources, ) parameter_ids.update( @@ -820,35 +821,119 @@ def append_overrides(overrides): # model parameter_ids.update( str(p) - for p in v2.conditions.get_condition_table_free_symbols(problem) + for p in problem.condition_table.free_symbols if not problem.model.has_entity_with_id(str(p)) ) # parameters that are overridden via the condition table are not allowed - if problem.condition_df is not None: - parameter_ids -= set(problem.condition_df[TARGET_ID].unique()) + parameter_ids -= condition_targets return parameter_ids +def get_output_parameters( + problem: Problem, + observables: bool = True, + noise: bool = True, +) -> list[str]: + """Get output parameters + + Returns IDs of parameters used in observable and noise formulas that are + not defined in the model. + + Arguments: + problem: The PEtab problem + observables: Include parameters from observableFormulas + noise: Include parameters from noiseFormulas + + Returns: + List of output parameter IDs + """ + formulas = [] + if observables: + formulas.extend( + o.formula for o in problem.observable_table.observables + ) + if noise: + formulas.extend( + o.noise_formula for o in problem.observable_table.observables + ) + output_parameters = OrderedDict() + + for formula in formulas: + free_syms = sorted( + formula.free_symbols, + key=lambda symbol: symbol.name, + ) + for free_sym in free_syms: + sym = str(free_sym) + if problem.model.symbol_allowed_in_observable_formula(sym): + continue + + # does it map to a model entity? 
+ + if ( + (mapped := problem.mapping_table.get(sym)) is not None + and mapped.model_id is not None + and problem.model.symbol_allowed_in_observable_formula( + mapped.model_id + ) + ): + continue + + output_parameters[sym] = None + + return list(output_parameters.keys()) + + +def get_placeholders( + problem: Problem, + observables: bool = True, + noise: bool = True, +) -> list[str]: + """Get all placeholder parameters from observable table observableFormulas + and noiseFormulas. + + Arguments: + problem: The PEtab problem + observables: Include parameters from observableFormulas + noise: Include parameters from noiseFormulas + + Returns: + List of placeholder parameters from observable table observableFormulas + and noiseFormulas. + """ + # collect placeholder parameters overwritten by + # {observable,noise}Parameters + placeholders = [] + for o in problem.observable_table.observables: + if observables: + placeholders.extend(map(str, o.observable_placeholders)) + if noise: + placeholders.extend(map(str, o.noise_placeholders)) + + from ..v1.core import unique_preserve_order + + return unique_preserve_order(placeholders) + + #: Validation tasks that should be run on any PEtab problem default_validation_tasks = [ - CheckTableExists("measurement"), - CheckTableExists("observable"), - CheckTableExists("parameter"), + CheckProblemConfig(), CheckModel(), - CheckMeasurementTable(), - CheckConditionTable(), + CheckUniquePrimaryKeys(), + CheckMeasuredObservablesDefined(), + CheckPosLogMeasurements(), + CheckOverridesMatchPlaceholders(), + CheckValidConditionTargets(), CheckExperimentTable(), - CheckValidPetabIdColumn("measurement", EXPERIMENT_ID, ignore_nan=True), - CheckValidPetabIdColumn("experiment", EXPERIMENT_ID), - CheckValidPetabIdColumn("experiment", CONDITION_ID), CheckExperimentConditionsExist(), - CheckObservableTable(), CheckObservablesDoNotShadowModelEntities(), - CheckParameterTable(), CheckAllParametersPresentInParameterTable(), + 
CheckValidParameterInConditionOrParameterTable(), + CheckUnusedExperiments(), + CheckUnusedConditions(), # TODO: atomize checks, update to long condition table, re-enable # CheckVisualizationTable(), - CheckValidParameterInConditionOrParameterTable(), + # TODO validate mapping table ] diff --git a/petab/v2/petab1to2.py b/petab/v2/petab1to2.py index 7f675db0..d7b6fb68 100644 --- a/petab/v2/petab1to2.py +++ b/petab/v2/petab1to2.py @@ -1,9 +1,12 @@ """Convert PEtab version 1 problems to version 2.""" +from __future__ import annotations + import shutil from contextlib import suppress from itertools import chain from pathlib import Path +from tempfile import TemporaryDirectory from urllib.parse import urlparse from uuid import uuid4 @@ -14,36 +17,48 @@ from ..v1.yaml import get_path_prefix, load_yaml, validate from ..versions import get_major_version from .models import MODEL_TYPE_SBML -from .problem import ProblemConfig __all__ = ["petab1to2"] -def petab1to2(yaml_config: Path | str, output_dir: Path | str = None): +def petab1to2( + yaml_config: Path | str, output_dir: Path | str = None +) -> v2.Problem | None: """Convert from PEtab 1.0 to PEtab 2.0 format. Convert a PEtab problem from PEtab 1.0 to PEtab 2.0 format. - Parameters - ---------- - yaml_config: dict | Path | str + :param yaml_config: The PEtab problem as dictionary or YAML file name. - output_dir: Path | str + :param output_dir: The output directory to save the converted PEtab problem, or ``None``, to return a :class:`petab.v2.Problem` instance. - Raises - ------ - ValueError + :raises ValueError: If the input is invalid or does not pass linting or if the generated files do not pass linting. 
""" - if output_dir is None: - # TODO requires petab.v2.Problem - raise NotImplementedError("Not implemented yet.") - elif isinstance(yaml_config, dict): - raise ValueError("If output_dir is given, yaml_config must be a file.") + if output_dir is not None: + return petab_files_1to2(yaml_config, output_dir) + + with TemporaryDirectory() as tmp_dir: + petab_files_1to2(yaml_config, tmp_dir) + return v2.Problem.from_yaml(Path(tmp_dir, Path(yaml_config).name)) + + +def petab_files_1to2(yaml_config: Path | str, output_dir: Path | str): + """Convert PEtab files from PEtab 1.0 to PEtab 2.0. + + + :param yaml_config: + The PEtab problem as dictionary or YAML file name. + :param output_dir: + The output directory to save the converted PEtab problem. + :raises ValueError: + If the input is invalid or does not pass linting or if the generated + files do not pass linting. + """ if isinstance(yaml_config, Path | str): yaml_file = str(yaml_config) path_prefix = get_path_prefix(yaml_file) @@ -56,11 +71,12 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None): get_dest_path = lambda filename: f"{output_dir}/{filename}" # noqa: E731 - # Validate original PEtab problem + # Validate the original PEtab problem validate(yaml_config, path_prefix=path_prefix) if get_major_version(yaml_config) != 1: raise ValueError("PEtab problem is not version 1.") petab_problem = v1.Problem.from_yaml(yaml_file or yaml_config) + # TODO: move to mapping table # get rid of conditionName column if present (unsupported in v2) petab_problem.condition_df = petab_problem.condition_df.drop( columns=[v1.C.CONDITION_NAME], errors="ignore" @@ -72,7 +88,7 @@ def petab1to2(yaml_config: Path | str, output_dir: Path | str = None): # Update YAML file new_yaml_config = _update_yaml(yaml_config) - new_yaml_config = ProblemConfig(**new_yaml_config) + new_yaml_config = v2.ProblemConfig(**new_yaml_config) # Update tables # condition tables, observable tables, SBML files, parameter table: @@ -218,7 +234,7 
@@ def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str: measurement_df, get_dest_path(measurement_file) ) - # Write new YAML file + # Write the new YAML file new_yaml_file = output_dir / Path(yaml_file).name new_yaml_config.to_yaml(new_yaml_file) @@ -226,10 +242,19 @@ def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str: validation_issues = v2.lint_problem(new_yaml_file) if validation_issues: - raise ValueError( - "Generated PEtab v2 problem did not pass linting: " - f"{validation_issues}" + sev = v2.lint.ValidationIssueSeverity + validation_issues.log(max_level=sev.WARNING) + errors = "\n".join( + map( + str, + (i for i in validation_issues if i.level > sev.WARNING), + ) ) + if errors: + raise ValueError( + "The generated PEtab v2 problem did not pass linting: " + f"{errors}" + ) def _update_yaml(yaml_config: dict) -> dict: @@ -287,7 +312,7 @@ def v1v2_condition_df( condition_df = condition_df.copy().reset_index() with suppress(KeyError): # conditionName was dropped in PEtab v2 - condition_df.drop(columns=[v2.C.CONDITION_NAME], inplace=True) + condition_df.drop(columns=[v1.C.CONDITION_NAME], inplace=True) condition_df = condition_df.melt( id_vars=[v1.C.CONDITION_ID], @@ -301,24 +326,8 @@ def v1v2_condition_df( columns=[ v2.C.CONDITION_ID, v2.C.TARGET_ID, - v2.C.OPERATION_TYPE, v2.C.TARGET_VALUE, ] ) - targets = set(condition_df[v2.C.TARGET_ID].unique()) - valid_cond_pars = set(model.get_valid_parameters_for_parameter_table()) - # entities to which we assign constant values - constant = targets & valid_cond_pars - # entities to which we assign initial values - initial = set() - for target in targets - constant: - if model.is_state_variable(target): - initial.add(target) - else: - raise NotImplementedError( - f"Unable to determine value type {target} in the condition " - "table." 
- ) - condition_df[v2.C.OPERATION_TYPE] = v2.C.OT_CUR_VAL return condition_df diff --git a/petab/v2/problem.py b/petab/v2/problem.py index d18b4b7c..2b564e0c 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -13,10 +13,10 @@ from typing import TYPE_CHECKING import pandas as pd +import sympy as sp from pydantic import AnyUrl, BaseModel, Field from ..v1 import ( - core, mapping, measurements, observables, @@ -25,17 +25,18 @@ sampling, yaml, ) +from ..v1.core import concat_tables, get_visualization_df from ..v1.models.model import Model, model_factory from ..v1.yaml import get_path_prefix from ..v2.C import * # noqa: F403 from ..versions import parse_version -from . import conditions, experiments +from . import conditions, core, experiments if TYPE_CHECKING: from ..v2.lint import ValidationResultList, ValidationTask -__all__ = ["Problem"] +__all__ = ["Problem", "ProblemConfig"] class Problem: @@ -49,135 +50,102 @@ class Problem: - experiment table - measurement table - parameter table - - observables table + - observable table - mapping table Optionally, it may contain visualization tables. See also :doc:`petab:v2/documentation_data_format`. 
- - Parameters: - condition_df: PEtab condition table - experiment_df: PEtab experiment table - measurement_df: PEtab measurement table - parameter_df: PEtab parameter table - observable_df: PEtab observable table - visualization_df: PEtab visualization table - mapping_df: PEtab mapping table - model: The underlying model - extensions_config: Information on the extensions used """ def __init__( self, model: Model = None, - condition_df: pd.DataFrame = None, - experiment_df: pd.DataFrame = None, - measurement_df: pd.DataFrame = None, - parameter_df: pd.DataFrame = None, + condition_table: core.ConditionTable = None, + experiment_table: core.ExperimentTable = None, + observable_table: core.ObservableTable = None, + measurement_table: core.MeasurementTable = None, + parameter_table: core.ParameterTable = None, + mapping_table: core.MappingTable = None, visualization_df: pd.DataFrame = None, - observable_df: pd.DataFrame = None, - mapping_df: pd.DataFrame = None, - extensions_config: dict = None, config: ProblemConfig = None, ): from ..v2.lint import default_validation_tasks - self.condition_df: pd.DataFrame | None = condition_df - self.experiment_df: pd.DataFrame | None = experiment_df - self.measurement_df: pd.DataFrame | None = measurement_df - self.parameter_df: pd.DataFrame | None = parameter_df - self.visualization_df: pd.DataFrame | None = visualization_df - self.observable_df: pd.DataFrame | None = observable_df - self.mapping_df: pd.DataFrame | None = mapping_df + self.config = config self.model: Model | None = model - self.extensions_config = extensions_config or {} self.validation_tasks: list[ValidationTask] = ( default_validation_tasks.copy() ) - self.config = config - from .core import ( - ChangeSet, - ConditionsTable, - Experiment, - ExperimentsTable, - MappingTable, - MeasurementTable, - Observable, - ObservablesTable, - ParameterTable, + self.observable_table = observable_table or core.ObservableTable( + observables=[] ) - - self.observables_table: 
ObservablesTable = ObservablesTable.from_df( - self.observable_df + self.condition_table = condition_table or core.ConditionTable( + conditions=[] ) - self.observables: list[Observable] = self.observables_table.observables - - self.conditions_table: ConditionsTable = ConditionsTable.from_df( - self.condition_df + self.experiment_table = experiment_table or core.ExperimentTable( + experiments=[] ) - self.conditions: list[ChangeSet] = self.conditions_table.conditions - - self.experiments_table: ExperimentsTable = ExperimentsTable.from_df( - self.experiment_df + self.measurement_table = measurement_table or core.MeasurementTable( + measurements=[] ) - self.experiments: list[Experiment] = self.experiments_table.experiments - - self.measurement_table: MeasurementTable = MeasurementTable.from_df( - self.measurement_df, + self.mapping_table = mapping_table or core.MappingTable(mappings=[]) + self.parameter_table = parameter_table or core.ParameterTable( + parameters=[] ) - self.mapping_table: MappingTable = MappingTable.from_df( - self.mapping_df - ) - self.parameter_table: ParameterTable = ParameterTable.from_df( - self.parameter_df - ) - # TODO: visualization table + self.visualization_df = visualization_df def __str__(self): model = f"with model ({self.model})" if self.model else "without model" - experiments = ( - f"{self.experiment_df.shape[0]} experiments" - if self.experiment_df is not None - else "without experiments table" - ) + ne = len(self.experiment_table.experiments) + experiments = f"{ne} experiments" - conditions = ( - f"{self.condition_df.shape[0]} conditions" - if self.condition_df is not None - else "without conditions table" - ) + nc = len(self.condition_table.conditions) + conditions = f"{nc} conditions" - observables = ( - f"{self.observable_df.shape[0]} observables" - if self.observable_df is not None - else "without observables table" - ) + no = len(self.observable_table.observables) + observables = f"{no} observables" - measurements = ( - 
f"{self.measurement_df.shape[0]} measurements" - if self.measurement_df is not None - else "without measurements table" - ) + nm = len(self.measurement_table.measurements) + measurements = f"{nm} measurements" - if self.parameter_df is not None: - num_estimated_parameters = ( - sum(self.parameter_df[ESTIMATE] == 1) - if ESTIMATE in self.parameter_df - else self.parameter_df.shape[0] - ) - parameters = f"{num_estimated_parameters} estimated parameters" - else: - parameters = "without parameter_df table" + nest = self.parameter_table.n_estimated + parameters = f"{nest} estimated parameters" return ( f"PEtab Problem {model}, {conditions}, {experiments}, " f"{observables}, {measurements}, {parameters}" ) + def __getitem__(self, key): + """Get PEtab entity by ID. + + This allows accessing PEtab entities such as conditions, experiments, + observables, and parameters by their ID. + + Accessing model entities is not currently not supported. + """ + for table in ( + self.condition_table, + self.experiment_table, + self.observable_table, + self.measurement_table, + self.parameter_table, + self.mapping_table, + ): + if table is not None: + try: + return table[key] + except KeyError: + pass + + raise KeyError( + f"Entity with ID '{key}' not found in the PEtab problem" + ) + @staticmethod def from_yaml( yaml_config: dict | Path | str, base_path: str | Path = None @@ -235,10 +203,10 @@ def get_path(filename): if yaml.is_composite_problem(yaml_config): raise ValueError( - "petab.Problem.from_yaml() can only be used for " + "petab.v2.Problem.from_yaml() can only be used for " "yaml files comprising a single model. " "Consider using " - "petab.CompositeProblem.from_yaml() instead." + "petab.v2.CompositeProblem.from_yaml() instead." 
) config = ProblemConfig( **yaml_config, base_path=base_path, filepath=yaml_file @@ -273,9 +241,7 @@ def get_path(filename): measurement_files = [get_path(f) for f in problem0.measurement_files] # If there are multiple tables, we will merge them measurement_df = ( - core.concat_tables( - measurement_files, measurements.get_measurement_df - ) + concat_tables(measurement_files, measurements.get_measurement_df) if measurement_files else None ) @@ -283,7 +249,7 @@ def get_path(filename): condition_files = [get_path(f) for f in problem0.condition_files] # If there are multiple tables, we will merge them condition_df = ( - core.concat_tables(condition_files, conditions.get_condition_df) + concat_tables(condition_files, conditions.get_condition_df) if condition_files else None ) @@ -291,7 +257,7 @@ def get_path(filename): experiment_files = [get_path(f) for f in problem0.experiment_files] # If there are multiple tables, we will merge them experiment_df = ( - core.concat_tables(experiment_files, experiments.get_experiment_df) + concat_tables(experiment_files, experiments.get_experiment_df) if experiment_files else None ) @@ -301,7 +267,7 @@ def get_path(filename): ] # If there are multiple tables, we will merge them visualization_df = ( - core.concat_tables(visualization_files, core.get_visualization_df) + concat_tables(visualization_files, get_visualization_df) if visualization_files else None ) @@ -309,7 +275,7 @@ def get_path(filename): observable_files = [get_path(f) for f in problem0.observable_files] # If there are multiple tables, we will merge them observable_df = ( - core.concat_tables(observable_files, observables.get_observable_df) + concat_tables(observable_files, observables.get_observable_df) if observable_files else None ) @@ -317,12 +283,12 @@ def get_path(filename): mapping_files = [get_path(f) for f in problem0.mapping_files] # If there are multiple tables, we will merge them mapping_df = ( - core.concat_tables(mapping_files, mapping.get_mapping_df) + 
concat_tables(mapping_files, mapping.get_mapping_df) if mapping_files else None ) - return Problem( + return Problem.from_dfs( condition_df=condition_df, experiment_df=experiment_df, measurement_df=measurement_df, @@ -331,20 +297,66 @@ def get_path(filename): model=model, visualization_df=visualization_df, mapping_df=mapping_df, - extensions_config=config.extensions, + config=config, + ) + + @staticmethod + def from_dfs( + model: Model = None, + condition_df: pd.DataFrame = None, + experiment_df: pd.DataFrame = None, + measurement_df: pd.DataFrame = None, + parameter_df: pd.DataFrame = None, + visualization_df: pd.DataFrame = None, + observable_df: pd.DataFrame = None, + mapping_df: pd.DataFrame = None, + config: ProblemConfig = None, + ): + """ + Construct a PEtab problem from dataframes. + + Parameters: + condition_df: PEtab condition table + experiment_df: PEtab experiment table + measurement_df: PEtab measurement table + parameter_df: PEtab parameter table + observable_df: PEtab observable table + visualization_df: PEtab visualization table + mapping_df: PEtab mapping table + model: The underlying model + config: The PEtab problem configuration + """ + + observable_table = core.ObservableTable.from_df(observable_df) + condition_table = core.ConditionTable.from_df(condition_df) + experiment_table = core.ExperimentTable.from_df(experiment_df) + measurement_table = core.MeasurementTable.from_df(measurement_df) + mapping_table = core.MappingTable.from_df(mapping_df) + parameter_table = core.ParameterTable.from_df(parameter_df) + + return Problem( + model=model, + condition_table=condition_table, + experiment_table=experiment_table, + observable_table=observable_table, + measurement_table=measurement_table, + parameter_table=parameter_table, + mapping_table=mapping_table, + visualization_df=visualization_df, + config=config, ) @staticmethod def from_combine(filename: Path | str) -> Problem: """Read PEtab COMBINE archive (http://co.mbine.org/documents/archive). 
- See also :py:func:`petab.create_combine_archive`. + See also :py:func:`petab.v2.create_combine_archive`. Arguments: filename: Path to the PEtab-COMBINE archive Returns: - A :py:class:`petab.Problem` instance. + A :py:class:`petab.v2.Problem` instance. """ # function-level import, because module-level import interfered with # other SWIG interfaces @@ -390,13 +402,75 @@ def get_problem(problem: str | Path | Problem) -> Problem: "or a PEtab problem object." ) + @property + def condition_df(self) -> pd.DataFrame | None: + """Condition table as DataFrame.""" + # TODO: return empty df? + return self.condition_table.to_df() if self.condition_table else None + + @condition_df.setter + def condition_df(self, value: pd.DataFrame): + self.condition_table = core.ConditionTable.from_df(value) + + @property + def experiment_df(self) -> pd.DataFrame | None: + """Experiment table as DataFrame.""" + return self.experiment_table.to_df() if self.experiment_table else None + + @experiment_df.setter + def experiment_df(self, value: pd.DataFrame): + self.experiment_table = core.ExperimentTable.from_df(value) + + @property + def measurement_df(self) -> pd.DataFrame | None: + """Measurement table as DataFrame.""" + return ( + self.measurement_table.to_df() if self.measurement_table else None + ) + + @measurement_df.setter + def measurement_df(self, value: pd.DataFrame): + self.measurement_table = core.MeasurementTable.from_df(value) + + @property + def parameter_df(self) -> pd.DataFrame | None: + """Parameter table as DataFrame.""" + return self.parameter_table.to_df() if self.parameter_table else None + + @parameter_df.setter + def parameter_df(self, value: pd.DataFrame): + self.parameter_table = core.ParameterTable.from_df(value) + + @property + def observable_df(self) -> pd.DataFrame | None: + """Observable table as DataFrame.""" + return self.observable_table.to_df() if self.observable_table else None + + @observable_df.setter + def observable_df(self, value: pd.DataFrame): + 
self.observable_table = core.ObservableTable.from_df(value) + + @property + def mapping_df(self) -> pd.DataFrame | None: + """Mapping table as DataFrame.""" + return self.mapping_table.to_df() if self.mapping_table else None + + @mapping_df.setter + def mapping_df(self, value: pd.DataFrame): + self.mapping_table = core.MappingTable.from_df(value) + def get_optimization_parameters(self) -> list[str]: """ - Return list of optimization parameter IDs. + Get the list of optimization parameter IDs from parameter table. - See :py:func:`petab.parameters.get_optimization_parameters`. + Arguments: + parameter_df: PEtab parameter DataFrame + + Returns: + A list of IDs of parameters selected for optimization + (i.e., those with estimate = True). """ - return parameters.get_optimization_parameters(self.parameter_df) + return [p.id for p in self.parameter_table.parameters if p.estimate] def get_optimization_parameter_scales(self) -> dict[str, str]: """ @@ -404,13 +478,14 @@ def get_optimization_parameter_scales(self) -> dict[str, str]: See :py:func:`petab.parameters.get_optimization_parameters`. """ + # TODO: to be removed in v2? return parameters.get_optimization_parameter_scaling(self.parameter_df) def get_observable_ids(self) -> list[str]: """ Returns dictionary of observable ids. """ - return list(self.observable_df.index) + return [o.id for o in self.observable_table.observables] def _apply_mask(self, v: list, free: bool = True, fixed: bool = True): """Apply mask of only free or only fixed values. @@ -420,9 +495,9 @@ def _apply_mask(self, v: list, free: bool = True, fixed: bool = True): v: The full vector the mask is to be applied to. free: - Whether to return free parameters, i.e. parameters to estimate. + Whether to return free parameters, i.e., parameters to estimate. fixed: - Whether to return fixed parameters, i.e. parameters not to + Whether to return fixed parameters, i.e., parameters not to estimate. 
Returns @@ -452,7 +527,7 @@ def get_x_ids(self, free: bool = True, fixed: bool = True): ------- The parameter IDs. """ - v = list(self.parameter_df.index.values) + v = [p.id for p in self.parameter_table.parameters] return self._apply_mask(v, free=free, fixed=fixed) @property @@ -472,7 +547,7 @@ def x_fixed_ids(self) -> list[str]: def get_x_nominal( self, free: bool = True, fixed: bool = True, scaled: bool = False - ): + ) -> list: """Generic function to get parameter nominal values. Parameters @@ -490,10 +565,10 @@ def get_x_nominal( ------- The parameter nominal values. """ - if NOMINAL_VALUE in self.parameter_df: - v = list(self.parameter_df[NOMINAL_VALUE]) - else: - v = [nan] * len(self.parameter_df) + v = [ + p.nominal_value if p.nominal_value is not None else nan + for p in self.parameter_table.parameters + ] if scaled: v = list( @@ -555,7 +630,10 @@ def get_lb( ------- The lower parameter bounds. """ - v = list(self.parameter_df[LOWER_BOUND]) + v = [ + p.lb if p.lb is not None else nan + for p in self.parameter_table.parameters + ] if scaled: v = list( parameters.map_scale(v, self.parameter_df[PARAMETER_SCALE]) @@ -592,7 +670,10 @@ def get_ub( ------- The upper parameter bounds. 
""" - v = list(self.parameter_df[UPPER_BOUND]) + v = [ + p.ub if p.ub is not None else nan + for p in self.parameter_table.parameters + ] if scaled: v = list( parameters.map_scale(v, self.parameter_df[PARAMETER_SCALE]) @@ -612,19 +693,22 @@ def ub_scaled(self) -> list: @property def x_free_indices(self) -> list[int]: """Parameter table estimated parameter indices.""" - estimated = list(self.parameter_df[ESTIMATE]) - return [j for j, val in enumerate(estimated) if val != 0] + return [ + i + for i, p in enumerate(self.parameter_table.parameters) + if p.estimate + ] @property def x_fixed_indices(self) -> list[int]: """Parameter table non-estimated parameter indices.""" - estimated = list(self.parameter_df[ESTIMATE]) - return [j for j, val in enumerate(estimated) if val == 0] - - def get_simulation_conditions_from_measurement_df(self) -> pd.DataFrame: - """See :func:`petab.get_simulation_conditions`.""" - return measurements.get_simulation_conditions(self.measurement_df) + return [ + i + for i, p in enumerate(self.parameter_table.parameters) + if not p.estimate + ] + # TODO remove in v2? def get_optimization_to_simulation_parameter_mapping(self, **kwargs): """ See @@ -669,6 +753,7 @@ def sample_parameter_startpoints_dict( ) ] + # TODO: remove in v2? def unscale_parameters( self, x_dict: dict[str, float], @@ -693,6 +778,7 @@ def unscale_parameters( for parameter_id, parameter_value in x_dict.items() } + # TODO: remove in v2? 
def scale_parameters( self, x_dict: dict[str, float], @@ -725,8 +811,9 @@ def n_estimated(self) -> int: @property def n_measurements(self) -> int: """Number of measurements.""" - return self.measurement_df[MEASUREMENT].notna().sum() + return len(self.measurement_table.measurements) + # TODO: update after implementing priors in `Parameter` @property def n_priors(self) -> int: """Number of priors.""" @@ -753,13 +840,14 @@ def validate( ) validation_results = ValidationResultList() - if self.extensions_config: + if self.config.extensions: + extensions = ",".join(e.name for e in self.config.extensions) validation_results.append( ValidationIssue( ValidationIssueSeverity.WARNING, "Validation of PEtab extensions is not yet implemented, " "but the given problem uses the following extensions: " - f"{'', ''.join(self.extensions_config.keys())}", + f"{extensions}", ) ) @@ -782,7 +870,7 @@ def validate( return validation_results def add_condition( - self, id_: str, name: str = None, **kwargs: tuple[str, Number | str] + self, id_: str, name: str = None, **kwargs: Number | str | sp.Expr ): """Add a simulation condition to the problem. @@ -790,29 +878,25 @@ def add_condition( id_: The condition id name: The condition name kwargs: Entities to be added to the condition table in the form - `target_id=(value_type, target_value)`. + `target_id=target_value`. """ if not kwargs: - return - records = [ - { - CONDITION_ID: id_, - TARGET_ID: target_id, - OPERATION_TYPE: op_type, - TARGET_VALUE: target_value, - } - for target_id, (op_type, target_value) in kwargs.items() + raise ValueError("Cannot add condition without any changes") + + changes = [ + core.Change(target_id=target_id, target_value=target_value) + for target_id, target_value in kwargs.items() ] - # TODO: is the condition name supported in v2? 
- if name is not None: - for record in records: - record[CONDITION_NAME] = [name] - tmp_df = pd.DataFrame(records) - self.condition_df = ( - pd.concat([self.condition_df, tmp_df], ignore_index=True) - if self.condition_df is not None - else tmp_df + self.condition_table.conditions.append( + core.Condition(id=id_, changes=changes) ) + if name is not None: + self.mapping_table.mappings.append( + core.Mapping( + petab_id=id_, + name=name, + ) + ) def add_observable( self, @@ -837,25 +921,20 @@ def add_observable( """ record = { - OBSERVABLE_ID: [id_], - OBSERVABLE_FORMULA: [formula], + OBSERVABLE_ID: id_, + OBSERVABLE_FORMULA: formula, } if name is not None: - record[OBSERVABLE_NAME] = [name] + record[OBSERVABLE_NAME] = name if noise_formula is not None: - record[NOISE_FORMULA] = [noise_formula] + record[NOISE_FORMULA] = noise_formula if noise_distribution is not None: - record[NOISE_DISTRIBUTION] = [noise_distribution] + record[NOISE_DISTRIBUTION] = noise_distribution if transform is not None: - record[OBSERVABLE_TRANSFORMATION] = [transform] + record[OBSERVABLE_TRANSFORMATION] = transform record.update(kwargs) - tmp_df = pd.DataFrame(record).set_index([OBSERVABLE_ID]) - self.observable_df = ( - pd.concat([self.observable_df, tmp_df]) - if self.observable_df is not None - else tmp_df - ) + self.observable_table += core.Observable(**record) def add_parameter( self, @@ -888,42 +967,37 @@ def add_parameter( kwargs: additional columns/values to add to the parameter table """ record = { - PARAMETER_ID: [id_], + PARAMETER_ID: id_, } if estimate is not None: - record[ESTIMATE] = [int(estimate)] + record[ESTIMATE] = estimate if nominal_value is not None: - record[NOMINAL_VALUE] = [nominal_value] + record[NOMINAL_VALUE] = nominal_value if scale is not None: - record[PARAMETER_SCALE] = [scale] + record[PARAMETER_SCALE] = scale if lb is not None: - record[LOWER_BOUND] = [lb] + record[LOWER_BOUND] = lb if ub is not None: - record[UPPER_BOUND] = [ub] + record[UPPER_BOUND] = ub if 
init_prior_type is not None: - record[INITIALIZATION_PRIOR_TYPE] = [init_prior_type] + record[INITIALIZATION_PRIOR_TYPE] = init_prior_type if init_prior_pars is not None: if not isinstance(init_prior_pars, str): init_prior_pars = PARAMETER_SEPARATOR.join( map(str, init_prior_pars) ) - record[INITIALIZATION_PRIOR_PARAMETERS] = [init_prior_pars] + record[INITIALIZATION_PRIOR_PARAMETERS] = init_prior_pars if obj_prior_type is not None: - record[OBJECTIVE_PRIOR_TYPE] = [obj_prior_type] + record[OBJECTIVE_PRIOR_TYPE] = obj_prior_type if obj_prior_pars is not None: if not isinstance(obj_prior_pars, str): obj_prior_pars = PARAMETER_SEPARATOR.join( map(str, obj_prior_pars) ) - record[OBJECTIVE_PRIOR_PARAMETERS] = [obj_prior_pars] + record[OBJECTIVE_PRIOR_PARAMETERS] = obj_prior_pars record.update(kwargs) - tmp_df = pd.DataFrame(record).set_index([PARAMETER_ID]) - self.parameter_df = ( - pd.concat([self.parameter_df, tmp_df]) - if self.parameter_df is not None - else tmp_df - ) + self.parameter_table += core.Parameter(**record) def add_measurement( self, @@ -931,8 +1005,8 @@ def add_measurement( experiment_id: str, time: float, measurement: float, - observable_parameters: Sequence[str | float] = None, - noise_parameters: Sequence[str | float] = None, + observable_parameters: Sequence[str | float] | str | float = None, + noise_parameters: Sequence[str | float] | str | float = None, ): """Add a measurement to the problem. 
@@ -944,44 +1018,35 @@ def add_measurement( observable_parameters: The observable parameters noise_parameters: The noise parameters """ - record = { - OBSERVABLE_ID: [obs_id], - EXPERIMENT_ID: [experiment_id], - TIME: [time], - MEASUREMENT: [measurement], - } - if observable_parameters is not None: - record[OBSERVABLE_PARAMETERS] = [ - PARAMETER_SEPARATOR.join(map(str, observable_parameters)) - ] - if noise_parameters is not None: - record[NOISE_PARAMETERS] = [ - PARAMETER_SEPARATOR.join(map(str, noise_parameters)) - ] - - tmp_df = pd.DataFrame(record) - self.measurement_df = ( - pd.concat([self.measurement_df, tmp_df]) - if self.measurement_df is not None - else tmp_df - ).reset_index(drop=True) - - def add_mapping(self, petab_id: str, model_id: str): + if observable_parameters is not None and not isinstance( + observable_parameters, Sequence + ): + observable_parameters = [observable_parameters] + if noise_parameters is not None and not isinstance( + noise_parameters, Sequence + ): + noise_parameters = [noise_parameters] + + self.measurement_table.measurements.append( + core.Measurement( + observable_id=obs_id, + experiment_id=experiment_id, + time=time, + measurement=measurement, + observable_parameters=observable_parameters, + noise_parameters=noise_parameters, + ) + ) + + def add_mapping(self, petab_id: str, model_id: str, name: str = None): """Add a mapping table entry to the problem. 
Arguments: petab_id: The new PEtab-compatible ID mapping to `model_id` model_id: The ID of some entity in the model """ - record = { - PETAB_ENTITY_ID: [petab_id], - MODEL_ENTITY_ID: [model_id], - } - tmp_df = pd.DataFrame(record).set_index([PETAB_ENTITY_ID]) - self.mapping_df = ( - pd.concat([self.mapping_df, tmp_df]) - if self.mapping_df is not None - else tmp_df + self.mapping_table.mappings.append( + core.Mapping(petab_id=petab_id, model_id=model_id, name=name) ) def add_experiment(self, id_: str, *args): @@ -996,22 +1061,41 @@ def add_experiment(self, id_: str, *args): "Arguments must be pairs of timepoints and condition IDs." ) - records = [] - for i in range(0, len(args), 2): - records.append( - { - EXPERIMENT_ID: id_, - TIME: args[i], - CONDITION_ID: args[i + 1], - } - ) - tmp_df = pd.DataFrame(records) - self.experiment_df = ( - pd.concat([self.experiment_df, tmp_df]) - if self.experiment_df is not None - else tmp_df + periods = [ + core.ExperimentPeriod(time=args[i], condition_id=args[i + 1]) + for i in range(0, len(args), 2) + ] + + self.experiment_table.experiments.append( + core.Experiment(id=id_, periods=periods) + ) + + def __iadd__(self, other): + """Add Observable, Parameter, Measurement, Condition, or Experiment""" + from .core import ( + Condition, + Experiment, + Measurement, + Observable, + Parameter, ) + if isinstance(other, Observable): + self.observable_table += other + elif isinstance(other, Parameter): + self.parameter_table += other + elif isinstance(other, Measurement): + self.measurement_table += other + elif isinstance(other, Condition): + self.condition_table += other + elif isinstance(other, Experiment): + self.experiment_table += other + else: + raise ValueError( + f"Cannot add object of type {type(other)} to Problem." 
+ ) + return self + class ModelFile(BaseModel): """A file in the PEtab problem configuration.""" @@ -1044,19 +1128,25 @@ class ExtensionConfig(BaseModel): class ProblemConfig(BaseModel): """The PEtab problem configuration.""" + #: The path to the PEtab problem configuration. filepath: str | AnyUrl | None = Field( None, description="The path to the PEtab problem configuration.", exclude=True, ) + #: The base path to resolve relative paths. base_path: str | AnyUrl | None = Field( None, description="The base path to resolve relative paths.", exclude=True, ) + #: The PEtab format version. format_version: str = "2.0.0" + #: The path to the parameter file, relative to ``base_path``. parameter_file: str | AnyUrl | None = None + #: The list of problems in the configuration. problems: list[SubProblem] = [] + #: Extensiions used by the problem. extensions: list[ExtensionConfig] = [] def to_yaml(self, filename: str | Path): diff --git a/pytest.ini b/pytest.ini index 4aa44158..b5f0c04d 100644 --- a/pytest.ini +++ b/pytest.ini @@ -1,5 +1,8 @@ [pytest] -addopts = --doctest-modules +addopts = --doctest-modules --durations=0 --durations-min=10 +testpaths = + petab + tests filterwarnings = error # TODO: until tests are reorganized for petab.v1 @@ -8,5 +11,3 @@ filterwarnings = ignore:Support for PEtab2.0 is experimental:UserWarning ignore:.*inspect.getargspec\(\) is deprecated.*:DeprecationWarning ignore:.*Passing unrecognized arguments to super\(PyDevIPCompleter6\).*:DeprecationWarning - # TODO: until we have proper v2 support - ignore:The experiment table is not yet supported and will be ignored:UserWarning diff --git a/tests/v1/math/test_math.py b/tests/v1/math/test_math.py index 828aac88..dae78154 100644 --- a/tests/v1/math/test_math.py +++ b/tests/v1/math/test_math.py @@ -74,6 +74,11 @@ def test_ids(): """Test symbols in expressions.""" assert sympify_petab("bla * 2") == 2.0 * sp.Symbol("bla", real=True) + # test that sympy expressions that are invalid in PEtab raise an error 
+ # TODO: handle these cases after + # https://github.com/PEtab-dev/libpetab-python/pull/364 + # sympify_petab(sp.Symbol("föö")) + def test_syntax_error(): """Test exceptions upon syntax errors.""" diff --git a/tests/v2/test_conversion.py b/tests/v2/test_conversion.py index 4b982fcf..612606ab 100644 --- a/tests/v2/test_conversion.py +++ b/tests/v2/test_conversion.py @@ -1,21 +1,21 @@ import logging -import tempfile import pytest +from petab.v2 import Problem from petab.v2.petab1to2 import petab1to2 def test_petab1to2_remote(): + """Test that we can upgrade a remote PEtab 1.0.0 problem.""" yaml_url = ( "https://raw.githubusercontent.com/PEtab-dev/petab_test_suite" "/main/petabtests/cases/v1.0.0/sbml/0001/_0001.yaml" ) - with tempfile.TemporaryDirectory(prefix="test_petab1to2") as tmpdirname: - # TODO verify that the v2 files match "ground truth" - # in `petabtests/cases/v2.0.0/sbml/0001/_0001.yaml` - petab1to2(yaml_url, tmpdirname) + problem = petab1to2(yaml_url) + assert isinstance(problem, Problem) + assert len(problem.measurement_table.measurements) try: @@ -36,10 +36,10 @@ def test_benchmark_collection(problem_id): logging.basicConfig(level=logging.DEBUG) if problem_id == "Froehlich_CellSystems2018": + # this is mostly about 6M sympifications in the condition table pytest.skip("Too slow. 
Re-enable once we are faster.") yaml_path = benchmark_models_petab.get_problem_yaml_path(problem_id) - with tempfile.TemporaryDirectory( - prefix=f"test_petab1to2_{problem_id}" - ) as tmpdirname: - petab1to2(yaml_path, tmpdirname) + problem = petab1to2(yaml_path) + assert isinstance(problem, Problem) + assert len(problem.measurement_table.measurements) diff --git a/tests/v2/test_core.py b/tests/v2/test_core.py index a7eae851..181f5523 100644 --- a/tests/v2/test_core.py +++ b/tests/v2/test_core.py @@ -1,39 +1,36 @@ import tempfile from pathlib import Path -from petab.v2.core import ( - Change, - ChangeSet, - ConditionsTable, - Experiment, - ExperimentPeriod, - ObservablesTable, - OperationType, -) +import pytest +import sympy as sp +from pydantic import ValidationError +from sympy.abc import x, y + +from petab.v2.core import * from petab.v2.petab1to2 import petab1to2 example_dir_fujita = Path(__file__).parents[2] / "doc/example/example_Fujita" -def test_observables_table_round_trip(): +def test_observable_table_round_trip(): file = example_dir_fujita / "Fujita_observables.tsv" - observables = ObservablesTable.from_tsv(file) + observables = ObservableTable.from_tsv(file) with tempfile.TemporaryDirectory() as tmp_dir: tmp_file = Path(tmp_dir) / "observables.tsv" observables.to_tsv(tmp_file) - observables2 = ObservablesTable.from_tsv(tmp_file) + observables2 = ObservableTable.from_tsv(tmp_file) assert observables == observables2 -def test_conditions_table_round_trip(): +def test_condition_table_round_trip(): with tempfile.TemporaryDirectory() as tmp_dir: petab1to2(example_dir_fujita / "Fujita.yaml", tmp_dir) file = Path(tmp_dir, "Fujita_experimentalCondition.tsv") - conditions = ConditionsTable.from_tsv(file) + conditions = ConditionTable.from_tsv(file) tmp_file = Path(tmp_dir) / "conditions.tsv" conditions.to_tsv(tmp_file) - conditions2 = ConditionsTable.from_tsv(tmp_file) + conditions2 = ConditionTable.from_tsv(tmp_file) assert conditions == conditions2 @@ -42,9 
+39,9 @@ def test_experiment_add_periods(): exp = Experiment(id="exp1") assert exp.periods == [] - p1 = ExperimentPeriod(start=0, condition_id="p1") - p2 = ExperimentPeriod(start=1, condition_id="p2") - p3 = ExperimentPeriod(start=2, condition_id="p3") + p1 = ExperimentPeriod(time=0, condition_id="p1") + p2 = ExperimentPeriod(time=1, condition_id="p2") + p3 = ExperimentPeriod(time=2, condition_id="p3") exp += p1 exp += p2 @@ -55,20 +52,227 @@ def test_experiment_add_periods(): assert exp.periods == [p1, p2] -def test_conditions_table_add_changeset(): - conditions_table = ConditionsTable() - assert conditions_table.conditions == [] +def test_condition_table_add_changes(): + condition_table = ConditionTable() + assert condition_table.conditions == [] - c1 = ChangeSet( + c1 = Condition( id="condition1", - changes=[Change(operation_type=OperationType.NO_CHANGE)], + changes=[Change(target_id="k1", target_value=1)], ) - c2 = ChangeSet( + c2 = Condition( id="condition2", - changes=[Change(operation_type=OperationType.NO_CHANGE)], + changes=[Change(target_id="k2", target_value=sp.sympify("2 * x"))], + ) + + condition_table += c1 + condition_table += c2 + + assert condition_table.conditions == [c1, c2] + + +def test_measurments(): + Measurement( + observable_id="obs1", time=1, experiment_id="exp1", measurement=1 + ) + Measurement( + observable_id="obs1", time="1", experiment_id="exp1", measurement="1" + ) + Measurement( + observable_id="obs1", time="inf", experiment_id="exp1", measurement="1" + ) + + Measurement( + observable_id="obs1", + time=1, + experiment_id="exp1", + measurement=1, + observable_parameters=["p1"], + noise_parameters=["n1"], + ) + + Measurement( + observable_id="obs1", + time=1, + experiment_id="exp1", + measurement=1, + observable_parameters=[1], + noise_parameters=[2], + ) + + Measurement( + observable_id="obs1", + time=1, + experiment_id="exp1", + measurement=1, + observable_parameters=[sp.sympify("x ** y")], + noise_parameters=[sp.sympify("x ** y")], 
) - conditions_table += c1 - conditions_table += c2 + assert ( + Measurement( + observable_id="obs1", + time=1, + experiment_id="exp1", + measurement=1, + non_petab=1, + ).non_petab + == 1 + ) + + with pytest.raises(ValidationError, match="got -inf"): + Measurement( + observable_id="obs1", + time="-inf", + experiment_id="exp1", + measurement=1, + ) + + with pytest.raises(ValidationError, match="Invalid ID"): + Measurement( + observable_id="1_obs", time=1, experiment_id="exp1", measurement=1 + ) + + with pytest.raises(ValidationError, match="Invalid ID"): + Measurement( + observable_id="obs", time=1, experiment_id=" exp1", measurement=1 + ) + + +def test_observable(): + Observable(id="obs1", formula=x + y) + Observable(id="obs1", formula="x + y", noise_formula="x + y") + Observable(id="obs1", formula=1, noise_formula=2) + Observable( + id="obs1", + formula="x + y", + noise_formula="x + y", + observable_parameters=["p1"], + noise_parameters=["n1"], + ) + Observable( + id="obs1", + formula=sp.sympify("x + y"), + noise_formula=sp.sympify("x + y"), + observable_parameters=[sp.Symbol("p1")], + noise_parameters=[sp.Symbol("n1")], + ) + assert Observable(id="obs1", formula="x + y", non_petab=1).non_petab == 1 + + o = Observable(id="obs1", formula=x + y) + assert o.observable_placeholders == set() + assert o.noise_placeholders == set() + + o = Observable( + id="obs1", + formula="observableParameter1_obs1", + noise_formula="noiseParameter1_obs1", + ) + assert o.observable_placeholders == { + sp.Symbol("observableParameter1_obs1", real=True), + } + assert o.noise_placeholders == { + sp.Symbol("noiseParameter1_obs1", real=True) + } + + # TODO: this should raise an error + # (numbering is not consecutive / not starting from 1) + # TODO: clarify if observableParameter0_obs1 would be allowed + # as regular parameter + # + # with pytest.raises(ValidationError): + # Observable(id="obs1", formula="observableParameter2_obs1") + + +def test_change(): + Change(target_id="k1", 
target_value=1) + Change(target_id="k1", target_value="x * y") + + assert ( + Change(target_id="k1", target_value=x * y, non_petab="foo").non_petab + == "foo" + ) + with pytest.raises(ValidationError, match="Invalid ID"): + Change(target_id="1_k", target_value=x) + + with pytest.raises(ValidationError, match="input_value=None"): + Change(target_id="k1", target_value=None) + + +def test_period(): + ExperimentPeriod(time=0) + ExperimentPeriod(time=1, condition_id="p1") + ExperimentPeriod(time="-inf", condition_id="p1") + + assert ( + ExperimentPeriod(time="1", condition_id="p1", non_petab=1).non_petab + == 1 + ) + + with pytest.raises(ValidationError, match="got inf"): + ExperimentPeriod(time="inf", condition_id="p1") + + with pytest.raises(ValidationError, match="Invalid ID"): + ExperimentPeriod(time=1, condition_id="1_condition") + + with pytest.raises(ValidationError, match="type=missing"): + ExperimentPeriod(condition_id="condition") + + +def test_parameter(): + Parameter(id="k1", lb=1, ub=2) + Parameter(id="k1", estimate=False, nominal_value=1) + + assert Parameter(id="k1", lb=1, ub=2, non_petab=1).non_petab == 1 + + with pytest.raises(ValidationError, match="Invalid ID"): + Parameter(id="1_k", lb=1, ub=2) + + with pytest.raises(ValidationError, match="upper"): + Parameter(id="k1", lb=1) + + with pytest.raises(ValidationError, match="lower"): + Parameter(id="k1", ub=1) + + with pytest.raises(ValidationError, match="less than"): + Parameter(id="k1", lb=2, ub=1) + + +def test_experiment(): + Experiment(id="experiment1") + Experiment( + id="experiment1", periods=[ExperimentPeriod(time=1, condition_id="c1")] + ) + + assert Experiment(id="experiment1", non_petab=1).non_petab == 1 + + with pytest.raises(ValidationError, match="Field required"): + Experiment() + + with pytest.raises(ValidationError, match="Invalid ID"): + Experiment(id="experiment 1") + + +def test_condition_table(): + assert ConditionTable().free_symbols == set() + + assert ( + ConditionTable( + 
conditions=[ + Condition( + id="condition1", + changes=[Change(target_id="k1", target_value="true")], + ) + ] + ).free_symbols + == set() + ) - assert conditions_table.conditions == [c1, c2] + assert ConditionTable( + conditions=[ + Condition( + id="condition1", + changes=[Change(target_id="k1", target_value=x / y)], + ) + ] + ).free_symbols == {x, y} diff --git a/tests/v2/test_lint.py b/tests/v2/test_lint.py index db0c402a..33cdb300 100644 --- a/tests/v2/test_lint.py +++ b/tests/v2/test_lint.py @@ -10,23 +10,14 @@ def test_check_experiments(): """Test ``CheckExperimentTable``.""" problem = Problem() - problem.add_experiment("e1", 0, "c1", 1, "c2") - problem.add_experiment("e2", "-inf", "c1", 1, "c2") - assert problem.experiment_df.shape == (4, 3) check = CheckExperimentTable() assert check.run(problem) is None - assert check.run(Problem()) is None - - tmp_problem = deepcopy(problem) - tmp_problem.experiment_df.loc[0, TIME] = "invalid" - assert check.run(tmp_problem) is not None - - tmp_problem = deepcopy(problem) - tmp_problem.experiment_df.loc[0, TIME] = "inf" - assert check.run(tmp_problem) is not None + problem.add_experiment("e1", 0, "c1", 1, "c2") + problem.add_experiment("e2", "-inf", "c1", 1, "c2") + assert check.run(problem) is None tmp_problem = deepcopy(problem) - tmp_problem.experiment_df.drop(columns=[TIME], inplace=True) + tmp_problem["e1"].periods[0].time = tmp_problem["e1"].periods[1].time assert check.run(tmp_problem) is not None diff --git a/tests/v2/test_problem.py b/tests/v2/test_problem.py index 04e394ad..d4eab006 100644 --- a/tests/v2/test_problem.py +++ b/tests/v2/test_problem.py @@ -12,12 +12,11 @@ ESTIMATE, LOWER_BOUND, MODEL_ENTITY_ID, + NAME, NOISE_FORMULA, NOMINAL_VALUE, OBSERVABLE_FORMULA, OBSERVABLE_ID, - OPERATION_TYPE, - OT_CUR_VAL, PARAMETER_ID, PETAB_ENTITY_ID, TARGET_ID, @@ -73,7 +72,7 @@ def test_problem_from_yaml_multiple_files(): for i in (1, 2): problem = Problem() - problem.add_condition(f"condition{i}", 
parameter1=(OT_CUR_VAL, i)) + problem.add_condition(f"condition{i}", parameter1=i) petab.write_condition_df( problem.condition_df, Path(tmpdir, f"conditions{i}.tsv") ) @@ -109,14 +108,13 @@ def test_problem_from_yaml_multiple_files(): def test_modify_problem(): """Test modifying a problem via the API.""" problem = Problem() - problem.add_condition("condition1", parameter1=(OT_CUR_VAL, 1)) - problem.add_condition("condition2", parameter2=(OT_CUR_VAL, 2)) + problem.add_condition("condition1", parameter1=1) + problem.add_condition("condition2", parameter2=2) exp_condition_df = pd.DataFrame( data={ CONDITION_ID: ["condition1", "condition2"], TARGET_ID: ["parameter1", "parameter2"], - OPERATION_TYPE: [OT_CUR_VAL, OT_CUR_VAL], TARGET_VALUE: [1.0, 2.0], } ) @@ -130,12 +128,14 @@ def test_modify_problem(): exp_observable_df = pd.DataFrame( data={ OBSERVABLE_ID: ["observable1", "observable2"], - OBSERVABLE_FORMULA: ["1", "2"], + OBSERVABLE_FORMULA: [1, 2], NOISE_FORMULA: [np.nan, 2.2], } ).set_index([OBSERVABLE_ID]) assert_frame_equal( - problem.observable_df, exp_observable_df, check_dtype=False + problem.observable_df[[OBSERVABLE_FORMULA, NOISE_FORMULA]], + exp_observable_df, + check_dtype=False, ) problem.add_parameter("parameter1", 1, 0, lb=1, ub=2) @@ -151,7 +151,11 @@ def test_modify_problem(): } ).set_index([PARAMETER_ID]) assert_frame_equal( - problem.parameter_df, exp_parameter_df, check_dtype=False + problem.parameter_df[ + [ESTIMATE, NOMINAL_VALUE, LOWER_BOUND, UPPER_BOUND] + ], + exp_parameter_df, + check_dtype=False, ) problem.add_mapping("new_petab_id", "some_model_entity_id") @@ -160,6 +164,7 @@ def test_modify_problem(): data={ PETAB_ENTITY_ID: ["new_petab_id"], MODEL_ENTITY_ID: ["some_model_entity_id"], + NAME: [None], } ).set_index([PETAB_ENTITY_ID]) assert_frame_equal(problem.mapping_df, exp_mapping_df, check_dtype=False) From 0ed48b7596f9db0d751f3542fc5774045629d195 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 27 Mar 2025 16:16:22 +0100 
Subject: [PATCH 035/141] doc: Coherent naming --- petab/v2/core.py | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/petab/v2/core.py b/petab/v2/core.py index 10088b62..b595a466 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -241,7 +241,7 @@ def noise_placeholders(self) -> set[sp.Symbol]: class ObservableTable(BaseModel): - """PEtab observables table.""" + """PEtab observable table.""" #: List of observables. observables: list[Observable] @@ -255,7 +255,7 @@ def __getitem__(self, observable_id: str) -> Observable: @classmethod def from_df(cls, df: pd.DataFrame) -> ObservableTable: - """Create an ObservablesTable from a DataFrame.""" + """Create an ObservableTable from a DataFrame.""" if df is None: return cls(observables=[]) @@ -268,7 +268,7 @@ def from_df(cls, df: pd.DataFrame) -> ObservableTable: return cls(observables=observables) def to_df(self) -> pd.DataFrame: - """Convert the ObservablesTable to a DataFrame.""" + """Convert the ObservableTable to a DataFrame.""" records = self.model_dump(by_alias=True)["observables"] for record in records: obs = record[C.OBSERVABLE_FORMULA] @@ -294,25 +294,25 @@ def to_df(self) -> pd.DataFrame: @classmethod def from_tsv(cls, file_path: str | Path) -> ObservableTable: - """Create an ObservablesTable from a TSV file.""" + """Create an ObservableTable from a TSV file.""" df = pd.read_csv(file_path, sep="\t") return cls.from_df(df) def to_tsv(self, file_path: str | Path) -> None: - """Write the ObservablesTable to a TSV file.""" + """Write the ObservableTable to a TSV file.""" df = self.to_df() df.to_csv(file_path, sep="\t", index=True) def __add__(self, other: Observable) -> ObservableTable: """Add an observable to the table.""" if not isinstance(other, Observable): - raise TypeError("Can only add Observable to ObservablesTable") + raise TypeError("Can only add Observable to ObservableTable") return ObservableTable(observables=self.observables + [other]) def 
__iadd__(self, other: Observable) -> ObservableTable: """Add an observable to the table in place.""" if not isinstance(other, Observable): - raise TypeError("Can only add Observable to ObservablesTable") + raise TypeError("Can only add Observable to ObservableTable") self.observables.append(other) return self @@ -415,7 +415,7 @@ def __getitem__(self, condition_id: str) -> Condition: @classmethod def from_df(cls, df: pd.DataFrame) -> ConditionTable: - """Create a ConditionsTable from a DataFrame.""" + """Create a ConditionTable from a DataFrame.""" if df is None or df.empty: return cls(conditions=[]) @@ -427,7 +427,7 @@ def from_df(cls, df: pd.DataFrame) -> ConditionTable: return cls(conditions=conditions) def to_df(self) -> pd.DataFrame: - """Convert the ConditionsTable to a DataFrame.""" + """Convert the ConditionTable to a DataFrame.""" records = [ {C.CONDITION_ID: condition.id, **change.model_dump(by_alias=True)} for condition in self.conditions @@ -447,31 +447,31 @@ def to_df(self) -> pd.DataFrame: @classmethod def from_tsv(cls, file_path: str | Path) -> ConditionTable: - """Create a ConditionsTable from a TSV file.""" + """Create a ConditionTable from a TSV file.""" df = pd.read_csv(file_path, sep="\t") return cls.from_df(df) def to_tsv(self, file_path: str | Path) -> None: - """Write the ConditionsTable to a TSV file.""" + """Write the ConditionTable to a TSV file.""" df = self.to_df() df.to_csv(file_path, sep="\t", index=False) def __add__(self, other: Condition) -> ConditionTable: """Add a condition to the table.""" if not isinstance(other, Condition): - raise TypeError("Can only add Conditions to ConditionsTable") + raise TypeError("Can only add Condition to ConditionTable") return ConditionTable(conditions=self.conditions + [other]) def __iadd__(self, other: Condition) -> ConditionTable: """Add a condition to the table in place.""" if not isinstance(other, Condition): - raise TypeError("Can only add Conditions to ConditionsTable") + raise TypeError("Can 
only add Condition to ConditionTable") self.conditions.append(other) return self @property def free_symbols(self) -> set[sp.Symbol]: - """Get all free symbols in the conditions table. + """Get all free symbols in the condition table. This includes all free symbols in the target values of the changes, independently of whether it is referenced by any experiment, or @@ -556,7 +556,7 @@ class ExperimentTable(BaseModel): @classmethod def from_df(cls, df: pd.DataFrame) -> ExperimentTable: - """Create an ExperimentsTable from a DataFrame.""" + """Create an ExperimentTable from a DataFrame.""" if df is None: return cls(experiments=[]) @@ -573,7 +573,7 @@ def from_df(cls, df: pd.DataFrame) -> ExperimentTable: return cls(experiments=experiments) def to_df(self) -> pd.DataFrame: - """Convert the ExperimentsTable to a DataFrame.""" + """Convert the ExperimentTable to a DataFrame.""" records = [ { C.EXPERIMENT_ID: experiment.id, @@ -590,25 +590,25 @@ def to_df(self) -> pd.DataFrame: @classmethod def from_tsv(cls, file_path: str | Path) -> ExperimentTable: - """Create an ExperimentsTable from a TSV file.""" + """Create an ExperimentTable from a TSV file.""" df = pd.read_csv(file_path, sep="\t") return cls.from_df(df) def to_tsv(self, file_path: str | Path) -> None: - """Write the ExperimentsTable to a TSV file.""" + """Write the ExperimentTable to a TSV file.""" df = self.to_df() df.to_csv(file_path, sep="\t", index=False) def __add__(self, other: Experiment) -> ExperimentTable: """Add an experiment to the table.""" if not isinstance(other, Experiment): - raise TypeError("Can only add Experiment to ExperimentsTable") + raise TypeError("Can only add Experiment to ExperimentTable") return ExperimentTable(experiments=self.experiments + [other]) def __iadd__(self, other: Experiment) -> ExperimentTable: """Add an experiment to the table in place.""" if not isinstance(other, Experiment): - raise TypeError("Can only add Experiment to ExperimentsTable") + raise TypeError("Can only add 
Experiment to ExperimentTable") self.experiments.append(other) return self From 017746e17826d5e4236f9c71f49bae052ae270f5 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 27 Mar 2025 19:52:06 +0100 Subject: [PATCH 036/141] lint: Don't fail on missing noiseFormula (#367) * lint: Don't fail on missing noiseFormula * Check measurements after observables Fixes #219. --- petab/v1/lint.py | 30 +++++++++++++++--------------- petab/v1/measurements.py | 24 ++++++++++++++---------- 2 files changed, 29 insertions(+), 25 deletions(-) diff --git a/petab/v1/lint.py b/petab/v1/lint.py index e14289fb..6d06c492 100644 --- a/petab/v1/lint.py +++ b/petab/v1/lint.py @@ -906,21 +906,6 @@ def lint_problem(problem: "petab.Problem") -> bool: else: logger.warning("Model not available. Skipping.") - if problem.measurement_df is not None: - logger.info("Checking measurement table...") - try: - check_measurement_df(problem.measurement_df, problem.observable_df) - - if problem.condition_df is not None: - assert_measurement_conditions_present_in_condition_table( - problem.measurement_df, problem.condition_df - ) - except AssertionError as e: - logger.error(e) - errors_occurred = True - else: - logger.warning("Measurement table not available. Skipping.") - if problem.condition_df is not None: logger.info("Checking condition table...") try: @@ -953,6 +938,21 @@ def lint_problem(problem: "petab.Problem") -> bool: else: logger.warning("Observable table not available. Skipping.") + if problem.measurement_df is not None: + logger.info("Checking measurement table...") + try: + check_measurement_df(problem.measurement_df, problem.observable_df) + + if problem.condition_df is not None: + assert_measurement_conditions_present_in_condition_table( + problem.measurement_df, problem.condition_df + ) + except AssertionError as e: + logger.error(e) + errors_occurred = True + else: + logger.warning("Measurement table not available. 
Skipping.") + if problem.parameter_df is not None: logger.info("Checking parameter table...") try: diff --git a/petab/v1/measurements.py b/petab/v1/measurements.py index f78511df..ec7a1069 100644 --- a/petab/v1/measurements.py +++ b/petab/v1/measurements.py @@ -277,16 +277,20 @@ def assert_overrides_match_parameter_count( strict=True, ) } - noise_parameters_count = { - obs_id: len( - observables.get_formula_placeholders(formula, obs_id, "noise") - ) - for obs_id, formula in zip( - observable_df.index.values, - observable_df[NOISE_FORMULA], - strict=True, - ) - } + noise_parameters_count = ( + { + obs_id: len( + observables.get_formula_placeholders(formula, obs_id, "noise") + ) + for obs_id, formula in zip( + observable_df.index.values, + observable_df[NOISE_FORMULA], + strict=True, + ) + } + if NOISE_FORMULA in observable_df.columns + else {obs_id: 0 for obs_id in observable_df.index.values} + ) for _, row in measurement_df.iterrows(): # check observable parameters From 81af370dd973f734ad5323da799bf651efb16b21 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 27 Mar 2025 19:52:59 +0100 Subject: [PATCH 037/141] Add `evaluate: bool` argument to math parser (#365) So far, when math expressions were parsed, they were evaluated as far as possible. This was not always desirable. Now, this optional as far as conveniently possible. Closes #363. 
--- petab/v1/math/SympyVisitor.py | 50 ++++++++++++++++++++++++++--------- petab/v1/math/sympify.py | 34 ++++++++++++++++++++++-- tests/v1/math/test_math.py | 5 ++++ 3 files changed, 74 insertions(+), 15 deletions(-) diff --git a/petab/v1/math/SympyVisitor.py b/petab/v1/math/SympyVisitor.py index b8154301..949366b3 100644 --- a/petab/v1/math/SympyVisitor.py +++ b/petab/v1/math/SympyVisitor.py @@ -39,8 +39,12 @@ } _unary_funcs = { "exp": sp.exp, - "log10": lambda x: -sp.oo if x.is_zero is True else sp.log(x, 10), - "log2": lambda x: -sp.oo if x.is_zero is True else sp.log(x, 2), + "log10": lambda x, evaluate=True: -sp.oo + if x.is_zero is True + else sp.log(x, 10, evaluate=evaluate), + "log2": lambda x, evaluate=True: -sp.oo + if x.is_zero is True + else sp.log(x, 2, evaluate=evaluate), "ln": sp.log, "sqrt": sp.sqrt, "abs": sp.Abs, @@ -75,8 +79,14 @@ class MathVisitorSympy(PetabMathExprParserVisitor): For a general introduction to ANTLR4 visitors, see: https://github.com/antlr/antlr4/blob/7d4cea92bc3f7d709f09c3f1ac77c5bbc71a6749/doc/python-target.md + + :param evaluate: Whether to evaluate the expression. 
""" + def __init__(self, evaluate=True): + super().__init__() + self.evaluate = evaluate + def visitPetabExpression( self, ctx: PetabMathExprParser.PetabExpressionContext ) -> sp.Expr | sp.Basic: @@ -101,9 +111,17 @@ def visitMultExpr( operand1 = bool2num(self.visit(ctx.getChild(0))) operand2 = bool2num(self.visit(ctx.getChild(2))) if ctx.ASTERISK(): - return operand1 * operand2 + return sp.Mul(operand1, operand2, evaluate=self.evaluate) if ctx.SLASH(): - return operand1 / operand2 + return ( + operand1 / operand2 + if self.evaluate + else sp.Mul( + operand1, + sp.Pow(operand2, -1, evaluate=False), + evaluate=False, + ) + ) raise AssertionError(f"Unexpected expression: {ctx.getText()}") @@ -112,9 +130,9 @@ def visitAddExpr(self, ctx: PetabMathExprParser.AddExprContext) -> sp.Expr: op1 = bool2num(self.visit(ctx.getChild(0))) op2 = bool2num(self.visit(ctx.getChild(2))) if ctx.PLUS(): - return op1 + op2 + return sp.Add(op1, op2, evaluate=self.evaluate) if ctx.MINUS(): - return op1 - op2 + return sp.Add(op1, -op2, evaluate=self.evaluate) raise AssertionError( f"Unexpected operator: {ctx.getChild(1).getText()} " @@ -146,28 +164,32 @@ def visitFunctionCall( f"Unexpected number of arguments: {len(args)} " f"in {ctx.getText()}" ) - return _trig_funcs[func_name](*args) + return _trig_funcs[func_name](*args, evaluate=self.evaluate) if func_name in _unary_funcs: if len(args) != 1: raise AssertionError( f"Unexpected number of arguments: {len(args)} " f"in {ctx.getText()}" ) - return _unary_funcs[func_name](*args) + return _unary_funcs[func_name](*args, evaluate=self.evaluate) if func_name in _binary_funcs: if len(args) != 2: raise AssertionError( f"Unexpected number of arguments: {len(args)} " f"in {ctx.getText()}" ) - return _binary_funcs[func_name](*args) + return _binary_funcs[func_name](*args, evaluate=self.evaluate) if func_name == "log": if len(args) not in [1, 2]: raise AssertionError( f"Unexpected number of arguments: {len(args)} " f"in {ctx.getText()}" ) - return 
-sp.oo if args[0].is_zero is True else sp.log(*args) + return ( + -sp.oo + if args[0].is_zero is True + else sp.log(*args, evaluate=self.evaluate) + ) if func_name == "piecewise": if (len(args) - 1) % 2 != 0: @@ -184,7 +206,7 @@ def visitFunctionCall( args[::2], args[1::2], strict=True ) ) - return sp.Piecewise(*sp_args) + return sp.Piecewise(*sp_args, evaluate=self.evaluate) raise ValueError(f"Unknown function: {ctx.getText()}") @@ -203,7 +225,7 @@ def visitPowerExpr( ) operand1 = bool2num(self.visit(ctx.getChild(0))) operand2 = bool2num(self.visit(ctx.getChild(2))) - return sp.Pow(operand1, operand2) + return sp.Pow(operand1, operand2, evaluate=self.evaluate) def visitUnaryExpr( self, ctx: PetabMathExprParser.UnaryExprContext @@ -240,7 +262,7 @@ def visitComparisonExpr( if op in ops: lhs = bool2num(lhs) rhs = bool2num(rhs) - return ops[op](lhs, rhs) + return ops[op](lhs, rhs, evaluate=self.evaluate) raise AssertionError(f"Unexpected operator: {op}") @@ -301,4 +323,6 @@ def num2bool(x: sp.Basic | sp.Expr) -> sp.Basic | sp.Expr: return sp.false if x.is_zero is False: return sp.true + if isinstance(x, Boolean): + return x return sp.Piecewise((True, x != 0.0), (False, True)) diff --git a/petab/v1/math/sympify.py b/petab/v1/math/sympify.py index 8ef1a129..87e37e3b 100644 --- a/petab/v1/math/sympify.py +++ b/petab/v1/math/sympify.py @@ -12,7 +12,9 @@ __all__ = ["sympify_petab"] -def sympify_petab(expr: str | int | float) -> sp.Expr | sp.Basic: +def sympify_petab( + expr: str | int | float, evaluate: bool = True +) -> sp.Expr | sp.Basic: """Convert PEtab math expression to sympy expression. .. note:: @@ -22,6 +24,7 @@ def sympify_petab(expr: str | int | float) -> sp.Expr | sp.Basic: Args: expr: PEtab math expression. + evaluate: Whether to evaluate the expression. Raises: ValueError: Upon lexer/parser errors or if the expression is @@ -30,6 +33,33 @@ def sympify_petab(expr: str | int | float) -> sp.Expr | sp.Basic: Returns: The sympy expression corresponding to `expr`. 
Boolean values are converted to numeric values. + + + :example: + >>> from petab.math import sympify_petab + >>> sympify_petab("sin(0)") + 0 + >>> sympify_petab("sin(0)", evaluate=False) + sin(0.0) + >>> sympify_petab("sin(0)", evaluate=True) + 0 + >>> sympify_petab("1 + 2", evaluate=True) + 3.00000000000000 + >>> sympify_petab("1 + 2", evaluate=False) + 1.0 + 2.0 + >>> sympify_petab("piecewise(1, 1 > 2, 0)", evaluate=True) + 0.0 + >>> sympify_petab("piecewise(1, 1 > 2, 0)", evaluate=False) + Piecewise((1.0, 1.0 > 2.0), (0.0, True)) + >>> # currently, boolean values are converted to numeric values + >>> # independent of the `evaluate` flag + >>> sympify_petab("true", evaluate=True) + 1.00000000000000 + >>> sympify_petab("true", evaluate=False) + 1.00000000000000 + >>> # ... and integer values are converted to floats + >>> sympify_petab("2", evaluate=True) + 2.00000000000000 """ if isinstance(expr, sp.Expr): # TODO: check if only PEtab-compatible symbols and functions are used @@ -62,7 +92,7 @@ def sympify_petab(expr: str | int | float) -> sp.Expr | sp.Basic: raise ValueError(f"Error parsing {expr!r}: {e.args[0]}") from None # Convert to sympy expression - visitor = MathVisitorSympy() + visitor = MathVisitorSympy(evaluate=evaluate) expr = visitor.visit(tree) expr = bool2num(expr) # check for `False`, we'll accept both `True` and `None` diff --git a/tests/v1/math/test_math.py b/tests/v1/math/test_math.py index dae78154..ed58595e 100644 --- a/tests/v1/math/test_math.py +++ b/tests/v1/math/test_math.py @@ -24,6 +24,11 @@ def test_parse_simple(): assert float(sympify_petab("1 + 2 * (3 + 4) / 2")) == 8 +def test_evaluate(): + act = sympify_petab("piecewise(1, 1 > 2, 0)", evaluate=False) + assert str(act) == "Piecewise((1.0, 1.0 > 2.0), (0.0, True))" + + def read_cases(): """Read test cases from YAML file in the petab_test_suite package.""" yaml_file = importlib.resources.files("petabtests.cases").joinpath( From 7ab7cd8708d2b3c1b77018c9abdf112725717af1 Mon Sep 17 00:00:00 
2001 From: Daniel Weindl Date: Thu, 27 Mar 2025 20:35:15 +0100 Subject: [PATCH 038/141] Add petab-compatible sympy string-printer (#364) Add a sympy Printer to stringify sympy expressions in a petab-compatible way. For example, we need to avoid `str(sympy.sympify("x^2"))` -> `'x**2'`. Closes #362. --- petab/v1/math/__init__.py | 1 + petab/v1/math/printer.py | 91 ++++++++++++++++++++++++++++++++++++++ petab/v1/math/sympify.py | 35 +++++++++------ petab/v2/core.py | 21 ++------- tests/v1/math/test_math.py | 40 ++++++++++++----- tests/v2/test_problem.py | 4 +- 6 files changed, 149 insertions(+), 43 deletions(-) create mode 100644 petab/v1/math/printer.py diff --git a/petab/v1/math/__init__.py b/petab/v1/math/__init__.py index b9a4f59b..b85ca07b 100644 --- a/petab/v1/math/__init__.py +++ b/petab/v1/math/__init__.py @@ -1,3 +1,4 @@ """Functions for parsing and evaluating mathematical expressions.""" +from .printer import PetabStrPrinter, petab_math_str # noqa: F401 from .sympify import sympify_petab # noqa: F401 diff --git a/petab/v1/math/printer.py b/petab/v1/math/printer.py new file mode 100644 index 00000000..1ba123d9 --- /dev/null +++ b/petab/v1/math/printer.py @@ -0,0 +1,91 @@ +"""A PEtab-compatible sympy string-printer.""" + +from itertools import chain, islice + +import sympy as sp +from sympy.printing.str import StrPrinter + + +class PetabStrPrinter(StrPrinter): + """A PEtab-compatible sympy string-printer.""" + + #: Mapping of sympy functions to PEtab functions + _func_map = { + "asin": "arcsin", + "acos": "arccos", + "atan": "arctan", + "acot": "arccot", + "asec": "arcsec", + "acsc": "arccsc", + "asinh": "arcsinh", + "acosh": "arccosh", + "atanh": "arctanh", + "acoth": "arccoth", + "asech": "arcsech", + "acsch": "arccsch", + "Abs": "abs", + } + + def _print_BooleanTrue(self, expr): + return "true" + + def _print_BooleanFalse(self, expr): + return "false" + + def _print_Pow(self, expr: sp.Pow): + """Custom printing for the power operator""" + base, exp = 
expr.as_base_exp() + return f"{self._print(base)} ^ {self._print(exp)}" + + def _print_Infinity(self, expr): + """Custom printing for infinity""" + return "inf" + + def _print_NegativeInfinity(self, expr): + """Custom printing for negative infinity""" + return "-inf" + + def _print_Function(self, expr): + """Custom printing for specific functions""" + + if expr.func.__name__ == "Piecewise": + return self._print_Piecewise(expr) + + if func := self._func_map.get(expr.func.__name__): + return f"{func}({', '.join(map(self._print, expr.args))})" + + return super()._print_Function(expr) + + def _print_Piecewise(self, expr): + """Custom printing for Piecewise function""" + # merge the tuples and drop the final `True` condition + str_args = map( + self._print, + islice(chain.from_iterable(expr.args), 2 * len(expr.args) - 1), + ) + return f"piecewise({', '.join(str_args)})" + + def _print_Min(self, expr): + """Custom printing for Min function""" + return f"min({', '.join(map(self._print, expr.args))})" + + def _print_Max(self, expr): + """Custom printing for Max function""" + return f"max({', '.join(map(self._print, expr.args))})" + + +def petab_math_str(expr: sp.Basic | sp.Expr | None) -> str: + """Convert a sympy expression to a PEtab-compatible math expression string. + + :example: + >>> expr = sp.sympify("x**2 + sin(y)") + >>> petab_math_str(expr) + 'x ^ 2 + sin(y)' + >>> expr = sp.sympify("Piecewise((1, x > 0), (0, True))") + >>> petab_math_str(expr) + 'piecewise(1, x > 0, 0)' + """ + if expr is None: + return "" + + return PetabStrPrinter().doprint(expr) diff --git a/petab/v1/math/sympify.py b/petab/v1/math/sympify.py index 87e37e3b..1d44e16d 100644 --- a/petab/v1/math/sympify.py +++ b/petab/v1/math/sympify.py @@ -5,6 +5,7 @@ from antlr4 import CommonTokenStream, InputStream from antlr4.error.ErrorListener import ErrorListener +from . 
import petab_math_str from ._generated.PetabMathExprLexer import PetabMathExprLexer from ._generated.PetabMathExprParser import PetabMathExprParser from .SympyVisitor import MathVisitorSympy, bool2num @@ -13,14 +14,10 @@ def sympify_petab( - expr: str | int | float, evaluate: bool = True + expr: str | int | float | sp.Basic, evaluate: bool = True ) -> sp.Expr | sp.Basic: """Convert PEtab math expression to sympy expression. - .. note:: - - All symbols in the returned expression will have the `real=True` - assumption. Args: expr: PEtab math expression. @@ -28,15 +25,19 @@ def sympify_petab( Raises: ValueError: Upon lexer/parser errors or if the expression is - otherwise invalid. + otherwise invalid. Returns: The sympy expression corresponding to `expr`. Boolean values are converted to numeric values. + .. note:: + + All symbols in the returned expression will have the ``real=True`` + assumption. :example: - >>> from petab.math import sympify_petab + >>> from petab.v1.math import sympify_petab >>> sympify_petab("sin(0)") 0 >>> sympify_petab("sin(0)", evaluate=False) @@ -61,9 +62,8 @@ def sympify_petab( >>> sympify_petab("2", evaluate=True) 2.00000000000000 """ - if isinstance(expr, sp.Expr): - # TODO: check if only PEtab-compatible symbols and functions are used - return expr + if isinstance(expr, sp.Basic): + return sympify_petab(petab_math_str(expr)) if isinstance(expr, int) or isinstance(expr, np.integer): return sp.Integer(expr) @@ -95,10 +95,17 @@ def sympify_petab( visitor = MathVisitorSympy(evaluate=evaluate) expr = visitor.visit(tree) expr = bool2num(expr) - # check for `False`, we'll accept both `True` and `None` - if expr.is_extended_real is False: - raise ValueError(f"Expression {expr} is not real-valued.") - + try: + # check for `False`, we'll accept both `True` and `None` + if expr.is_extended_real is False: + raise ValueError(f"Expression {expr} is not real-valued.") + except AttributeError as e: + # work-around for `sp.sec(0, 
evaluate=False).is_extended_real` error + if str(e) not in ( + "'One' object has no attribute '_eval_is_extended_real'", + "'Float' object has no attribute '_eval_is_extended_real'", + ): + raise return expr diff --git a/petab/v2/core.py b/petab/v2/core.py index b595a466..b42083f4 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -25,7 +25,7 @@ from typing_extensions import Self from ..v1.lint import is_valid_identifier -from ..v1.math import sympify_petab +from ..v1.math import petab_math_str, sympify_petab from . import C, get_observable_df __all__ = [ @@ -273,23 +273,8 @@ def to_df(self) -> pd.DataFrame: for record in records: obs = record[C.OBSERVABLE_FORMULA] noise = record[C.NOISE_FORMULA] - record[C.OBSERVABLE_FORMULA] = ( - None - if obs is None - # TODO: we need a custom printer for sympy expressions - # to avoid '**' - # https://github.com/PEtab-dev/libpetab-python/issues/362 - else str(obs) - if not obs.is_number - else float(obs) - ) - record[C.NOISE_FORMULA] = ( - None - if noise is None - else str(noise) - if not noise.is_number - else float(noise) - ) + record[C.OBSERVABLE_FORMULA] = petab_math_str(obs) + record[C.NOISE_FORMULA] = petab_math_str(noise) return pd.DataFrame(records).set_index([C.OBSERVABLE_ID]) @classmethod diff --git a/tests/v1/math/test_math.py b/tests/v1/math/test_math.py index ed58595e..940c5340 100644 --- a/tests/v1/math/test_math.py +++ b/tests/v1/math/test_math.py @@ -6,9 +6,9 @@ import sympy as sp import yaml from sympy.abc import _clash -from sympy.logic.boolalg import Boolean +from sympy.logic.boolalg import Boolean, BooleanFalse, BooleanTrue -from petab.math import sympify_petab +from petab.v1.math import petab_math_str, sympify_petab def test_sympify_numpy(): @@ -29,6 +29,20 @@ def test_evaluate(): assert str(act) == "Piecewise((1.0, 1.0 > 2.0), (0.0, True))" +def test_assumptions(): + # in PEtab, all symbols are expected to be real-valued + assert sympify_petab("x").is_real + + # non-real symbols are changed to real 
+ assert sympify_petab(sp.Symbol("x", real=False)).is_real + + +def test_printer(): + assert petab_math_str(None) == "" + assert petab_math_str(BooleanTrue()) == "true" + assert petab_math_str(BooleanFalse()) == "false" + + def read_cases(): """Read test cases from YAML file in the petab_test_suite package.""" yaml_file = importlib.resources.files("petabtests.cases").joinpath( @@ -60,29 +74,35 @@ def read_cases(): @pytest.mark.parametrize("expr_str, expected", read_cases()) def test_parse_cases(expr_str, expected): """Test PEtab math expressions for the PEtab test suite.""" - result = sympify_petab(expr_str) - if isinstance(result, Boolean): - assert result == expected + sym_expr = sympify_petab(expr_str) + if isinstance(sym_expr, Boolean): + assert sym_expr == expected else: try: - result = float(result.evalf()) + result = float(sym_expr.evalf()) assert np.isclose(result, expected), ( f"{expr_str}: Expected {expected}, got {result}" ) except TypeError: - assert result == expected, ( + assert sym_expr == expected, ( f"{expr_str}: Expected {expected}, got {result}" ) + # test parsing, printing, and parsing again + resympified = sympify_petab(petab_math_str(sym_expr)) + if sym_expr.is_number: + assert np.isclose(float(resympified), float(sym_expr)) + else: + assert resympified.equals(sym_expr), (sym_expr, resympified) + def test_ids(): """Test symbols in expressions.""" assert sympify_petab("bla * 2") == 2.0 * sp.Symbol("bla", real=True) # test that sympy expressions that are invalid in PEtab raise an error - # TODO: handle these cases after - # https://github.com/PEtab-dev/libpetab-python/pull/364 - # sympify_petab(sp.Symbol("föö")) + with pytest.raises(ValueError): + sympify_petab(sp.Symbol("föö")) def test_syntax_error(): diff --git a/tests/v2/test_problem.py b/tests/v2/test_problem.py index d4eab006..67c47a46 100644 --- a/tests/v2/test_problem.py +++ b/tests/v2/test_problem.py @@ -133,7 +133,9 @@ def test_modify_problem(): } ).set_index([OBSERVABLE_ID]) 
assert_frame_equal( - problem.observable_df[[OBSERVABLE_FORMULA, NOISE_FORMULA]], + problem.observable_df[[OBSERVABLE_FORMULA, NOISE_FORMULA]].map( + lambda x: float(x) if x != "" else None + ), exp_observable_df, check_dtype=False, ) From 8614c02ee882490ba514bc5166ff5c9c3fd9f9ff Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Fri, 28 Mar 2025 09:00:44 +0100 Subject: [PATCH 039/141] v2: Parameter.estimate -> bool (#368) * Update serialization * Update upconversion * Update tests --- petab/v2/core.py | 9 +++++---- petab/v2/petab1to2.py | 14 +++++++++++--- petab/v2/problem.py | 2 +- tests/v2/test_problem.py | 4 ++-- 4 files changed, 19 insertions(+), 10 deletions(-) diff --git a/petab/v2/core.py b/petab/v2/core.py index b42083f4..72eaec31 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -19,6 +19,7 @@ ConfigDict, Field, ValidationInfo, + field_serializer, field_validator, model_validator, ) @@ -875,10 +876,6 @@ def _validate_estimate_before(cls, v): if isinstance(v, bool): return v - # FIXME: grace period for 0/1 values until the test suite was updated - if v in [0, 1, "0", "1"]: - return bool(int(v)) - # TODO: clarify whether extra whitespace is allowed if isinstance(v, str): v = v.strip().lower() @@ -891,6 +888,10 @@ def _validate_estimate_before(cls, v): f"Invalid value for estimate: {v}. Must be `true` or `false`." 
) + @field_serializer("estimate") + def _serialize_estimate(self, estimate: bool, _info): + return str(estimate).lower() + @field_validator("lb", "ub", "nominal_value") @classmethod def _convert_nan_to_none(cls, v): diff --git a/petab/v2/petab1to2.py b/petab/v2/petab1to2.py index d7b6fb68..29107238 100644 --- a/petab/v2/petab1to2.py +++ b/petab/v2/petab1to2.py @@ -91,12 +91,20 @@ def petab_files_1to2(yaml_config: Path | str, output_dir: Path | str): new_yaml_config = v2.ProblemConfig(**new_yaml_config) # Update tables - # condition tables, observable tables, SBML files, parameter table: - # no changes - just copy + + # parameter table: + # * parameter.estimate: int -> bool + parameter_df = petab_problem.parameter_df.copy() + parameter_df[v1.C.ESTIMATE] = parameter_df[v1.C.ESTIMATE].apply( + lambda x: str(bool(int(x))).lower() + ) file = yaml_config[v2.C.PARAMETER_FILE] - _copy_file(get_src_path(file), Path(get_dest_path(file))) + v2.write_parameter_df(parameter_df, get_dest_path(file)) + # sub-problems for problem_config in new_yaml_config.problems: + # copy files that don't need conversion + # (models, observables, visualizations) for file in chain( problem_config.observable_files, (model.location for model in problem_config.model_files.values()), diff --git a/petab/v2/problem.py b/petab/v2/problem.py index 2b564e0c..b0b76aa9 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -939,7 +939,7 @@ def add_observable( def add_parameter( self, id_: str, - estimate: bool | str | int = True, + estimate: bool | str = True, nominal_value: Number | None = None, scale: str = None, lb: Number = None, diff --git a/tests/v2/test_problem.py b/tests/v2/test_problem.py index 67c47a46..55141ba3 100644 --- a/tests/v2/test_problem.py +++ b/tests/v2/test_problem.py @@ -140,13 +140,13 @@ def test_modify_problem(): check_dtype=False, ) - problem.add_parameter("parameter1", 1, 0, lb=1, ub=2) + problem.add_parameter("parameter1", True, 0, lb=1, ub=2) 
problem.add_parameter("parameter2", False, 2) exp_parameter_df = pd.DataFrame( data={ PARAMETER_ID: ["parameter1", "parameter2"], - ESTIMATE: [1, 0], + ESTIMATE: ["true", "false"], NOMINAL_VALUE: [0.0, 2.0], LOWER_BOUND: [1.0, np.nan], UPPER_BOUND: [2.0, np.nan], From d5589c8598805c04990999e4b9bd6f2d682472e1 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 3 Apr 2025 13:56:16 +0200 Subject: [PATCH 040/141] Set `__all__` in v1.math.printer --- petab/v1/math/printer.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/petab/v1/math/printer.py b/petab/v1/math/printer.py index 1ba123d9..347c1328 100644 --- a/petab/v1/math/printer.py +++ b/petab/v1/math/printer.py @@ -5,6 +5,8 @@ import sympy as sp from sympy.printing.str import StrPrinter +__all__ = ["PetabStrPrinter", "petab_math_str"] + class PetabStrPrinter(StrPrinter): """A PEtab-compatible sympy string-printer.""" From 4238588e10d4fe46bf638cc64146e99f28546a66 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Wed, 23 Apr 2025 10:34:59 +0200 Subject: [PATCH 041/141] Add SbmlModel.{to_antimony,to_sbml_str} (#371) Simplify conversion of SBML models to antimony and XML strings. Convenient in particular for debugging. 
--- petab/v1/models/sbml_model.py | 24 +++++++++++++++++++++--- tests/v1/test_sbml.py | 5 ++++- 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/petab/v1/models/sbml_model.py b/petab/v1/models/sbml_model.py index 8e8cf498..c6957ca6 100644 --- a/petab/v1/models/sbml_model.py +++ b/petab/v1/models/sbml_model.py @@ -65,9 +65,7 @@ def __getstate__(self): # libsbml stuff cannot be serialized directly if self.sbml_model: - sbml_document = self.sbml_model.getSBMLDocument() - sbml_writer = libsbml.SBMLWriter() - state["sbml_string"] = sbml_writer.writeSBMLToString(sbml_document) + state["sbml_string"] = self.to_sbml_str() exclude = ["sbml_reader", "sbml_document", "sbml_model"] for key in exclude: @@ -133,6 +131,26 @@ def from_antimony(ant_model: str | Path) -> SbmlModel: sbml_str = antimony2sbml(ant_model) return SbmlModel.from_string(sbml_str) + def to_antimony(self) -> str: + """Convert the SBML model to an Antimony string.""" + import antimony as ant + + sbml_str = self.to_sbml_str() + + ant.clearPreviousLoads() + ant.freeAll() + + if ant.loadSBMLString(sbml_str) < 0: + raise RuntimeError(ant.getLastError()) + + return ant.getAntimonyString() + + def to_sbml_str(self) -> str: + """Convert the SBML model to an SBML/XML string.""" + sbml_document = self.sbml_model.getSBMLDocument() + sbml_writer = libsbml.SBMLWriter() + return sbml_writer.writeSBMLToString(sbml_document) + @property def model_id(self): return self._model_id diff --git a/tests/v1/test_sbml.py b/tests/v1/test_sbml.py index b29f1ea7..c38f5ab5 100644 --- a/tests/v1/test_sbml.py +++ b/tests/v1/test_sbml.py @@ -134,7 +134,7 @@ def test_sbml_model_repr(): assert repr(petab_model) == "" -def test_sbml_from_ant(): +def test_sbml_from_to_ant(): ant_model = """ model test R1: S1 -> S2; k1*S1 @@ -147,3 +147,6 @@ def test_sbml_from_ant(): assert set(petab_model.get_valid_parameters_for_parameter_table()) == { "k1" } + + # convert back to antimony + assert "R1: S1 -> S2; k1*S1" in 
petab_model.to_antimony() From 8b4bf3f724bf51f9ade3f6d2f291d8fdac18e292 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Wed, 23 Apr 2025 21:30:01 +0200 Subject: [PATCH 042/141] Fix pyproject.toml license information Fixes: ``` ******************************************************************************** Please use a simple string containing a SPDX expression for `project.license`. You can also use `project.license-files`. (Both options available on setuptools>=77.0.0). By 2026-Feb-18, you need to update your project and remove deprecated calls or your builds will no longer be supported. See https://packaging.python.org/en/latest/guides/writing-pyproject-toml/#license for details. ******************************************************************************** ``` --- pyproject.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 5168dbd0..703bae6e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [build-system] requires = [ - "setuptools>=62", + "setuptools>=77", "wheel", ] build-backend = "setuptools.build_meta" @@ -24,7 +24,7 @@ dependencies = [ "antlr4-python3-runtime==4.13.1", "pydantic>=2.10", ] -license = {text = "MIT License"} +license = "MIT" authors = [ {name = "The PEtab developers"}, ] From 258abc97e5a571798d5f805003bfdb030851175c Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 24 Apr 2025 09:50:03 +0200 Subject: [PATCH 043/141] Add v2.Problem.model_dump (#372) Convert a Problem to a dict. 
--- petab/v2/problem.py | 48 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 47 insertions(+), 1 deletion(-) diff --git a/petab/v2/problem.py b/petab/v2/problem.py index b0b76aa9..294256a8 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -10,7 +10,7 @@ from math import nan from numbers import Number from pathlib import Path -from typing import TYPE_CHECKING +from typing import TYPE_CHECKING, Any import pandas as pd import sympy as sp @@ -1096,6 +1096,52 @@ def __iadd__(self, other): ) return self + def model_dump(self, **kwargs) -> dict[str, Any]: + """Convert this Problem to a dictionary. + + This function is intended for debugging purposes and should not be + used for serialization. The output of this function may change + without notice. + + The output includes all PEtab tables, but not the model itself. + + See `pydantic.BaseModel.model_dump `__ + for details. + + :example: + + >>> from pprint import pprint + >>> p = Problem() + >>> p += core.Parameter(id="par", lb=0, ub=1) + >>> pprint(p.model_dump()) + {'conditions': [], + 'config': {'extensions': [], + 'format_version': '2.0.0', + 'parameter_file': None, + 'problems': []}, + 'experiments': [], + 'mappings': [], + 'measurements': [], + 'observables': [], + 'parameters': [{'estimate': 'true', + 'id': 'par', + 'lb': 0.0, + 'nominal_value': None, + 'scale': , + 'ub': 1.0}]} + """ + res = { + "config": (self.config or ProblemConfig()).model_dump(**kwargs), + } + res |= self.mapping_table.model_dump(**kwargs) + res |= self.condition_table.model_dump(**kwargs) + res |= self.experiment_table.model_dump(**kwargs) + res |= self.observable_table.model_dump(**kwargs) + res |= self.measurement_table.model_dump(**kwargs) + res |= self.parameter_table.model_dump(**kwargs) + + return res + class ModelFile(BaseModel): """A file in the PEtab problem configuration.""" From b43f5b81fcfe522f1b126d35fae31c6f9cdd2929 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 24 Apr 2025 16:37:02 +0200 
Subject: [PATCH 044/141] Implement proper truncation for prior distributions (#335) Previously, when sampled startpoints were outside the bounds, their value was set to the upper/lower bounds. This may put too much probability mass on the bounds. With these changes, we properly sample from the respective truncated distributions. Closes #330. This also evaluates all priors on the model parameter scale (instead of `parameterScale` scale, see https://github.com/PEtab-dev/PEtab/issues/402). --------- Co-authored-by: Maren Philipps <55318391+m-philipps@users.noreply.github.com> Co-authored-by: Dilan Pathirana <59329744+dilpath@users.noreply.github.com> --- doc/conf.py | 1 + doc/example/distributions.ipynb | 156 +++++++++++------ petab/v1/C.py | 7 + petab/v1/distributions.py | 302 ++++++++++++++++++++++++++------ petab/v1/parameter_mapping.py | 2 +- petab/v1/priors.py | 166 +++++++++++++----- petab/v1/sampling.py | 10 +- tests/v1/test_distributions.py | 81 ++++++--- tests/v1/test_priors.py | 109 +++++++++--- 9 files changed, 633 insertions(+), 201 deletions(-) diff --git a/doc/conf.py b/doc/conf.py index d9498efb..3b378808 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -87,6 +87,7 @@ nb_execution_mode = "force" nb_execution_raise_on_error = True nb_execution_show_tb = True +nb_execution_timeout = 90 # max.
seconds/cell source_suffix = { ".rst": "restructuredtext", diff --git a/doc/example/distributions.ipynb b/doc/example/distributions.ipynb index 7776ef8d..2b3ab24f 100644 --- a/doc/example/distributions.ipynb +++ b/doc/example/distributions.ipynb @@ -23,59 +23,99 @@ }, { "cell_type": "code", - "execution_count": null, "id": "initial_id", "metadata": { "collapsed": true }, - "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import numpy as np\n", "import seaborn as sns\n", "\n", "from petab.v1.C import *\n", + "from petab.v1.parameters import unscale\n", "from petab.v1.priors import Prior\n", "\n", "sns.set_style(None)\n", "\n", "\n", - "def plot(prior: Prior, ax=None):\n", + "def plot(prior: Prior):\n", " \"\"\"Visualize a distribution.\"\"\"\n", + " fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(12, 4))\n", + " sample = prior.sample(20_000, x_scaled=True)\n", + "\n", + " fig.suptitle(str(prior))\n", + "\n", + " plot_single(prior, ax=ax1, sample=sample, scaled=False)\n", + " plot_single(prior, ax=ax2, sample=sample, scaled=True)\n", + " plt.tight_layout()\n", + " plt.show()\n", + "\n", + "\n", + "def plot_single(\n", + " prior: Prior, scaled: bool = False, ax=None, sample: np.array = None\n", + "):\n", + " fig = None\n", " if ax is None:\n", " fig, ax = plt.subplots()\n", "\n", - " sample = prior.sample(10000)\n", + " if sample is None:\n", + " sample = prior.sample(20_000)\n", "\n", - " # pdf\n", + " # assuming scaled sample\n", + " if not scaled:\n", + " sample = unscale(sample, prior.transformation)\n", + " bounds = prior.bounds\n", + " else:\n", + " bounds = (\n", + " (prior.lb_scaled, prior.ub_scaled)\n", + " if prior.bounds is not None\n", + " else None\n", + " )\n", + "\n", + " # plot pdf\n", " xmin = min(\n", - " sample.min(),\n", - " prior.lb_scaled if prior.bounds is not None else sample.min(),\n", + " sample.min(), bounds[0] if prior.bounds is not None else sample.min()\n", " )\n", " xmax = max(\n", - " sample.max(),\n", - " prior.ub_scaled 
if prior.bounds is not None else sample.max(),\n", + " sample.max(), bounds[1] if prior.bounds is not None else sample.max()\n", " )\n", + " padding = 0.1 * (xmax - xmin)\n", + " xmin -= padding\n", + " xmax += padding\n", " x = np.linspace(xmin, xmax, 500)\n", - " y = prior.pdf(x)\n", + " y = prior.pdf(x, x_scaled=scaled, rescale=scaled)\n", " ax.plot(x, y, color=\"red\", label=\"pdf\")\n", "\n", " sns.histplot(sample, stat=\"density\", ax=ax, label=\"sample\")\n", "\n", - " # bounds\n", + " # plot bounds\n", " if prior.bounds is not None:\n", - " for bound in (prior.lb_scaled, prior.ub_scaled):\n", + " for bound in bounds:\n", " if bound is not None and np.isfinite(bound):\n", " ax.axvline(bound, color=\"black\", linestyle=\"--\", label=\"bound\")\n", "\n", - " ax.set_title(str(prior))\n", - " ax.set_xlabel(\"Parameter value on the parameter scale\")\n", + " if fig is not None:\n", + " ax.set_title(str(prior))\n", + "\n", + " if scaled:\n", + " ax.set_xlabel(\n", + " f\"Parameter value on parameter scale ({prior.transformation})\"\n", + " )\n", + " ax.set_ylabel(\"Rescaled density\")\n", + " else:\n", + " ax.set_xlabel(\"Parameter value\")\n", + "\n", " ax.grid(False)\n", " handles, labels = ax.get_legend_handles_labels()\n", " unique_labels = dict(zip(labels, handles, strict=False))\n", " ax.legend(unique_labels.values(), unique_labels.keys())\n", - " plt.show()" - ] + "\n", + " if ax is None:\n", + " plt.show()" + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -85,30 +125,28 @@ }, { "cell_type": "code", - "execution_count": null, "id": "4f09e50a3db06d9f", "metadata": {}, - "outputs": [], "source": [ - "plot(Prior(UNIFORM, (0, 1)))\n", - "plot(Prior(NORMAL, (0, 1)))\n", - "plot(Prior(LAPLACE, (0, 1)))\n", - "plot(Prior(LOG_NORMAL, (0, 1)))\n", - "plot(Prior(LOG_LAPLACE, (1, 0.5)))" - ] + "plot_single(Prior(UNIFORM, (0, 1)))\n", + "plot_single(Prior(NORMAL, (0, 1)))\n", + "plot_single(Prior(LAPLACE, (0, 1)))\n", + 
"plot_single(Prior(LOG_NORMAL, (0, 1)))\n", + "plot_single(Prior(LOG_LAPLACE, (1, 0.5)))" + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", "id": "dab4b2d1e0f312d8", "metadata": {}, - "source": "If a parameter scale is specified (`parameterScale=lin|log|log10` not a `parameterScale*`-type distribution), the sample is transformed accordingly (but not the distribution parameters):\n" + "source": "If a parameter scale is specified (`parameterScale=lin|log|log10`), the distribution parameters are used as is without applying the `parameterScale` to them. The exception are the `parameterScale*`-type distributions, as explained below. In the context of PEtab prior distributions, `parameterScale` will only be used for the start point sampling for optimization, where the sample will be transformed accordingly. This is demonstrated below. The left plot always shows the prior distribution for unscaled parameter values, and the right plot shows the prior distribution for scaled parameter values. 
Note that in the objective function, the prior is always on the unscaled parameters.\n" }, { "cell_type": "code", - "execution_count": null, "id": "f6192c226f179ef9", "metadata": {}, - "outputs": [], "source": [ "plot(Prior(NORMAL, (10, 2), transformation=LIN))\n", "plot(Prior(NORMAL, (10, 2), transformation=LOG))\n", @@ -116,7 +154,9 @@ "# Note that the log-normal distribution is different\n", "# from a log-transformed normal distribution:\n", "plot(Prior(LOG_NORMAL, (10, 2), transformation=LIN))" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", @@ -126,53 +166,69 @@ }, { "cell_type": "code", - "execution_count": null, "id": "34c95268e8921070", "metadata": {}, - "outputs": [], "source": [ "plot(Prior(LOG_NORMAL, (10, 2), transformation=LOG))\n", "plot(Prior(PARAMETER_SCALE_NORMAL, (10, 2)))" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", "id": "263c9fd31156a4d5", "metadata": {}, - "source": "Prior distributions can also be defined on the parameter scale by using the types `parameterScaleUniform`, `parameterScaleNormal` or `parameterScaleLaplace`. In these cases, 1) the distribution parameter are interpreted on the transformed parameter scale, and 2) a sample from the given distribution is used directly, without applying any transformation according to `parameterScale` (this implies, that for `parameterScale=lin`, there is no difference between `parameterScaleUniform` and `uniform`):" + "source": "Prior distributions can also be defined on the scaled parameters (i.e., transformed according to `parameterScale`) by using the types `parameterScaleUniform`, `parameterScaleNormal` or `parameterScaleLaplace`. In these cases, the distribution parameters are interpreted on the transformed parameter scale (but not the parameter bounds, see below). This implies, that for `parameterScale=lin`, there is no difference between `parameterScaleUniform` and `uniform`." 
}, { "cell_type": "code", - "execution_count": null, "id": "5ca940bc24312fc6", "metadata": {}, - "outputs": [], "source": [ + "# different, because transformation!=LIN\n", "plot(Prior(UNIFORM, (0.01, 2), transformation=LOG10))\n", "plot(Prior(PARAMETER_SCALE_UNIFORM, (0.01, 2), transformation=LOG10))\n", "\n", + "# same, because transformation=LIN\n", "plot(Prior(UNIFORM, (0.01, 2), transformation=LIN))\n", "plot(Prior(PARAMETER_SCALE_UNIFORM, (0.01, 2), transformation=LIN))" - ] + ], + "outputs": [], + "execution_count": null }, { "cell_type": "markdown", "id": "b1a8b17d765db826", "metadata": {}, - "source": "To prevent the sampled parameters from exceeding the bounds, the sampled parameters are clipped to the bounds. The bounds are defined in the parameter table. Note that the current implementation does not support sampling from a truncated distribution. Instead, the samples are clipped to the bounds. This may introduce unwanted bias, and thus, should only be used with caution (i.e., the bounds should be chosen wide enough):" + "source": "The given distributions are truncated at the bounds defined in the parameter table:" }, { "cell_type": "code", - "execution_count": null, "id": "4ac42b1eed759bdd", "metadata": {}, - "outputs": [], "source": [ + "plot(Prior(NORMAL, (0, 1), bounds=(-2, 2)))\n", + "plot(Prior(UNIFORM, (0, 1), bounds=(0.1, 0.9)))\n", + "plot(Prior(UNIFORM, (1e-8, 1), bounds=(0.1, 0.9), transformation=LOG10))\n", + "plot(Prior(LAPLACE, (0, 1), bounds=(-0.5, 0.5)))\n", "plot(\n", - " Prior(NORMAL, (0, 1), bounds=(-4, 4))\n", - ") # negligible clipping-bias at 4 sigma\n", - "plot(Prior(UNIFORM, (0, 1), bounds=(0.1, 0.9))) # significant clipping-bias" - ] + " Prior(\n", + " PARAMETER_SCALE_UNIFORM,\n", + " (-3, 1),\n", + " bounds=(1e-2, 1),\n", + " transformation=LOG10,\n", + " )\n", + ")" + ], + "outputs": [], + "execution_count": null + }, + { + "metadata": {}, + "cell_type": "markdown", + "source": "This results in a constant shift in the 
probability density, compared to the non-truncated version (https://en.wikipedia.org/wiki/Truncated_distribution), such that the probability density still sums to 1.", + "id": "67de0cace55617a2" }, { "cell_type": "markdown", @@ -182,22 +238,24 @@ }, { "cell_type": "code", - "execution_count": null, "id": "581e1ac431860419", "metadata": {}, - "outputs": [], "source": [ - "plot(Prior(NORMAL, (10, 1), bounds=(6, 14), transformation=\"log10\"))\n", + "plot(Prior(NORMAL, (10, 1), bounds=(6, 11), transformation=\"log10\"))\n", "plot(\n", " Prior(\n", " PARAMETER_SCALE_NORMAL,\n", - " (10, 1),\n", - " bounds=(10**6, 10**14),\n", + " (2, 1),\n", + " bounds=(10**0, 10**3),\n", " transformation=\"log10\",\n", " )\n", ")\n", - "plot(Prior(LAPLACE, (10, 2), bounds=(6, 14)))" - ] + "plot(Prior(LAPLACE, (10, 2), bounds=(6, 14)))\n", + "plot(Prior(LOG_LAPLACE, (1, 0.5), bounds=(0.5, 8)))\n", + "plot(Prior(LOG_NORMAL, (2, 1), bounds=(0.5, 8)))" + ], + "outputs": [], + "execution_count": null } ], "metadata": { diff --git a/petab/v1/C.py b/petab/v1/C.py index 0c9310b2..09e94c20 100644 --- a/petab/v1/C.py +++ b/petab/v1/C.py @@ -208,6 +208,13 @@ PARAMETER_SCALE_LAPLACE, ] +#: parameterScale*-type prior distributions +PARAMETER_SCALE_PRIOR_TYPES = [ + PARAMETER_SCALE_UNIFORM, + PARAMETER_SCALE_NORMAL, + PARAMETER_SCALE_LAPLACE, +] + #: Supported noise distributions NOISE_MODELS = [NORMAL, LAPLACE] diff --git a/petab/v1/distributions.py b/petab/v1/distributions.py index 23deb423..6b2612fd 100644 --- a/petab/v1/distributions.py +++ b/petab/v1/distributions.py @@ -27,17 +27,69 @@ class Distribution(abc.ABC): :param log: If ``True``, the distribution is transformed to its corresponding log distribution (e.g., Normal -> LogNormal). If a float, the distribution is transformed to its corresponding - log distribution with the given base (e.g., Normal -> Log10Normal). + log distribution with the given log-base (e.g., Normal -> Log10Normal). If ``False``, no transformation is applied. 
+ :param trunc: The truncation points (lower, upper) of the distribution + or ``None`` if the distribution is not truncated. + If the distribution is log-scaled, the truncation limits are expected + to be on the same log scale. """ - def __init__(self, log: bool | float = False): + def __init__( + self, *, log: bool | float = False, trunc: tuple[float, float] = None + ): if log is True: log = np.exp(1) + + if trunc == (-np.inf, np.inf): + trunc = None + + if trunc is not None and trunc[0] >= trunc[1]: + raise ValueError( + "The lower truncation limit must be smaller " + "than the upper truncation limit." + ) + self._logbase = log + self._trunc = trunc + + self._cd_low = None + self._cd_high = None + self._truncation_normalizer = 1 + + if self._trunc is not None: + try: + # the cumulative density of the transformed distribution at the + # truncation limits + self._cd_low = self._cdf_transformed_untruncated( + self.trunc_low + ) + self._cd_high = self._cdf_transformed_untruncated( + self.trunc_high + ) + # normalization factor for the PDF/CDF of the transformed + # distribution to account for truncation + self._truncation_normalizer = 1 / ( + self._cd_high - self._cd_low + ) + except NotImplementedError: + pass + + @property + def trunc_low(self) -> float: + """The lower truncation limit of the transformed distribution.""" + return self._trunc[0] if self._trunc else -np.inf + + @property + def trunc_high(self) -> float: + """The upper truncation limit of the transformed distribution.""" + return self._trunc[1] if self._trunc else np.inf + + def _exp(self, x: np.ndarray | float) -> np.ndarray | float: + """Exponentiate / undo the log transformation if applicable. - def _undo_log(self, x: np.ndarray | float) -> np.ndarray | float: - """Undo the log transformation. + Exponentiate if a log transformation is applied to the distribution. + Otherwise, return the input. :param x: The sample to transform. 
:return: The transformed sample @@ -46,50 +98,62 @@ def _undo_log(self, x: np.ndarray | float) -> np.ndarray | float: return x return self._logbase**x - def _apply_log(self, x: np.ndarray | float) -> np.ndarray | float: - """Apply the log transformation. + def _log(self, x: np.ndarray | float) -> np.ndarray | float: + """Apply the log transformation if enabled. + + Compute the log of `x` with the specified base if a log transformation + is applied to the distribution. Otherwise, return the input. :param x: The value to transform. :return: The transformed value. """ if self._logbase is False: return x - return np.log(x) / np.log(self._logbase) + with np.errstate(invalid="ignore", divide="ignore"): + return np.log(x) / np.log(self._logbase) - def sample(self, shape=None) -> np.ndarray: + def sample(self, shape=None) -> np.ndarray | float: """Sample from the distribution. :param shape: The shape of the sample. :return: A sample from the distribution. """ - sample = self._sample(shape) - return self._undo_log(sample) + sample = ( + self._exp(self._sample(shape)) + if self._trunc is None + else self._inverse_transform_sample(shape) + ) + + return sample @abc.abstractmethod - def _sample(self, shape=None) -> np.ndarray: + def _sample(self, shape=None) -> np.ndarray | float: """Sample from the underlying distribution. :param shape: The shape of the sample. :return: A sample from the underlying distribution, - before applying, e.g., the log transformation. + before applying, e.g., the log transformation or truncation. """ ... - def pdf(self, x): + def pdf(self, x) -> np.ndarray | float: """Probability density function at x. :param x: The value at which to evaluate the PDF. :return: The value of the PDF at ``x``. + NaN, if ``x`` is outside the domain of the PDF. 
""" - # handle the log transformation; see also: - # https://en.wikipedia.org/wiki/Probability_density_function#Scalar_to_scalar - chain_rule_factor = ( - (1 / (x * np.log(self._logbase))) if self._logbase else 1 + if self._trunc is None: + return self._pdf_untruncated(x) + + return np.where( + (x >= self.trunc_low) & (x <= self.trunc_high), + self._pdf_untruncated(x) * self._truncation_normalizer, + 0, ) - return self._pdf(self._apply_log(x)) * chain_rule_factor @abc.abstractmethod - def _pdf(self, x): + def _pdf_untransformed_untruncated(self, x) -> np.ndarray | float: """Probability density function of the underlying distribution at x. :param x: The value at which to evaluate the PDF. @@ -97,6 +161,35 @@ def _pdf(self, x): """ ... + def _pdf_untruncated(self, x) -> np.ndarray | float: + """Probability density function of the untruncated distribution at x. + + :param x: The value at which to evaluate the PDF. + :return: The value of the PDF of the maybe-log-transformed distribution + at ``x``. + """ + if self.logbase is False: + return self._pdf_untransformed_untruncated(x) + + # handle the log transformation; see also: + # https://en.wikipedia.org/wiki/Probability_density_function#Scalar_to_scalar + with np.errstate(invalid="ignore", divide="ignore"): + chain_rule_factor = ( + (1 / (x * np.log(self._logbase))) if self._logbase else 1 + ) + + return np.where( + x >= 0, + np.where( + x > 0, + self._pdf_untransformed_untruncated(self._log(x)) + * chain_rule_factor, + 0, + ), + # NaN outside its domain + np.nan, + ) + @property def logbase(self) -> bool | float: """The base of the log transformation. @@ -105,55 +198,145 @@ def logbase(self) -> bool | float: """ return self._logbase + def cdf(self, x) -> np.ndarray | float: + """Cumulative distribution function at x. + + :param x: The value at which to evaluate the CDF. + :return: The value of the CDF at ``x``. 
+ """ + if self._trunc is None: + return self._cdf_transformed_untruncated(x) + return ( + self._cdf_transformed_untruncated(x) - self._cd_low + ) * self._truncation_normalizer + + def _cdf_transformed_untruncated(self, x) -> np.ndarray | float: + """Cumulative distribution function of the transformed, but untruncated + distribution at x. + + :param x: The value at which to evaluate the CDF. + :return: The value of the CDF at ``x``. + """ + if not self.logbase: + return self._cdf_untransformed_untruncated(x) + + with np.errstate(invalid="ignore"): + return np.where( + x < 0, 0, self._cdf_untransformed_untruncated(self._log(x)) + ) + + def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: + """Cumulative distribution function of the underlying + (untransformed, untruncated) distribution at x. + + :param x: The value at which to evaluate the CDF. + :return: The value of the CDF at ``x``. + """ + raise NotImplementedError + + def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float: + """Percent point function of the underlying + (untransformed, untruncated) distribution at q. + + :param q: The quantile at which to evaluate the PPF. + :return: The value of the PPF at ``q``. + """ + raise NotImplementedError + + def _ppf_transformed_untruncated(self, q) -> np.ndarray | float: + """Percent point function of the transformed, but untruncated + distribution at q. + + :param q: The quantile at which to evaluate the PPF. + :return: The value of the PPF at ``q``. + """ + return self._exp(self._ppf_untransformed_untruncated(q)) + + def ppf(self, q) -> np.ndarray | float: + """Percent point function at q. + + :param q: The quantile at which to evaluate the PPF. + :return: The value of the PPF at ``q``. 
+ """ + if self._trunc is None: + return self._ppf_transformed_untruncated(q) + + # Adjust quantiles to account for truncation + adjusted_q = self._cd_low + q * (self._cd_high - self._cd_low) + return self._ppf_transformed_untruncated(adjusted_q) + + def _inverse_transform_sample(self, shape) -> np.ndarray | float: + """Generate an inverse transform sample from the transformed and + truncated distribution. + + :param shape: The shape of the sample. + :return: The sample. + """ + uniform_sample = np.random.uniform( + low=self._cd_low, high=self._cd_high, size=shape + ) + return self._ppf_transformed_untruncated(uniform_sample) + class Normal(Distribution): """A (log-)normal distribution. :param loc: The location parameter of the distribution. :param scale: The scale parameter of the distribution. - :param truncation: The truncation limits of the distribution. + :param trunc: The truncation limits of the distribution. + ``None`` if the distribution is not truncated. + If the distribution is log-scaled, the truncation limits are expected + to be on the same log scale. :param log: If ``True``, the distribution is transformed to a log-normal distribution. If a float, the distribution is transformed to a - log-normal distribution with the given base. + log-normal distribution with the given log-base. If ``False``, no transformation is applied. If a transformation is applied, the location and scale parameters - and the truncation limits are the location, scale and truncation limits - of the underlying normal distribution. + are the location and scale of the underlying normal distribution. 
""" def __init__( self, loc: float, scale: float, - truncation: tuple[float, float] | None = None, + trunc: tuple[float, float] | None = None, log: bool | float = False, ): - super().__init__(log=log) self._loc = loc self._scale = scale - self._truncation = truncation - - if truncation is not None: - raise NotImplementedError("Truncation is not yet implemented.") + super().__init__(log=log, trunc=trunc) def __repr__(self): - trunc = f", truncation={self._truncation}" if self._truncation else "" - log = f", log={self._logbase}" if self._logbase else "" - return f"Normal(loc={self._loc}, scale={self._scale}{trunc}{log})" + if self._logbase is False: + log = "" + if self._logbase == np.exp(1): + log = ", log=True" + else: + log = f", log={self._logbase}" - def _sample(self, shape=None): + trunc = f", trunc={self._trunc}" if self._trunc else "" + + return f"Normal(loc={self._loc}, scale={self._scale}{log}{trunc})" + + def _sample(self, shape=None) -> np.ndarray | float: return np.random.normal(loc=self._loc, scale=self._scale, size=shape) - def _pdf(self, x): + def _pdf_untransformed_untruncated(self, x) -> np.ndarray | float: return norm.pdf(x, loc=self._loc, scale=self._scale) + def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: + return norm.cdf(x, loc=self._loc, scale=self._scale) + + def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float: + return norm.ppf(q, loc=self._loc, scale=self._scale) + @property - def loc(self): + def loc(self) -> float: """The location parameter of the underlying distribution.""" return self._loc @property - def scale(self): + def scale(self) -> float: """The scale parameter of the underlying distribution.""" return self._scale @@ -165,7 +348,7 @@ class Uniform(Distribution): :param high: The upper bound of the distribution. :param log: If ``True``, the distribution is transformed to a log-uniform distribution. If a float, the distribution is transformed to a - log-uniform distribution with the given base. 
+ log-uniform distribution with the given log-base. If ``False``, no transformation is applied. If a transformation is applied, the lower and upper bounds are the lower and upper bounds of the underlying uniform distribution. @@ -178,67 +361,78 @@ def __init__( *, log: bool | float = False, ): - super().__init__(log=log) self._low = low self._high = high + super().__init__(log=log) def __repr__(self): log = f", log={self._logbase}" if self._logbase else "" return f"Uniform(low={self._low}, high={self._high}{log})" - def _sample(self, shape=None): + def _sample(self, shape=None) -> np.ndarray | float: return np.random.uniform(low=self._low, high=self._high, size=shape) - def _pdf(self, x): + def _pdf_untransformed_untruncated(self, x) -> np.ndarray | float: return uniform.pdf(x, loc=self._low, scale=self._high - self._low) + def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: + return uniform.cdf(x, loc=self._low, scale=self._high - self._low) + + def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float: + return uniform.ppf(q, loc=self._low, scale=self._high - self._low) + class Laplace(Distribution): """A (log-)Laplace distribution. :param loc: The location parameter of the distribution. :param scale: The scale parameter of the distribution. - :param truncation: The truncation limits of the distribution. + :param trunc: The truncation limits of the distribution. + ``None`` if the distribution is not truncated. + If the distribution is log-scaled, the truncation limits are expected + to be on the same log scale. :param log: If ``True``, the distribution is transformed to a log-Laplace distribution. If a float, the distribution is transformed to a - log-Laplace distribution with the given base. + log-Laplace distribution with the given log-base. If ``False``, no transformation is applied. 
If a transformation is applied, the location and scale parameters - and the truncation limits are the location, scale and truncation limits - of the underlying Laplace distribution. + are the location and scale of the underlying Laplace distribution. """ def __init__( self, loc: float, scale: float, - truncation: tuple[float, float] | None = None, + trunc: tuple[float, float] | None = None, log: bool | float = False, ): - super().__init__(log=log) self._loc = loc self._scale = scale - self._truncation = truncation - if truncation is not None: - raise NotImplementedError("Truncation is not yet implemented.") + super().__init__(log=log, trunc=trunc) def __repr__(self): - trunc = f", truncation={self._truncation}" if self._truncation else "" + trunc = f", trunc={self._trunc}" if self._trunc else "" log = f", log={self._logbase}" if self._logbase else "" return f"Laplace(loc={self._loc}, scale={self._scale}{trunc}{log})" - def _sample(self, shape=None): + def _sample(self, shape=None) -> np.ndarray | float: return np.random.laplace(loc=self._loc, scale=self._scale, size=shape) - def _pdf(self, x): + def _pdf_untransformed_untruncated(self, x) -> np.ndarray | float: return laplace.pdf(x, loc=self._loc, scale=self._scale) + def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: + return laplace.cdf(x, loc=self._loc, scale=self._scale) + + def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float: + return laplace.ppf(q, loc=self._loc, scale=self._scale) + @property - def loc(self): + def loc(self) -> float: """The location parameter of the underlying distribution.""" return self._loc @property - def scale(self): + def scale(self) -> float: """The scale parameter of the underlying distribution.""" return self._scale diff --git a/petab/v1/parameter_mapping.py b/petab/v1/parameter_mapping.py index 06e31fe4..a438c6b2 100644 --- a/petab/v1/parameter_mapping.py +++ b/petab/v1/parameter_mapping.py @@ -404,7 +404,7 @@ def get_parameter_mapping_for_condition( 
# initialize mapping dicts # for the case of matching simulation and optimization parameter vector par_mapping = simulation_parameters.copy() - scale_mapping = {par_id: LIN for par_id in par_mapping.keys()} + scale_mapping = dict.fromkeys(par_mapping.keys(), LIN) _output_parameters_to_nan(par_mapping) # not strictly necessary for preequilibration, be we do it to have diff --git a/petab/v1/priors.py b/petab/v1/priors.py index 1d2b9802..6531fa0e 100644 --- a/petab/v1/priors.py +++ b/petab/v1/priors.py @@ -59,6 +59,14 @@ class Prior: on the `parameter_scale` scale). :param bounds: The untransformed bounds of the sample (lower, upper). :param transformation: The transformation of the distribution. + :param _bounds_truncate: **deprecated** + Whether the generated prior will be truncated at the bounds. + If ``True``, the probability density will be rescaled + accordingly and the sample is generated from the truncated + distribution. + If ``False``, the probability density will not be rescaled + accordingly, but the sample will be generated from the truncated + distribution. 
""" def __init__( @@ -67,6 +75,7 @@ def __init__( parameters: tuple, bounds: tuple = None, transformation: str = C.LIN, + _bounds_truncate: bool = True, ): if transformation not in C.PARAMETER_SCALES: raise ValueError( @@ -88,27 +97,51 @@ def __init__( self._parameters = parameters self._bounds = bounds self._transformation = transformation + self._bounds_truncate = _bounds_truncate + + truncation = bounds + if truncation is not None: + # for uniform, we don't want to implement truncation and just + # adapt the distribution parameters + if type_ == C.PARAMETER_SCALE_UNIFORM: + parameters = ( + max(parameters[0], scale(truncation[0], transformation)), + min(parameters[1], scale(truncation[1], transformation)), + ) + elif type_ == C.UNIFORM: + parameters = ( + max(parameters[0], truncation[0]), + min(parameters[1], truncation[1]), + ) # create the underlying distribution match type_, transformation: case (C.UNIFORM, _) | (C.PARAMETER_SCALE_UNIFORM, C.LIN): self.distribution = Uniform(*parameters) case (C.NORMAL, _) | (C.PARAMETER_SCALE_NORMAL, C.LIN): - self.distribution = Normal(*parameters) + self.distribution = Normal(*parameters, trunc=truncation) case (C.LAPLACE, _) | (C.PARAMETER_SCALE_LAPLACE, C.LIN): - self.distribution = Laplace(*parameters) + self.distribution = Laplace(*parameters, trunc=truncation) case (C.PARAMETER_SCALE_UNIFORM, C.LOG): self.distribution = Uniform(*parameters, log=True) case (C.LOG_NORMAL, _) | (C.PARAMETER_SCALE_NORMAL, C.LOG): - self.distribution = Normal(*parameters, log=True) + self.distribution = Normal( + *parameters, log=True, trunc=truncation + ) case (C.LOG_LAPLACE, _) | (C.PARAMETER_SCALE_LAPLACE, C.LOG): - self.distribution = Laplace(*parameters, log=True) + self.distribution = Laplace( + *parameters, log=True, trunc=truncation + ) case (C.PARAMETER_SCALE_UNIFORM, C.LOG10): self.distribution = Uniform(*parameters, log=10) case (C.PARAMETER_SCALE_NORMAL, C.LOG10): - self.distribution = Normal(*parameters, log=10) + 
self.distribution = Normal( + *parameters, log=10, trunc=truncation + ) case (C.PARAMETER_SCALE_LAPLACE, C.LOG10): - self.distribution = Laplace(*parameters, log=10) + self.distribution = Laplace( + *parameters, log=10, trunc=truncation + ) case _: raise ValueError( "Unsupported distribution type / transformation: " @@ -124,69 +157,55 @@ def __repr__(self): ) @property - def type(self): + def type(self) -> str: return self._type @property - def parameters(self): + def parameters(self) -> tuple: + """The parameters of the distribution.""" return self._parameters @property - def bounds(self): + def bounds(self) -> tuple[float, float] | None: + """The non-scaled bounds of the distribution.""" return self._bounds @property - def transformation(self): + def transformation(self) -> str: + """The `parameterScale`.""" return self._transformation - def sample(self, shape=None) -> np.ndarray: + def sample(self, shape=None, x_scaled=False) -> np.ndarray | float: """Sample from the distribution. :param shape: The shape of the sample. + :param x_scaled: Whether the sample should be on the parameter scale. :return: A sample from the distribution. """ raw_sample = self.distribution.sample(shape) - return self._clip_to_bounds(self._scale_sample(raw_sample)) + if x_scaled: + return self._scale_sample(raw_sample) + else: + return raw_sample def _scale_sample(self, sample): """Scale the sample to the parameter space""" - # if self.on_parameter_scale: - # return sample - + # we also need to scale parameterScale* distributions, because + # internally, they are handled as (unscaled) log-distributions return scale(sample, self.transformation) - def _clip_to_bounds(self, x): - """Clip `x` values to bounds. - - :param x: The values to clip. Assumed to be on the parameter scale. 
- """ - # TODO: replace this by proper truncation - if self.bounds is None: - return x - - return np.maximum( - np.minimum(self.ub_scaled, x), - self.lb_scaled, - ) - @property - def lb_scaled(self): + def lb_scaled(self) -> float: """The lower bound on the parameter scale.""" return scale(self.bounds[0], self.transformation) @property - def ub_scaled(self): + def ub_scaled(self) -> float: """The upper bound on the parameter scale.""" return scale(self.bounds[1], self.transformation) - def pdf(self, x): - """Probability density function at x. - - :param x: The value at which to evaluate the PDF. - ``x`` is assumed to be on the parameter scale. - :return: The value of the PDF at ``x``. Note that the PDF does - currently not account for the clipping at the bounds. - """ + def _chain_rule_coeff(self, x) -> np.ndarray | float: + """The chain rule coefficient for the transformation at x.""" x = unscale(x, self.transformation) # scale the PDF to the parameter scale @@ -199,25 +218,63 @@ def pdf(self, x): else: raise ValueError(f"Unknown transformation: {self.transformation}") - return self.distribution.pdf(x) * coeff + return coeff - def neglogprior(self, x): + def pdf( + self, x, x_scaled: bool = False, rescale=False + ) -> np.ndarray | float: + """Probability density function at x. + + This accounts for truncation, independent of the `bounds_truncate` + parameter. + + :param x: The value at which to evaluate the PDF. + ``x`` is assumed to be on the parameter scale. + :param x_scaled: Whether ``x`` is on the parameter scale. + :param rescale: Whether to rescale the PDF to integrate to 1 on the + parameter scale. Only used if ``x_scaled`` is ``True``. + :return: The value of the PDF at ``x``. 
+ """ + if x_scaled: + coeff = self._chain_rule_coeff(x) if rescale else 1 + x = unscale(x, self.transformation) + return self.distribution.pdf(x) * coeff + + return self.distribution.pdf(x) + + def neglogprior( + self, x: np.array | float, x_scaled: bool = False + ) -> np.ndarray | float: """Negative log-prior at x. :param x: The value at which to evaluate the negative log-prior. - ``x`` is assumed to be on the parameter scale. + :param x_scaled: Whether ``x`` is on the parameter scale. + Note that the prior is always evaluated on the non-scaled + parameters. :return: The negative log-prior at ``x``. """ - return -np.log(self.pdf(x)) + if self._bounds_truncate: + # the truncation is handled by the distribution + # the prior is always evaluated on the non-scaled parameters + return -np.log(self.pdf(x, x_scaled=x_scaled, rescale=False)) + + # we want to evaluate the prior on the untruncated distribution + if x_scaled: + x = unscale(x, self.transformation) + return -np.log(self.distribution._pdf_untruncated(x)) @staticmethod def from_par_dict( - d, type_=Literal["initialization", "objective"] + d, + type_=Literal["initialization", "objective"], + _bounds_truncate: bool = True, ) -> Prior: """Create a distribution from a row of the parameter table. :param d: A dictionary representing a row of the parameter table. :param type_: The type of the distribution. + :param _bounds_truncate: Whether the generated prior will be truncated + at the bounds. **deprecated**. :return: A distribution object. """ dist_type = d.get(f"{type_}PriorType", C.PARAMETER_SCALE_UNIFORM) @@ -243,6 +300,7 @@ def from_par_dict( parameters=params, bounds=(d[C.LOWER_BOUND], d[C.UPPER_BOUND]), transformation=pscale, + _bounds_truncate=_bounds_truncate, ) @@ -271,6 +329,12 @@ def priors_to_measurements(problem: Problem): - `measurement`: the PDF location - `noiseFormula`: the PDF scale + .. 
warning:: + + This function does not account for the truncation of the prior by + the bounds in the parameter table. The resulting observable will + not be truncated, and the PDF will not be rescaled. + Arguments --------- problem: @@ -295,6 +359,7 @@ def priors_to_measurements(problem: Problem): return new_problem def scaled_observable_formula(parameter_id, parameter_scale): + # The location parameter of the prior if parameter_scale == LIN: return parameter_id if parameter_scale == LOG: @@ -323,6 +388,12 @@ def scaled_observable_formula(parameter_id, parameter_scale): # offset raise NotImplementedError("Uniform priors are not supported.") + if prior_type not in (C.NORMAL, C.LAPLACE): + # we can't (easily) handle parameterScale* priors or log*-priors + raise NotImplementedError( + f"Objective prior type {prior_type} is not implemented." + ) + parameter_id = row.name prior_parameters = tuple( map( @@ -347,7 +418,9 @@ def scaled_observable_formula(parameter_id, parameter_scale): OBSERVABLE_ID: new_obs_id, OBSERVABLE_FORMULA: scaled_observable_formula( parameter_id, - parameter_scale if "parameterScale" in prior_type else LIN, + parameter_scale + if prior_type in C.PARAMETER_SCALE_PRIOR_TYPES + else LIN, ), NOISE_FORMULA: f"noiseParameter1_{new_obs_id}", } @@ -356,12 +429,13 @@ def scaled_observable_formula(parameter_id, parameter_scale): elif OBSERVABLE_TRANSFORMATION in new_problem.observable_df: # only set default if the column is already present new_observable[OBSERVABLE_TRANSFORMATION] = LIN - + # type of the underlying distribution if prior_type in (NORMAL, PARAMETER_SCALE_NORMAL, LOG_NORMAL): new_observable[NOISE_DISTRIBUTION] = NORMAL elif prior_type in (LAPLACE, PARAMETER_SCALE_LAPLACE, LOG_LAPLACE): new_observable[NOISE_DISTRIBUTION] = LAPLACE else: + # we can't (easily) handle uniform priors in PEtab v1 raise NotImplementedError( f"Objective prior type {prior_type} is not implemented." 
) diff --git a/petab/v1/sampling.py b/petab/v1/sampling.py index a046879f..c99a1eef 100644 --- a/petab/v1/sampling.py +++ b/petab/v1/sampling.py @@ -28,7 +28,10 @@ def sample_from_prior( # unpack info p_type, p_params, scaling, bounds = prior prior = Prior( - p_type, tuple(p_params), bounds=tuple(bounds), transformation=scaling + p_type, + tuple(p_params), + bounds=tuple(bounds), + transformation=scaling, ) return prior.sample(shape=(n_starts,)) @@ -74,7 +77,10 @@ def sample_parameter_startpoints( # get types and parameters of priors from dataframe return np.array( [ - Prior.from_par_dict(row, type_="initialization").sample(n_starts) + Prior.from_par_dict( + row, + type_="initialization", + ).sample(n_starts, x_scaled=True) for row in par_to_estimate.to_dict("records") ] ).T diff --git a/tests/v1/test_distributions.py b/tests/v1/test_distributions.py index 9df830fa..e06d9edc 100644 --- a/tests/v1/test_distributions.py +++ b/tests/v1/test_distributions.py @@ -1,3 +1,5 @@ +import sys + import numpy as np import pytest from numpy.testing import assert_allclose @@ -27,6 +29,11 @@ Uniform(2, 4, log=10), Laplace(1, 2), Laplace(1, 0.5, log=True), + Normal(2, 1, trunc=(1, 2)), + Normal(2, 1, log=True, trunc=(0.5, 8)), + Normal(2, 1, log=10), + Laplace(1, 2, trunc=(1, 2)), + Laplace(1, 0.5, log=True, trunc=(0.5, 8)), ], ) def test_sample_matches_pdf(distribution): @@ -51,36 +58,54 @@ def cdf(x): assert p > 0.05, (p, distribution) + # check min/max of CDF at the bounds + assert np.isclose( + distribution.cdf( + distribution.trunc_low + if not distribution.logbase + else max(sys.float_info.min, distribution.trunc_low) + ), + 0, + atol=1e-16, + rtol=0, + ) + assert np.isclose( + distribution.cdf(distribution.trunc_high), 1, atol=1e-14, rtol=0 + ) + # Test samples match scipy CDFs reference_pdf = None - if isinstance(distribution, Normal) and distribution.logbase is False: - reference_pdf = norm.pdf(sample, distribution.loc, distribution.scale) - elif isinstance(distribution, 
Uniform) and distribution.logbase is False: - reference_pdf = uniform.pdf( - sample, distribution._low, distribution._high - distribution._low - ) - elif isinstance(distribution, Laplace) and distribution.logbase is False: - reference_pdf = laplace.pdf( - sample, distribution.loc, distribution.scale - ) - elif isinstance(distribution, Normal) and distribution.logbase == np.exp( - 1 - ): - reference_pdf = lognorm.pdf( - sample, scale=np.exp(distribution.loc), s=distribution.scale - ) - elif isinstance(distribution, Uniform) and distribution.logbase == np.exp( - 1 - ): - reference_pdf = loguniform.pdf( - sample, np.exp(distribution._low), np.exp(distribution._high) - ) - elif isinstance(distribution, Laplace) and distribution.logbase == np.exp( - 1 - ): - reference_pdf = loglaplace.pdf( - sample, c=1 / distribution.scale, scale=np.exp(distribution.loc) - ) + if distribution._trunc is None and distribution.logbase is False: + if isinstance(distribution, Normal): + reference_pdf = norm.pdf( + sample, distribution.loc, distribution.scale + ) + elif isinstance(distribution, Uniform): + reference_pdf = uniform.pdf( + sample, + distribution._low, + distribution._high - distribution._low, + ) + elif isinstance(distribution, Laplace): + reference_pdf = laplace.pdf( + sample, distribution.loc, distribution.scale + ) + + if distribution._trunc is None and distribution.logbase == np.exp(1): + if isinstance(distribution, Normal): + reference_pdf = lognorm.pdf( + sample, scale=np.exp(distribution.loc), s=distribution.scale + ) + elif isinstance(distribution, Uniform): + reference_pdf = loguniform.pdf( + sample, np.exp(distribution._low), np.exp(distribution._high) + ) + elif isinstance(distribution, Laplace): + reference_pdf = loglaplace.pdf( + sample, + c=1 / distribution.scale, + scale=np.exp(distribution.loc), + ) if reference_pdf is not None: assert_allclose( distribution.pdf(sample), reference_pdf, rtol=1e-10, atol=1e-14 diff --git a/tests/v1/test_priors.py 
b/tests/v1/test_priors.py index d98879e3..a0ad1116 100644 --- a/tests/v1/test_priors.py +++ b/tests/v1/test_priors.py @@ -6,7 +6,7 @@ import numpy as np import pandas as pd import pytest -from scipy.integrate import cumulative_trapezoid +from scipy.integrate import cumulative_trapezoid, quad from scipy.stats import kstest import petab.v1 @@ -20,9 +20,39 @@ get_simulation_conditions, get_simulation_df, ) +from petab.v1.calculate import calculate_single_llh from petab.v1.priors import Prior, priors_to_measurements +def test_priors_to_measurements_simple(): + """Test the conversion of priors to measurements. + + Illustrates & tests the conversion of a prior to a measurement. + """ + # parameter value at which we evaluate the prior + par_value = 2.5 + # location and scale parameters of the prior + prior_loc = 3 + prior_scale = 3 + + for prior_type in [C.NORMAL, C.LAPLACE]: + # evaluate the original prior + prior = Prior( + prior_type, (prior_loc, prior_scale), transformation=C.LIN + ) + logprior = -prior.neglogprior(par_value, x_scaled=False) + + # evaluate the alternative implementation as a measurement + llh = calculate_single_llh( + measurement=prior_loc, + simulation=par_value, + scale=C.LIN, + noise_distribution=prior_type, + noise_value=prior_scale, + ) + assert np.isclose(llh, logprior, rtol=1e-12, atol=1e-16) + + @pytest.mark.parametrize( "problem_id", ["Schwen_PONE2014", "Isensee_JCB2018", "Raimundez_PCB2020"] ) @@ -59,8 +89,13 @@ def test_priors_to_measurements(problem_id): ) ) - # convert priors to measurements - petab_problem_measurements = priors_to_measurements(petab_problem_priors) + try: + # convert priors to measurements + petab_problem_measurements = priors_to_measurements( + petab_problem_priors + ) + except NotImplementedError as e: + pytest.skip(str(e)) # check that the original problem is not modified for attr in [ @@ -121,9 +156,12 @@ def apply_parameter_values(row): # apply the parameter values to the observable formula for the prior if 
row[OBSERVABLE_ID].startswith("prior_"): parameter_id = row[OBSERVABLE_ID].removeprefix("prior_") - if original_problem.parameter_df.loc[ - parameter_id, OBJECTIVE_PRIOR_TYPE - ].startswith("parameterScale"): + if ( + original_problem.parameter_df.loc[ + parameter_id, OBJECTIVE_PRIOR_TYPE + ] + in C.PARAMETER_SCALE_PRIOR_TYPES + ): row[SIMULATION] = x_scaled_dict[parameter_id] else: row[SIMULATION] = x_unscaled_dict[parameter_id] @@ -156,13 +194,17 @@ def apply_parameter_values(row): ] priors = [ Prior.from_par_dict( - petab_problem_priors.parameter_df.loc[par_id], type_="objective" + petab_problem_priors.parameter_df.loc[par_id], + type_="objective", + _bounds_truncate=False, ) for par_id in parameter_ids ] prior_contrib = 0 for parameter_id, prior in zip(parameter_ids, priors, strict=True): - prior_contrib -= prior.neglogprior(x_scaled_dict[parameter_id]) + prior_contrib -= prior.neglogprior( + x_scaled_dict[parameter_id], x_scaled=True + ) assert np.isclose( llh_priors + prior_contrib, llh_measurements, rtol=1e-8, atol=1e-16 @@ -194,21 +236,46 @@ def test_sample_matches_pdf(prior_args, transform): """Test that the sample matches the PDF.""" np.random.seed(1) N_SAMPLES = 10_000 + prior = Prior(*prior_args, transformation=transform) - sample = prior.sample(N_SAMPLES) - # pdf -> cdf - def cdf(x): - return cumulative_trapezoid(prior.pdf(x), x) + for x_scaled in [False, True]: + sample = prior.sample(N_SAMPLES, x_scaled=x_scaled) + + # pdf -> cdf + def cdf(x): + return cumulative_trapezoid( + prior.pdf( + x, + x_scaled=x_scaled, # noqa B208 + rescale=x_scaled, # noqa B208 + ), + x, + ) + + # Kolmogorov-Smirnov test to check if the sample is drawn from the CDF + _, p = kstest(sample, cdf) - # Kolmogorov-Smirnov test to check if the sample is drawn from the CDF - _, p = kstest(sample, cdf) + if p < 0.05: + import matplotlib.pyplot as plt - # if p < 0.05: - # import matplotlib.pyplot as plt - # plt.hist(sample, bins=100, density=True) - # x = np.linspace(min(sample), 
max(sample), 100) - # plt.plot(x, distribution.pdf(x)) - # plt.show() + plt.hist(sample, bins=100, density=True) + x = np.linspace(min(sample), max(sample), 100) + plt.plot(x, prior.pdf(x, x_scaled=x_scaled, rescale=x_scaled)) + plt.xlabel(("scaled" if x_scaled else "unscaled") + " x") + plt.ylabel(("rescaled " if x_scaled else "") + "density") + plt.title(str(prior)) + plt.show() - assert p > 0.05, (p, prior) + assert p > 0.05, (p, prior) + + # check that the integral of the PDF is 1 for the unscaled parameters + integral, abserr = quad( + lambda x: prior.pdf(x, x_scaled=False), + -np.inf if prior.distribution.logbase is False else 0, + np.inf, + limit=100, + epsabs=1e-10, + epsrel=0, + ) + assert np.isclose(integral, 1, rtol=0, atol=10 * abserr) From 2a9142c58c2c5b9aa5ae502f551ee227ce0c99cf Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Fri, 25 Apr 2025 08:12:02 +0200 Subject: [PATCH 045/141] v2: Allow applying multiple conditions simultaneously (#373) --------- Co-authored-by: Dilan Pathirana <59329744+dilpath@users.noreply.github.com> --- petab/v2/core.py | 41 +++++++++++++++++++------------- petab/v2/lint.py | 55 +++++++++++++++++++++++++++++++++---------- petab/v2/problem.py | 7 +++++- tests/v2/test_core.py | 16 ++++++------- tests/v2/test_lint.py | 18 +++++++++++++- 5 files changed, 98 insertions(+), 39 deletions(-) diff --git a/petab/v2/core.py b/petab/v2/core.py index 72eaec31..41abfb36 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -475,7 +475,7 @@ def free_symbols(self) -> set[sp.Symbol]: class ExperimentPeriod(BaseModel): """A period of a timecourse or experiment defined by a start time - and a condition ID. + and a list of condition IDs. This corresponds to a row of the PEtab experiments table. """ @@ -484,20 +484,19 @@ class ExperimentPeriod(BaseModel): time: Annotated[float, AfterValidator(_is_finite_or_neg_inf)] = Field( alias=C.TIME ) - #: The ID of the condition to be applied at the start time. 
- condition_id: str | None = Field(alias=C.CONDITION_ID, default=None) + #: The IDs of the conditions to be applied at the start time. + condition_ids: list[str] = Field(default_factory=list) #: :meta private: model_config = ConfigDict(populate_by_name=True, extra="allow") - @field_validator("condition_id", mode="before") + @field_validator("condition_ids", mode="before") @classmethod - def _validate_id(cls, condition_id): - if pd.isna(condition_id) or not condition_id: - return None - if not is_valid_identifier(condition_id): - raise ValueError(f"Invalid ID: {condition_id}") - return condition_id + def _validate_ids(cls, condition_ids): + for condition_id in condition_ids: + if not is_valid_identifier(condition_id): + raise ValueError(f"Invalid ID: {condition_id}") + return condition_ids class Experiment(BaseModel): @@ -548,12 +547,20 @@ def from_df(cls, df: pd.DataFrame) -> ExperimentTable: experiments = [] for experiment_id, cur_exp_df in df.groupby(C.EXPERIMENT_ID): - periods = [ - ExperimentPeriod( - time=row[C.TIME], condition_id=row[C.CONDITION_ID] + periods = [] + for timepoint in cur_exp_df[C.TIME].unique(): + condition_ids = [ + cid + for cid in cur_exp_df.loc[ + cur_exp_df[C.TIME] == timepoint, C.CONDITION_ID + ] + if not pd.isna(cid) + ] + periods.append( + ExperimentPeriod( + time=timepoint, condition_ids=condition_ids + ) ) - for _, row in cur_exp_df.iterrows() - ] experiments.append(Experiment(id=experiment_id, periods=periods)) return cls(experiments=experiments) @@ -563,10 +570,12 @@ def to_df(self) -> pd.DataFrame: records = [ { C.EXPERIMENT_ID: experiment.id, - **period.model_dump(by_alias=True), + C.TIME: period.time, + C.CONDITION_ID: condition_id, } for experiment in self.experiments for period in experiment.periods + for condition_id in period.condition_ids or [""] ] return ( pd.DataFrame(records) diff --git a/petab/v2/lint.py b/petab/v2/lint.py index 71d655dd..6e7fc161 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -8,6 +8,7 @@ from 
collections.abc import Set from dataclasses import dataclass, field from enum import IntEnum +from itertools import chain from pathlib import Path import pandas as pd @@ -373,8 +374,10 @@ class CheckValidConditionTargets(ValidationTask): """Check that all condition table targets are valid.""" def run(self, problem: Problem) -> ValidationIssue | None: - allowed_targets = set( - problem.model.get_valid_ids_for_condition_table() + allowed_targets = ( + set(problem.model.get_valid_ids_for_condition_table()) + if problem.model + else set() ) allowed_targets |= set(get_output_parameters(problem)) allowed_targets |= { @@ -394,6 +397,28 @@ def run(self, problem: Problem) -> ValidationIssue | None: f"Condition table contains invalid targets: {invalid}" ) + # Check that changes of simultaneously applied conditions don't + # intersect + for experiment in problem.experiment_table.experiments: + for period in experiment.periods: + if not period.condition_ids: + continue + period_targets = set() + for condition_id in period.condition_ids: + condition_targets = { + change.target_id + for cond in problem.condition_table.conditions + if cond.id == condition_id + for change in cond.changes + } + if invalid := (period_targets & condition_targets): + return ValidationError( + "Simultaneously applied conditions for experiment " + f"{experiment.id} have overlapping targets " + f"{invalid} at time {period.time}." 
+ ) + period_targets |= condition_targets + class CheckUniquePrimaryKeys(ValidationTask): """Check that all primary keys are unique.""" @@ -484,11 +509,14 @@ def run(self, problem: Problem) -> ValidationIssue | None: c.id for c in problem.condition_table.conditions } for experiment in problem.experiment_table.experiments: - missing_conditions = { - period.condition_id - for period in experiment.periods - if period.condition_id is not None - } - available_conditions + missing_conditions = ( + set( + chain.from_iterable( + period.condition_ids for period in experiment.periods + ) + ) + - available_conditions + ) if missing_conditions: messages.append( f"Experiment {experiment.id} requires conditions that are " @@ -646,12 +674,13 @@ class CheckUnusedConditions(ValidationTask): table.""" def run(self, problem: Problem) -> ValidationIssue | None: - used_conditions = { - p.condition_id - for e in problem.experiment_table.experiments - for p in e.periods - if p.condition_id is not None - } + used_conditions = set( + chain.from_iterable( + p.condition_ids + for e in problem.experiment_table.experiments + for p in e.periods + ) + ) available_conditions = { c.id for c in problem.condition_table.conditions } diff --git a/petab/v2/problem.py b/petab/v2/problem.py index 294256a8..c2a8a6fa 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -1062,7 +1062,12 @@ def add_experiment(self, id_: str, *args): ) periods = [ - core.ExperimentPeriod(time=args[i], condition_id=args[i + 1]) + core.ExperimentPeriod( + time=args[i], + condition_ids=[cond] + if isinstance((cond := args[i + 1]), str) + else cond, + ) for i in range(0, len(args), 2) ] diff --git a/tests/v2/test_core.py b/tests/v2/test_core.py index 181f5523..074c0d2d 100644 --- a/tests/v2/test_core.py +++ b/tests/v2/test_core.py @@ -39,9 +39,9 @@ def test_experiment_add_periods(): exp = Experiment(id="exp1") assert exp.periods == [] - p1 = ExperimentPeriod(time=0, condition_id="p1") - p2 = ExperimentPeriod(time=1, 
condition_id="p2") - p3 = ExperimentPeriod(time=2, condition_id="p3") + p1 = ExperimentPeriod(time=0, condition_ids=["p1"]) + p2 = ExperimentPeriod(time=1, condition_ids=["p2"]) + p3 = ExperimentPeriod(time=2, condition_ids=["p3"]) exp += p1 exp += p2 @@ -201,8 +201,8 @@ def test_change(): def test_period(): ExperimentPeriod(time=0) - ExperimentPeriod(time=1, condition_id="p1") - ExperimentPeriod(time="-inf", condition_id="p1") + ExperimentPeriod(time=1, condition_ids=["p1"]) + ExperimentPeriod(time="-inf", condition_ids=["p1"]) assert ( ExperimentPeriod(time="1", condition_id="p1", non_petab=1).non_petab @@ -210,13 +210,13 @@ def test_period(): ) with pytest.raises(ValidationError, match="got inf"): - ExperimentPeriod(time="inf", condition_id="p1") + ExperimentPeriod(time="inf", condition_ids=["p1"]) with pytest.raises(ValidationError, match="Invalid ID"): - ExperimentPeriod(time=1, condition_id="1_condition") + ExperimentPeriod(time=1, condition_ids=["1_condition"]) with pytest.raises(ValidationError, match="type=missing"): - ExperimentPeriod(condition_id="condition") + ExperimentPeriod(condition_ids=["condition"]) def test_parameter(): diff --git a/tests/v2/test_lint.py b/tests/v2/test_lint.py index 33cdb300..74aaaa29 100644 --- a/tests/v2/test_lint.py +++ b/tests/v2/test_lint.py @@ -3,8 +3,8 @@ from copy import deepcopy from petab.v2 import Problem -from petab.v2.C import * from petab.v2.lint import * +from petab.v2.models.sbml_model import SbmlModel def test_check_experiments(): @@ -21,3 +21,19 @@ def test_check_experiments(): tmp_problem = deepcopy(problem) tmp_problem["e1"].periods[0].time = tmp_problem["e1"].periods[1].time assert check.run(tmp_problem) is not None + + +def test_check_incompatible_targets(): + """Multiple conditions with overlapping targets cannot be applied + at the same time.""" + problem = Problem() + problem.model = SbmlModel.from_antimony("p1 = 1; p2 = 2") + problem.add_experiment("e1", 0, "c1", 1, "c2") + problem.add_condition("c1", 
p1="1") + problem.add_condition("c2", p1="2", p2="2") + check = CheckValidConditionTargets() + assert check.run(problem) is None + + problem["e1"].periods[0].condition_ids.append("c2") + assert (error := check.run(problem)) is not None + assert "overlapping targets {'p1'}" in error.message From 87cec8c3cc1d18234ccb4461711cf8b9e194eb82 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Fri, 25 Apr 2025 08:50:49 +0200 Subject: [PATCH 046/141] v2: Adapt models to changes in prior distributions (#378) * priorDistribution/priorParameters instead of separate initializationPrior&objectivePrior * additional prior distributions Related to https://github.com/PEtab-dev/libpetab-python/issues/374. --------- Co-authored-by: Dilan Pathirana <59329744+dilpath@users.noreply.github.com> --- petab/v2/C.py | 75 +++++++++++++++++++++++++-------------------- petab/v2/core.py | 73 ++++++++++++++++++++++++------------------- petab/v2/problem.py | 49 +++++++++++------------------ 3 files changed, 102 insertions(+), 95 deletions(-) diff --git a/petab/v2/C.py b/petab/v2/C.py index c94a1d29..b5325ff4 100644 --- a/petab/v2/C.py +++ b/petab/v2/C.py @@ -79,14 +79,10 @@ NOMINAL_VALUE = "nominalValue" #: Estimate column in the parameter table ESTIMATE = "estimate" -#: Initialization prior type column in the parameter table -INITIALIZATION_PRIOR_TYPE = "initializationPriorType" -#: Initialization prior parameters column in the parameter table -INITIALIZATION_PRIOR_PARAMETERS = "initializationPriorParameters" -#: Objective prior type column in the parameter table -OBJECTIVE_PRIOR_TYPE = "objectivePriorType" -#: Objective prior parameters column in the parameter table -OBJECTIVE_PRIOR_PARAMETERS = "objectivePriorParameters" +#: Prior distribution type column in the parameter table +PRIOR_DISTRIBUTION = "priorDistribution" +#: Prior parameters column in the parameter table +PRIOR_PARAMETERS = "priorParameters" #: Mandatory columns of parameter table PARAMETER_DF_REQUIRED_COLS = [ @@ -101,10 +97,8 @@ 
PARAMETER_DF_OPTIONAL_COLS = [ PARAMETER_NAME, NOMINAL_VALUE, - INITIALIZATION_PRIOR_TYPE, - INITIALIZATION_PRIOR_PARAMETERS, - OBJECTIVE_PRIOR_TYPE, - OBJECTIVE_PRIOR_PARAMETERS, + PRIOR_DISTRIBUTION, + PRIOR_PARAMETERS, ] #: Parameter table columns @@ -193,35 +187,50 @@ # NOISE MODELS -#: Uniform distribution -UNIFORM = "uniform" -#: Uniform distribution on the parameter scale -PARAMETER_SCALE_UNIFORM = "parameterScaleUniform" -#: Normal distribution -NORMAL = "normal" -#: Normal distribution on the parameter scale -PARAMETER_SCALE_NORMAL = "parameterScaleNormal" + +#: Cauchy distribution. +CAUCHY = "cauchy" +#: Chi-squared distribution. +# FIXME: "chisquare" in PEtab and sbml-distrib, but usually "chi-squared" +CHI_SQUARED = "chisquare" +#: Exponential distribution. +EXPONENTIAL = "exponential" +#: Gamma distribution. +GAMMA = "gamma" #: Laplace distribution LAPLACE = "laplace" -#: Laplace distribution on the parameter scale -PARAMETER_SCALE_LAPLACE = "parameterScaleLaplace" -#: Log-normal distribution -LOG_NORMAL = "logNormal" +#: Log10-normal distribution. +LOG10_NORMAL = "log10-normal" #: Log-Laplace distribution -LOG_LAPLACE = "logLaplace" +LOG_LAPLACE = "log-laplace" +#: Log-normal distribution +LOG_NORMAL = "log-normal" +#: Log-uniform distribution. +LOG_UNIFORM = "log-uniform" +#: Normal distribution +NORMAL = "normal" +#: Rayleigh distribution. 
+RAYLEIGH = "rayleigh" +#: Uniform distribution +UNIFORM = "uniform" -#: Supported prior types -PRIOR_TYPES = [ - UNIFORM, - NORMAL, +#: Supported prior distribution types +PRIOR_DISTRIBUTIONS = [ + CAUCHY, + CHI_SQUARED, + EXPONENTIAL, + GAMMA, LAPLACE, - LOG_NORMAL, + LOG10_NORMAL, LOG_LAPLACE, - PARAMETER_SCALE_UNIFORM, - PARAMETER_SCALE_NORMAL, - PARAMETER_SCALE_LAPLACE, + LOG_NORMAL, + LOG_UNIFORM, + NORMAL, + RAYLEIGH, + UNIFORM, ] + #: Supported noise distributions NOISE_MODELS = [NORMAL, LAPLACE] diff --git a/petab/v2/core.py b/petab/v2/core.py index 41abfb36..e4bd4370 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -124,35 +124,41 @@ class NoiseDistribution(str, Enum): LAPLACE = C.LAPLACE -class PriorType(str, Enum): +class PriorDistribution(str, Enum): """Prior types. Prior types as used in the PEtab parameters table. """ - #: Normal distribution. - NORMAL = C.NORMAL + #: Cauchy distribution. + CAUCHY = C.CAUCHY + #: Chi-squared distribution. + CHI_SQUARED = C.CHI_SQUARED + #: Exponential distribution. + EXPONENTIAL = C.EXPONENTIAL + #: Gamma distribution. + GAMMA = C.GAMMA #: Laplace distribution. LAPLACE = C.LAPLACE - #: Uniform distribution. - UNIFORM = C.UNIFORM - #: Log-normal distribution. - LOG_NORMAL = C.LOG_NORMAL + #: Log10-normal distribution. + LOG10_NORMAL = C.LOG10_NORMAL #: Log-Laplace distribution LOG_LAPLACE = C.LOG_LAPLACE - PARAMETER_SCALE_NORMAL = C.PARAMETER_SCALE_NORMAL - PARAMETER_SCALE_LAPLACE = C.PARAMETER_SCALE_LAPLACE - PARAMETER_SCALE_UNIFORM = C.PARAMETER_SCALE_UNIFORM - + #: Log-normal distribution. + LOG_NORMAL = C.LOG_NORMAL + #: Log-uniform distribution. + LOG_UNIFORM = C.LOG_UNIFORM + #: Normal distribution. + NORMAL = C.NORMAL + #: Rayleigh distribution. + RAYLEIGH = C.RAYLEIGH + #: Uniform distribution. + UNIFORM = C.UNIFORM -#: Objective prior types as used in the PEtab parameters table. -ObjectivePriorType = PriorType -#: Initialization prior types as used in the PEtab parameters table. 
-InitializationPriorType = PriorType -assert set(C.PRIOR_TYPES) == {e.value for e in ObjectivePriorType}, ( - "ObjectivePriorType enum does not match C.PRIOR_TYPES: " - f"{set(C.PRIOR_TYPES)} vs { {e.value for e in ObjectivePriorType} }" +assert set(C.PRIOR_DISTRIBUTIONS) == {e.value for e in PriorDistribution}, ( + "PriorDistribution enum does not match C.PRIOR_DISTRIBUTIONS " + f"{set(C.PRIOR_DISTRIBUTIONS)} vs { {e.value for e in PriorDistribution} }" ) @@ -849,18 +855,16 @@ class Parameter(BaseModel): ub: float | None = Field(alias=C.UPPER_BOUND, default=None) #: Nominal value. nominal_value: float | None = Field(alias=C.NOMINAL_VALUE, default=None) - #: Parameter scale. - # TODO: keep or remove? - scale: ParameterScale = Field( - alias=C.PARAMETER_SCALE, default=ParameterScale.LIN - ) - # TODO: change to bool in PEtab, or serialize as 0/1? - # https://github.com/PEtab-dev/PEtab/discussions/610 #: Is the parameter to be estimated? estimate: bool = Field(alias=C.ESTIMATE, default=True) - - # TODO priors - # pydantic vs. petab.v1.priors.Prior + #: Type of parameter prior distribution. + prior_distribution: PriorDistribution | None = Field( + alias=C.PRIOR_DISTRIBUTION, default=None + ) + #: Prior distribution parameters. 
+ prior_parameters: list[float] = Field( + alias=C.PRIOR_PARAMETERS, default_factory=list + ) #: :meta private: model_config = ConfigDict( @@ -879,13 +883,22 @@ def _validate_id(cls, v): raise ValueError(f"Invalid ID: {v}") return v + @field_validator("prior_parameters", mode="before") + @classmethod + def _validate_prior_parameters(cls, v): + if isinstance(v, str): + v = v.split(C.PARAMETER_SEPARATOR) + elif not isinstance(v, Sequence): + v = [v] + + return [float(x) for x in v] + @field_validator("estimate", mode="before") @classmethod def _validate_estimate_before(cls, v): if isinstance(v, bool): return v - # TODO: clarify whether extra whitespace is allowed if isinstance(v, str): v = v.strip().lower() if v == "true": @@ -929,8 +942,6 @@ def _validate(self) -> Self: ): raise ValueError("Lower bound must be less than upper bound.") - # TODO parameterScale? - # TODO priorType, priorParameters return self diff --git a/petab/v2/problem.py b/petab/v2/problem.py index c2a8a6fa..cf4dc430 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -813,14 +813,13 @@ def n_measurements(self) -> int: """Number of measurements.""" return len(self.measurement_table.measurements) - # TODO: update after implementing priors in `Parameter` @property def n_priors(self) -> int: """Number of priors.""" - if OBJECTIVE_PRIOR_PARAMETERS not in self.parameter_df: - return 0 - - return self.parameter_df[OBJECTIVE_PRIOR_PARAMETERS].notna().sum() + return sum( + p.prior_distribution is not None + for p in self.parameter_table.parameters + ) def validate( self, validation_tasks: list[ValidationTask] = None @@ -944,10 +943,8 @@ def add_parameter( scale: str = None, lb: Number = None, ub: Number = None, - init_prior_type: str = None, - init_prior_pars: str | Sequence = None, - obj_prior_type: str = None, - obj_prior_pars: str | Sequence = None, + prior_dist: str = None, + prior_pars: str | Sequence = None, **kwargs, ): """Add a parameter to the problem. 
@@ -959,11 +956,8 @@ def add_parameter( scale: The parameter scale lb: The lower bound of the parameter ub: The upper bound of the parameter - init_prior_type: The type of the initialization prior distribution - init_prior_pars: The parameters of the initialization prior - distribution - obj_prior_type: The type of the objective prior distribution - obj_prior_pars: The parameters of the objective prior distribution + prior_dist: The type of the prior distribution + prior_pars: The parameters of the prior distribution kwargs: additional columns/values to add to the parameter table """ record = { @@ -979,22 +973,14 @@ def add_parameter( record[LOWER_BOUND] = lb if ub is not None: record[UPPER_BOUND] = ub - if init_prior_type is not None: - record[INITIALIZATION_PRIOR_TYPE] = init_prior_type - if init_prior_pars is not None: - if not isinstance(init_prior_pars, str): - init_prior_pars = PARAMETER_SEPARATOR.join( - map(str, init_prior_pars) - ) - record[INITIALIZATION_PRIOR_PARAMETERS] = init_prior_pars - if obj_prior_type is not None: - record[OBJECTIVE_PRIOR_TYPE] = obj_prior_type - if obj_prior_pars is not None: - if not isinstance(obj_prior_pars, str): - obj_prior_pars = PARAMETER_SEPARATOR.join( - map(str, obj_prior_pars) - ) - record[OBJECTIVE_PRIOR_PARAMETERS] = obj_prior_pars + if prior_dist is not None: + record[PRIOR_DISTRIBUTION] = prior_dist + if prior_pars is not None: + if isinstance(prior_pars, Sequence) and not isinstance( + prior_pars, str + ): + prior_pars = PARAMETER_SEPARATOR.join(map(str, prior_pars)) + record[PRIOR_PARAMETERS] = prior_pars record.update(kwargs) self.parameter_table += core.Parameter(**record) @@ -1132,7 +1118,8 @@ def model_dump(self, **kwargs) -> dict[str, Any]: 'id': 'par', 'lb': 0.0, 'nominal_value': None, - 'scale': , + 'prior_distribution': None, + 'prior_parameters': [], 'ub': 1.0}]} """ res = { From 3c6b45b406d55f00f2d5eaa5fab85a73b227981d Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Fri, 25 Apr 2025 12:22:45 +0200 
Subject: [PATCH 047/141] =?UTF-8?q?v2:=20handle=20merged=20observable.obse?= =?UTF-8?q?rvableTransformation=20observable.nois=E2=80=A6=20(#376)?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit * v2: handle merged observable.observableTransformation observable.noiseDistribution Update to changes in PEtab v2 draft, see https://github.com/PEtab-dev/PEtab/pull/619. Closes #375. * upconversion --- petab/v2/C.py | 7 +---- petab/v2/core.py | 25 +++------------- petab/v2/lint.py | 4 +-- petab/v2/petab1to2.py | 58 +++++++++++++++++++++++++++++++++++-- petab/v2/problem.py | 5 +--- tests/v2/test_conversion.py | 5 +++- 6 files changed, 68 insertions(+), 36 deletions(-) diff --git a/petab/v2/C.py b/petab/v2/C.py index b5325ff4..fc1f6fa2 100644 --- a/petab/v2/C.py +++ b/petab/v2/C.py @@ -147,8 +147,6 @@ OBSERVABLE_FORMULA = "observableFormula" #: Noise formula column in the observable table NOISE_FORMULA = "noiseFormula" -#: Observable transformation column in the observable table -OBSERVABLE_TRANSFORMATION = "observableTransformation" #: Noise distribution column in the observable table NOISE_DISTRIBUTION = "noiseDistribution" @@ -162,7 +160,6 @@ #: Optional columns of observable table OBSERVABLE_DF_OPTIONAL_COLS = [ OBSERVABLE_NAME, - OBSERVABLE_TRANSFORMATION, NOISE_DISTRIBUTION, ] @@ -181,8 +178,6 @@ LOG = "log" #: Logarithmic base 10 transformation LOG10 = "log10" -#: Supported observable transformations -OBSERVABLE_TRANSFORMATIONS = [LIN, LOG, LOG10] # NOISE MODELS @@ -232,7 +227,7 @@ #: Supported noise distributions -NOISE_MODELS = [NORMAL, LAPLACE] +NOISE_DISTRIBUTIONS = [NORMAL, LAPLACE, LOG_NORMAL, LOG_LAPLACE] # VISUALIZATION diff --git a/petab/v2/core.py b/petab/v2/core.py index e4bd4370..38279545 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -32,7 +32,6 @@ __all__ = [ "Observable", "ObservableTable", - "ObservableTransformation", "NoiseDistribution", "Change", "Condition", @@ -87,20 +86,6 @@ def _valid_petab_id(v: 
str) -> str: return v -class ObservableTransformation(str, Enum): - """Observable transformation types. - - Observable transformations as used in the PEtab observables table. - """ - - #: No transformation - LIN = C.LIN - #: Logarithmic transformation (natural logarithm) - LOG = C.LOG - #: Logarithmic transformation (base 10) - LOG10 = C.LOG10 - - class ParameterScale(str, Enum): """Parameter scales. @@ -122,6 +107,10 @@ class NoiseDistribution(str, Enum): NORMAL = C.NORMAL #: Laplace distribution LAPLACE = C.LAPLACE + #: Log-normal distribution + LOG_NORMAL = C.LOG_NORMAL + #: Log-Laplace distribution + LOG_LAPLACE = C.LOG_LAPLACE class PriorDistribution(str, Enum): @@ -173,10 +162,6 @@ class Observable(BaseModel): name: str | None = Field(alias=C.OBSERVABLE_NAME, default=None) #: Observable formula. formula: sp.Basic | None = Field(alias=C.OBSERVABLE_FORMULA, default=None) - #: Observable transformation. - transformation: ObservableTransformation = Field( - alias=C.OBSERVABLE_TRANSFORMATION, default=ObservableTransformation.LIN - ) #: Noise formula. noise_formula: sp.Basic | None = Field(alias=C.NOISE_FORMULA, default=None) #: Noise distribution. 
@@ -193,9 +178,7 @@ class Observable(BaseModel): "name", "formula", "noise_formula", - "noise_formula", "noise_distribution", - "transformation", mode="before", ) @classmethod diff --git a/petab/v2/lint.py b/petab/v2/lint.py index 6e7fc161..0fb055e8 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -326,12 +326,12 @@ class CheckPosLogMeasurements(ValidationTask): log-transformation are positive.""" def run(self, problem: Problem) -> ValidationIssue | None: - from .core import ObservableTransformation as ot + from .core import NoiseDistribution as nd log_observables = { o.id for o in problem.observable_table.observables - if o.transformation in [ot.LOG, ot.LOG10] + if o.noise_distribution in [nd.LOG_NORMAL, nd.LOG_LAPLACE] } if log_observables: for m in problem.measurement_table.measurements: diff --git a/petab/v2/petab1to2.py b/petab/v2/petab1to2.py index 29107238..7cbc5369 100644 --- a/petab/v2/petab1to2.py +++ b/petab/v2/petab1to2.py @@ -104,14 +104,23 @@ def petab_files_1to2(yaml_config: Path | str, output_dir: Path | str): # sub-problems for problem_config in new_yaml_config.problems: # copy files that don't need conversion - # (models, observables, visualizations) + # (models, visualizations) for file in chain( - problem_config.observable_files, (model.location for model in problem_config.model_files.values()), problem_config.visualization_files, ): _copy_file(get_src_path(file), Path(get_dest_path(file))) + # Update observable table + for observable_file in problem_config.observable_files: + observable_df = v1.get_observable_df(get_src_path(observable_file)) + observable_df = v1v2_observable_df( + observable_df, + ) + v2.write_observable_df( + observable_df, get_dest_path(observable_file) + ) + # Update condition table for condition_file in problem_config.condition_files: condition_df = v1.get_condition_df(get_src_path(condition_file)) @@ -339,3 +348,48 @@ def v1v2_condition_df( ) return condition_df + + +def v1v2_observable_df(observable_df: 
pd.DataFrame) -> pd.DataFrame: + """Convert observable table from petab v1 to v2. + + Perform all updates that can be done solely on the observable table: + * drop observableTransformation, update noiseDistribution + """ + df = observable_df.copy().reset_index() + + # drop observableTransformation, update noiseDistribution + # if there is no observableTransformation, no need to update + if v1.C.OBSERVABLE_TRANSFORMATION in df.columns: + df[v1.C.OBSERVABLE_TRANSFORMATION] = df[ + v1.C.OBSERVABLE_TRANSFORMATION + ].fillna(v1.C.LIN) + + if v1.C.NOISE_DISTRIBUTION in df: + df[v1.C.NOISE_DISTRIBUTION] = df[v1.C.NOISE_DISTRIBUTION].fillna( + v1.C.NORMAL + ) + else: + df[v1.C.NOISE_DISTRIBUTION] = v1.C.NORMAL + + # merge observableTransformation into noiseDistribution + def update_noise_dist(row): + dist = row.get(v1.C.NOISE_DISTRIBUTION) + trans = row.get(v1.C.OBSERVABLE_TRANSFORMATION) + + if trans == v1.C.LIN: + new_dist = dist + else: + new_dist = f"{trans}-{dist}" + + if new_dist not in v2.C.NOISE_DISTRIBUTIONS: + raise NotImplementedError( + f"Noise distribution `{new_dist}' for " + f"observable `{row[v1.C.OBSERVABLE_ID]}'" + f" is not supported in PEtab v2." 
+ ) + + df[v2.C.NOISE_DISTRIBUTION] = df.apply(update_noise_dist, axis=1) + df.drop(columns=[v1.C.OBSERVABLE_TRANSFORMATION], inplace=True) + + return df diff --git a/petab/v2/problem.py b/petab/v2/problem.py index cf4dc430..ef4cfc51 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -903,7 +903,6 @@ def add_observable( formula: str, noise_formula: str | float | int = None, noise_distribution: str = None, - transform: str = None, name: str = None, **kwargs, ): @@ -914,7 +913,6 @@ def add_observable( formula: The observable formula noise_formula: The noise formula noise_distribution: The noise distribution - transform: The observable transformation name: The observable name kwargs: additional columns/values to add to the observable table @@ -929,8 +927,7 @@ def add_observable( record[NOISE_FORMULA] = noise_formula if noise_distribution is not None: record[NOISE_DISTRIBUTION] = noise_distribution - if transform is not None: - record[OBSERVABLE_TRANSFORMATION] = transform + record.update(kwargs) self.observable_table += core.Observable(**record) diff --git a/tests/v2/test_conversion.py b/tests/v2/test_conversion.py index 612606ab..43e14662 100644 --- a/tests/v2/test_conversion.py +++ b/tests/v2/test_conversion.py @@ -40,6 +40,9 @@ def test_benchmark_collection(problem_id): pytest.skip("Too slow. Re-enable once we are faster.") yaml_path = benchmark_models_petab.get_problem_yaml_path(problem_id) - problem = petab1to2(yaml_path) + try: + problem = petab1to2(yaml_path) + except NotImplementedError as e: + pytest.skip(str(e)) assert isinstance(problem, Problem) assert len(problem.measurement_table.measurements) From 3de2ce9ca5ac5e4108e95aeda1917b34e024b2b9 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Fri, 25 Apr 2025 12:37:17 +0200 Subject: [PATCH 048/141] v2: Additional probability distributions (#377) Add additional probability distributions as required for https://github.com/PEtab-dev/PEtab/pull/595. 
See https://github.com/PEtab-dev/libpetab-python/issues/374. --- petab/v1/distributions.py | 286 +++++++++++++++++++++++++++++++++++--- 1 file changed, 270 insertions(+), 16 deletions(-) diff --git a/petab/v1/distributions.py b/petab/v1/distributions.py index 6b2612fd..ac005d49 100644 --- a/petab/v1/distributions.py +++ b/petab/v1/distributions.py @@ -3,9 +3,19 @@ from __future__ import annotations import abc +from typing import Any import numpy as np -from scipy.stats import laplace, norm, uniform +from scipy.stats import ( + cauchy, + chi2, + expon, + gamma, + laplace, + norm, + rayleigh, + uniform, +) __all__ = [ "Distribution", @@ -277,6 +287,21 @@ def _inverse_transform_sample(self, shape) -> np.ndarray | float: ) return self._ppf_transformed_untruncated(uniform_sample) + def _repr(self, pars: dict[str, Any] = None) -> str: + """Return a string representation of the distribution.""" + pars = ", ".join(f"{k}={v}" for k, v in pars.items()) if pars else "" + + if self._logbase is False: + log = "" + elif self._logbase == np.exp(1): + log = ", log=True" + else: + log = f", log={self._logbase}" + + trunc = f", trunc={self._trunc}" if self._trunc else "" + + return f"{self.__class__.__name__}({pars}{log}{trunc})" + class Normal(Distribution): """A (log-)normal distribution. 
@@ -307,16 +332,7 @@ def __init__( super().__init__(log=log, trunc=trunc) def __repr__(self): - if self._logbase is False: - log = "" - if self._logbase == np.exp(1): - log = ", log=True" - else: - log = f", log={self._logbase}" - - trunc = f", trunc={self._trunc}" if self._trunc else "" - - return f"Normal(loc={self._loc}, scale={self._scale}{log}{trunc})" + return self._repr({"loc": self._loc, "scale": self._scale}) def _sample(self, shape=None) -> np.ndarray | float: return np.random.normal(loc=self._loc, scale=self._scale, size=shape) @@ -366,8 +382,7 @@ def __init__( super().__init__(log=log) def __repr__(self): - log = f", log={self._logbase}" if self._logbase else "" - return f"Uniform(low={self._low}, high={self._high}{log})" + return self._repr({"low": self._low, "high": self._high}) def _sample(self, shape=None) -> np.ndarray | float: return np.random.uniform(low=self._low, high=self._high, size=shape) @@ -411,9 +426,7 @@ def __init__( super().__init__(log=log, trunc=trunc) def __repr__(self): - trunc = f", trunc={self._trunc}" if self._trunc else "" - log = f", log={self._logbase}" if self._logbase else "" - return f"Laplace(loc={self._loc}, scale={self._scale}{trunc}{log})" + return self._repr({"loc": self._loc, "scale": self._scale}) def _sample(self, shape=None) -> np.ndarray | float: return np.random.laplace(loc=self._loc, scale=self._scale, size=shape) @@ -436,3 +449,244 @@ def loc(self) -> float: def scale(self) -> float: """The scale parameter of the underlying distribution.""" return self._scale + + +class Cauchy(Distribution): + """Cauchy distribution. + + A (possibly truncated) `Cauchy distribution + `__. + + :param loc: The location parameter of the distribution. + :param scale: The scale parameter of the distribution. + :param trunc: The truncation limits of the distribution. + ``None`` if the distribution is not truncated. + If the distribution is log-scaled, the truncation limits are expected + to be on the same log scale. 
+ :param log: If ``True``, the distribution is transformed to a log-Cauchy + distribution. If a float, the distribution is transformed to a + log-Cauchy distribution with the given log-base. + If ``False``, no transformation is applied. + If a transformation is applied, the location and scale parameters + are the location and scale of the underlying Cauchy distribution. + """ + + def __init__( + self, + loc: float, + scale: float, + trunc: tuple[float, float] | None = None, + log: bool | float = False, + ): + self._loc = loc + self._scale = scale + super().__init__(log=log, trunc=trunc) + + def __repr__(self): + return self._repr({"loc": self._loc, "scale": self._scale}) + + def _pdf_untransformed_untruncated(self, x) -> np.ndarray | float: + return cauchy.pdf(x, loc=self._loc, scale=self._scale) + + def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: + return cauchy.cdf(x, loc=self._loc, scale=self._scale) + + def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float: + return cauchy.ppf(q, loc=self._loc, scale=self._scale) + + @property + def loc(self) -> float: + """The location parameter of the underlying distribution.""" + return self._loc + + @property + def scale(self) -> float: + """The scale parameter of the underlying distribution.""" + return self._scale + + +class ChiSquare(Distribution): + """Chi-squared distribution. + + A (possibly truncated) `Chi-squared distribution + `__. + + :param dof: The degrees of freedom parameter of the distribution. + :param trunc: The truncation limits of the distribution. + ``None`` if the distribution is not truncated. + If the distribution is log-scaled, the truncation limits are expected + to be on the same log scale. + :param log: If ``True``, the distribution is transformed to a + log-Chi-squared distribution. + If a float, the distribution is transformed to a + log-Chi-squared distribution with the given log-base. + If ``False``, no transformation is applied. 
+ If a transformation is applied, the degrees of freedom parameter + is the degrees of freedom of the underlying Chi-squared distribution. + """ + + def __init__( + self, + dof: int, + trunc: tuple[float, float] | None = None, + log: bool | float = False, + ): + if not dof.is_integer() or dof < 1: + raise ValueError( + f"`dof' must be a positive integer, but was `{dof}'." + ) + + self._dof = dof + super().__init__(log=log, trunc=trunc) + + def __repr__(self): + return self._repr({"dof": self._dof}) + + def _pdf_untransformed_untruncated(self, x) -> np.ndarray | float: + return chi2.pdf(x, df=self._dof) + + def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: + return chi2.cdf(x, df=self._dof) + + def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float: + return chi2.ppf(q, df=self._dof) + + @property + def dof(self) -> int: + """The degrees of freedom parameter.""" + return self._dof + + +class Exponential(Distribution): + """Exponential distribution. + + A (possibly truncated) `Exponential distribution + `__. + + :param scale: The scale parameter of the distribution. + :param trunc: The truncation limits of the distribution. + ``None`` if the distribution is not truncated. + """ + + def __init__( + self, + scale: float, + trunc: tuple[float, float] | None = None, + ): + self._scale = scale + super().__init__(log=False, trunc=trunc) + + def __repr__(self): + return self._repr({"scale": self._scale}) + + def _pdf_untransformed_untruncated(self, x) -> np.ndarray | float: + return expon.pdf(x, scale=self._scale) + + def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: + return expon.cdf(x, scale=self._scale) + + def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float: + return expon.ppf(q, scale=self._scale) + + @property + def scale(self) -> float: + """The scale parameter of the underlying distribution.""" + return self._scale + + +class Gamma(Distribution): + """Gamma distribution. 
+ + A (possibly truncated) `Gamma distribution + `__. + + :param shape: The shape parameter of the distribution. + :param scale: The scale parameter of the distribution. + :param trunc: The truncation limits of the distribution. + ``None`` if the distribution is not truncated. + :param log: If ``True``, the distribution is transformed to a + log-Gamma distribution. + If a float, the distribution is transformed to a + log-Gamma distribution with the given log-base. + If ``False``, no transformation is applied. + If a transformation is applied, the shape and scale parameters + are the shape and scale of the underlying Gamma distribution. + """ + + def __init__( + self, + shape: float, + scale: float, + trunc: tuple[float, float] | None = None, + log: bool | float = False, + ): + self._shape = shape + self._scale = scale + super().__init__(log=log, trunc=trunc) + + def __repr__(self): + return self._repr({"shape": self._shape, "scale": self._scale}) + + def _pdf_untransformed_untruncated(self, x) -> np.ndarray | float: + return gamma.pdf(x, a=self._shape, scale=self._scale) + + def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: + return gamma.cdf(x, a=self._shape, scale=self._scale) + + def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float: + return gamma.ppf(q, a=self._shape, scale=self._scale) + + @property + def shape(self) -> float: + """The shape parameter of the underlying distribution.""" + return self._shape + + @property + def scale(self) -> float: + """The scale parameter of the underlying distribution.""" + return self._scale + + +class Rayleigh(Distribution): + """Rayleigh distribution. + + A (possibly truncated) `Rayleigh distribution + `__. + + :param scale: The scale parameter of the distribution. + :param trunc: The truncation limits of the distribution. + ``None`` if the distribution is not truncated. + :param log: If ``True``, the distribution is transformed to a + log-Rayleigh distribution. 
+ If a float, the distribution is transformed to a + log-Rayleigh distribution with the given log-base. + If ``False``, no transformation is applied. + If a transformation is applied, the scale parameter + is the scale of the underlying Rayleigh distribution. + """ + + def __init__( + self, + scale: float, + trunc: tuple[float, float] | None = None, + log: bool | float = False, + ): + self._scale = scale + super().__init__(log=log, trunc=trunc) + + def __repr__(self): + return self._repr({"scale": self._scale}) + + def _pdf_untransformed_untruncated(self, x) -> np.ndarray | float: + return rayleigh.pdf(x, scale=self._scale) + + def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: + return rayleigh.cdf(x, scale=self._scale) + + def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float: + return rayleigh.ppf(q, scale=self._scale) + + @property + def scale(self) -> float: + """The scale parameter of the underlying distribution.""" + return self._scale From b73e0a9e51c7fcf2c2845879460562359750e8d9 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Fri, 25 Apr 2025 13:07:55 +0200 Subject: [PATCH 049/141] v1->v2: Update priors (#379) Extend PEtab v1->v2 conversion: * remove initializationPrior* * rename objectivePrior* * convert parameterScale* priors --- petab/v2/petab1to2.py | 75 +++++++++++++++++++++++++++++++++++++++---- 1 file changed, 69 insertions(+), 6 deletions(-) diff --git a/petab/v2/petab1to2.py b/petab/v2/petab1to2.py index 7cbc5369..c788f116 100644 --- a/petab/v2/petab1to2.py +++ b/petab/v2/petab1to2.py @@ -92,12 +92,8 @@ def petab_files_1to2(yaml_config: Path | str, output_dir: Path | str): # Update tables - # parameter table: - # * parameter.estimate: int -> bool - parameter_df = petab_problem.parameter_df.copy() - parameter_df[v1.C.ESTIMATE] = parameter_df[v1.C.ESTIMATE].apply( - lambda x: str(bool(int(x))).lower() - ) + # parameter table + parameter_df = v1v2_parameter_df(petab_problem.parameter_df.copy()) file = 
yaml_config[v2.C.PARAMETER_FILE] v2.write_parameter_df(parameter_df, get_dest_path(file)) @@ -393,3 +389,70 @@ def update_noise_dist(row): df.drop(columns=[v1.C.OBSERVABLE_TRANSFORMATION], inplace=True) return df + + +def v1v2_parameter_df( + parameter_df: pd.DataFrame, +) -> pd.DataFrame: + """Convert parameter table from petab v1 to v2. + + Do all the necessary conversions to the parameter table that can + be done with the parameter table alone. + """ + df = parameter_df.copy().reset_index() + + # parameter.estimate: int -> bool + df[v2.C.ESTIMATE] = df[v1.C.ESTIMATE].apply( + lambda x: str(bool(int(x))).lower() + ) + + def update_prior(row): + """Convert prior to v2 format.""" + prior_type = row.get(v1.C.OBJECTIVE_PRIOR_TYPE) + if pd.isna(prior_type): + prior_type = v1.C.UNIFORM + + pscale = row.get(v1.C.PARAMETER_SCALE) + if pd.isna(pscale): + pscale = v1.C.LIN + + if prior_type not in v1.C.PARAMETER_SCALE_PRIOR_TYPES: + return prior_type + + new_prior_type = prior_type.removeprefix("parameterScale").lower() + if pscale != v1.C.LIN: + new_prior_type = f"{pscale}-{new_prior_type}" + + if new_prior_type not in v2.C.PRIOR_DISTRIBUTIONS: + raise NotImplementedError( + f"PEtab v2 does not support prior type `{new_prior_type}' " + f"required for parameter `{row.index}'." 
+ ) + + return new_prior_type + + # update parameterScale*-priors + if v1.C.OBJECTIVE_PRIOR_TYPE in df.columns: + df[v1.C.OBJECTIVE_PRIOR_TYPE] = df.apply(update_prior, axis=1) + + # rename objectivePrior* to prior* + df.rename( + columns={ + v1.C.OBJECTIVE_PRIOR_TYPE: v2.C.PRIOR_DISTRIBUTION, + v1.C.OBJECTIVE_PRIOR_PARAMETERS: v2.C.PRIOR_PARAMETERS, + }, + inplace=True, + errors="ignore", + ) + # some columns were dropped in PEtab v2 + df.drop( + columns=[ + v1.C.INITIALIZATION_PRIOR_TYPE, + v1.C.INITIALIZATION_PRIOR_PARAMETERS, + v1.C.PARAMETER_SCALE, + ], + inplace=True, + errors="ignore", + ) + + return df From 84fbdba15d24049c87c9b24b3d945df2f0d6a947 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 28 Apr 2025 11:16:59 +0200 Subject: [PATCH 050/141] v2: Startpoint sampling (#380) * Add `Parameter.prior_dist` * Update `v1.distributions.__all__` * Implement startpoint sampling for `v2.Problem` supporting all new prior distributions --------- Co-authored-by: Dilan Pathirana <59329744+dilpath@users.noreply.github.com> --- petab/v1/distributions.py | 18 +++++++++++-- petab/v2/core.py | 53 +++++++++++++++++++++++++++++++++++++++ petab/v2/problem.py | 28 +++++++++++++-------- tests/v2/test_problem.py | 20 +++++++++++++++ 4 files changed, 106 insertions(+), 13 deletions(-) diff --git a/petab/v1/distributions.py b/petab/v1/distributions.py index ac005d49..411add56 100644 --- a/petab/v1/distributions.py +++ b/petab/v1/distributions.py @@ -1,4 +1,13 @@ -"""Probability distributions used by PEtab.""" +"""Probability distributions used by PEtab. + +This module provides a set of univariate probability distributions +that can be used for sampling and evaluating the probability density +function (PDF) and cumulative distribution function (CDF). +Most of these distributions also support log transformations and truncation. + +Not all distributions that can be represented by these classes are valid +as PEtab parameter prior or noise distributions. 
+""" from __future__ import annotations @@ -19,9 +28,14 @@ __all__ = [ "Distribution", + "Cauchy", + "ChiSquare", + "Exponential", + "Gamma", + "Laplace", "Normal", + "Rayleigh", "Uniform", - "Laplace", ] diff --git a/petab/v2/core.py b/petab/v2/core.py index 38279545..37797610 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -25,6 +25,7 @@ ) from typing_extensions import Self +from ..v1.distributions import * from ..v1.lint import is_valid_identifier from ..v1.math import petab_math_str, sympify_petab from . import C, get_observable_df @@ -150,6 +151,26 @@ class PriorDistribution(str, Enum): f"{set(C.PRIOR_DISTRIBUTIONS)} vs { {e.value for e in PriorDistribution} }" ) +_prior_to_cls = { + PriorDistribution.CAUCHY: Cauchy, + PriorDistribution.CHI_SQUARED: ChiSquare, + PriorDistribution.EXPONENTIAL: Exponential, + PriorDistribution.GAMMA: Gamma, + PriorDistribution.LAPLACE: Laplace, + PriorDistribution.LOG10_NORMAL: Normal, + PriorDistribution.LOG_LAPLACE: Laplace, + PriorDistribution.LOG_NORMAL: Normal, + PriorDistribution.LOG_UNIFORM: Uniform, + PriorDistribution.NORMAL: Normal, + PriorDistribution.RAYLEIGH: Rayleigh, + PriorDistribution.UNIFORM: Uniform, +} + +assert not (_mismatch := set(PriorDistribution) ^ set(_prior_to_cls)), ( + "PriorDistribution enum does not match _prior_to_cls. " + f"Mismatches: {_mismatch}" +) + class Observable(BaseModel): """Observable definition.""" @@ -929,6 +950,38 @@ def _validate(self) -> Self: return self + @property + def prior_dist(self) -> Distribution: + """Get the pior distribution of the parameter.""" + if self.estimate is False: + raise ValueError(f"Parameter `{self.id}' is not estimated.") + + if self.prior_distribution is None: + return Uniform(self.lb, self.ub) + + if not (cls := _prior_to_cls.get(self.prior_distribution)): + raise ValueError( + f"Prior distribution `{self.prior_distribution}' not " + "supported." 
+ ) + + if str(self.prior_distribution).startswith("log-"): + log = True + elif str(self.prior_distribution).startswith("log10-"): + log = 10 + else: + log = False + + if cls == Exponential: + # `Exponential.__init__` does not accept the `log` parameter + if log is not False: + raise ValueError( + "Exponential distribution does not support log " + "transformation." + ) + return cls(*self.prior_parameters, trunc=[self.lb, self.ub]) + return cls(*self.prior_parameters, log=log, trunc=[self.lb, self.ub]) + class ParameterTable(BaseModel): """PEtab parameter table.""" diff --git a/petab/v2/problem.py b/petab/v2/problem.py index ef4cfc51..01903b16 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -12,6 +12,7 @@ from pathlib import Path from typing import TYPE_CHECKING, Any +import numpy as np import pandas as pd import sympy as sp from pydantic import AnyUrl, BaseModel, Field @@ -22,10 +23,10 @@ observables, parameter_mapping, parameters, - sampling, yaml, ) from ..v1.core import concat_tables, get_visualization_df +from ..v1.distributions import Distribution from ..v1.models.model import Model, model_factory from ..v1.yaml import get_path_prefix from ..v2.C import * # noqa: F403 @@ -726,24 +727,29 @@ def get_optimization_to_simulation_parameter_mapping(self, **kwargs): ) ) - def sample_parameter_startpoints(self, n_starts: int = 100, **kwargs): - """Create 2D array with starting points for optimization + def get_priors(self) -> dict[str, Distribution]: + """Get prior distributions. - See :py:func:`petab.sample_parameter_startpoints`. + :returns: The prior distributions for the estimated parameters. 
""" - return sampling.sample_parameter_startpoints( - self.parameter_df, n_starts=n_starts, **kwargs - ) + return { + p.id: p.prior_dist + for p in self.parameter_table.parameters + if p.estimate + } + + def sample_parameter_startpoints(self, n_starts: int = 100, **kwargs): + """Create 2D array with starting points for optimization""" + priors = self.get_priors() + return np.vstack([p.sample(n_starts) for p in priors.values()]).T def sample_parameter_startpoints_dict( self, n_starts: int = 100 ) -> list[dict[str, float]]: """Create dictionaries with starting points for optimization - See also :py:func:`petab.sample_parameter_startpoints`. - - Returns: - A list of dictionaries with parameter IDs mapping to samples + :returns: + A list of dictionaries with parameter IDs mapping to sampled parameter values. """ return [ diff --git a/tests/v2/test_problem.py b/tests/v2/test_problem.py index 55141ba3..7d5b6e1c 100644 --- a/tests/v2/test_problem.py +++ b/tests/v2/test_problem.py @@ -23,6 +23,7 @@ TARGET_VALUE, UPPER_BOUND, ) +from petab.v2.core import * def test_load_remote(): @@ -170,3 +171,22 @@ def test_modify_problem(): } ).set_index([PETAB_ENTITY_ID]) assert_frame_equal(problem.mapping_df, exp_mapping_df, check_dtype=False) + + +def test_sample_startpoint_shape(): + """Test startpoint sampling.""" + problem = Problem() + problem += Parameter(id="p1", estimate=True, lb=1, ub=2) + problem += Parameter( + id="p2", + estimate=True, + lb=2, + ub=3, + prior_distribution="normal", + prior_parameters=[2.5, 0.5], + ) + problem += Parameter(id="p3", estimate=False, nominal_value=1) + + n_starts = 10 + sp = problem.sample_parameter_startpoints(n_starts=n_starts) + assert sp.shape == (n_starts, 2) From 2337e2d53692dcbb4533bf69a19e7570d4492bd8 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 28 Apr 2025 19:54:55 +0200 Subject: [PATCH 051/141] v2: More validation (#381) * Check priors (related to https://github.com/PEtab-dev/libpetab-python/issues/374) * Check observables 
* Fix missing prior parameters after v1->v2 conversion of uniform priors * Fix some pydantic validation / serialization * Fix style --------- Co-authored-by: Dilan Pathirana <59329744+dilpath@users.noreply.github.com> --- petab/v2/core.py | 71 ++++++++++++++----- petab/v2/lint.py | 157 ++++++++++++++++++++++++++++++++++++++---- petab/v2/petab1to2.py | 17 +++++ petab/v2/problem.py | 4 +- tests/v2/test_core.py | 2 +- 5 files changed, 219 insertions(+), 32 deletions(-) diff --git a/petab/v2/core.py b/petab/v2/core.py index 37797610..1ee74ace 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -73,8 +73,11 @@ def _not_nan(v: float, info: ValidationInfo) -> float: def _convert_nan_to_none(v): + """Convert NaN or "" to None.""" if isinstance(v, float) and np.isnan(v): return None + if isinstance(v, str) and v == "": + return None return v @@ -503,9 +506,17 @@ class ExperimentPeriod(BaseModel): @field_validator("condition_ids", mode="before") @classmethod def _validate_ids(cls, condition_ids): + if condition_ids in [None, "", [], [""]]: + # unspecified, or "use-model-as-is" + return [] + for condition_id in condition_ids: + # The empty condition ID for "use-model-as-is" has been handled + # above. Having a combination of empty and non-empty IDs is an + # error, since the targets of conditions to be combined must be + # disjoint. if not is_valid_identifier(condition_id): - raise ValueError(f"Invalid ID: {condition_id}") + raise ValueError(f"Invalid {C.CONDITION_ID}: `{condition_id}'") return condition_ids @@ -854,17 +865,23 @@ class Parameter(BaseModel): #: Parameter ID. id: str = Field(alias=C.PARAMETER_ID) #: Lower bound. - lb: float | None = Field(alias=C.LOWER_BOUND, default=None) + lb: Annotated[float | None, BeforeValidator(_convert_nan_to_none)] = Field( + alias=C.LOWER_BOUND, default=None + ) #: Upper bound. 
- ub: float | None = Field(alias=C.UPPER_BOUND, default=None) + ub: Annotated[float | None, BeforeValidator(_convert_nan_to_none)] = Field( + alias=C.UPPER_BOUND, default=None + ) #: Nominal value. - nominal_value: float | None = Field(alias=C.NOMINAL_VALUE, default=None) + nominal_value: Annotated[ + float | None, BeforeValidator(_convert_nan_to_none) + ] = Field(alias=C.NOMINAL_VALUE, default=None) #: Is the parameter to be estimated? estimate: bool = Field(alias=C.ESTIMATE, default=True) #: Type of parameter prior distribution. - prior_distribution: PriorDistribution | None = Field( - alias=C.PRIOR_DISTRIBUTION, default=None - ) + prior_distribution: Annotated[ + PriorDistribution | None, BeforeValidator(_convert_nan_to_none) + ] = Field(alias=C.PRIOR_DISTRIBUTION, default=None) #: Prior distribution parameters. prior_parameters: list[float] = Field( alias=C.PRIOR_PARAMETERS, default_factory=list @@ -889,8 +906,18 @@ def _validate_id(cls, v): @field_validator("prior_parameters", mode="before") @classmethod - def _validate_prior_parameters(cls, v): + def _validate_prior_parameters( + cls, v: str | list[str] | float | None | np.ndarray + ): + if v is None: + return [] + + if isinstance(v, float) and np.isnan(v): + return [] + if isinstance(v, str): + if v == "": + return [] v = v.split(C.PARAMETER_SEPARATOR) elif not isinstance(v, Sequence): v = [v] @@ -899,7 +926,7 @@ def _validate_prior_parameters(cls, v): @field_validator("estimate", mode="before") @classmethod - def _validate_estimate_before(cls, v): + def _validate_estimate_before(cls, v: bool | str): if isinstance(v, bool): return v @@ -918,12 +945,17 @@ def _validate_estimate_before(cls, v): def _serialize_estimate(self, estimate: bool, _info): return str(estimate).lower() - @field_validator("lb", "ub", "nominal_value") - @classmethod - def _convert_nan_to_none(cls, v): - if isinstance(v, float) and np.isnan(v): - return None - return v + @field_serializer("prior_distribution") + def 
_serialize_prior_distribution( + self, prior_distribution: PriorDistribution | None, _info + ): + if prior_distribution is None: + return "" + return str(prior_distribution) + + @field_serializer("prior_parameters") + def _serialize_prior_parameters(self, prior_parameters: list[str], _info): + return C.PARAMETER_SEPARATOR.join(prior_parameters) @model_validator(mode="after") def _validate(self) -> Self: @@ -952,7 +984,7 @@ def _validate(self) -> Self: @property def prior_dist(self) -> Distribution: - """Get the pior distribution of the parameter.""" + """Get the prior distribution of the parameter.""" if self.estimate is False: raise ValueError(f"Parameter `{self.id}' is not estimated.") @@ -980,6 +1012,13 @@ def prior_dist(self) -> Distribution: "transformation." ) return cls(*self.prior_parameters, trunc=[self.lb, self.ub]) + + if cls == Uniform: + # `Uniform.__init__` does not accept the `trunc` parameter + low = max(self.prior_parameters[0], self.lb) + high = min(self.prior_parameters[1], self.ub) + return cls(low, high, log=log) + return cls(*self.prior_parameters, log=log, trunc=[self.lb, self.ub]) diff --git a/petab/v2/lint.py b/petab/v2/lint.py index 0fb055e8..2558ea3c 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -14,6 +14,8 @@ import pandas as pd import sympy as sp +from ..v2.C import * +from .core import PriorDistribution from .problem import Problem logger = logging.getLogger(__name__) @@ -37,6 +39,8 @@ "CheckUnusedExperiments", "CheckObservablesDoNotShadowModelEntities", "CheckUnusedConditions", + "CheckAllObservablesDefined", + "CheckPriorDistribution", "lint_problem", "default_validation_tasks", ] @@ -77,8 +81,12 @@ def __post_init__(self): def __str__(self): return f"{self.level.name}: {self.message}" - def _get_task_name(self): - """Get the name of the ValidationTask that raised this error.""" + @staticmethod + def _get_task_name() -> str | None: + """Get the name of the ValidationTask that raised this error. 
+ + Expected to be called from below a `ValidationTask.run`. + """ import inspect # walk up the stack until we find the ValidationTask.run method @@ -88,6 +96,7 @@ def _get_task_name(self): task = frame.f_locals["self"] if isinstance(task, ValidationTask): return task.__class__.__name__ + return None @dataclass @@ -222,6 +231,8 @@ def run(self, problem: Problem) -> ValidationIssue | None: f"Missing files: {', '.join(missing_files)}" ) + return None + class CheckModel(ValidationTask): """A task to validate the model of a PEtab problem.""" @@ -234,6 +245,8 @@ def run(self, problem: Problem) -> ValidationIssue | None: # TODO get actual model validation messages return ValidationError("Model is invalid.") + return None + class CheckMeasuredObservablesDefined(ValidationTask): """A task to check that all observables referenced by the measurements @@ -252,10 +265,13 @@ def run(self, problem: Problem) -> ValidationIssue | None: "measurement table but not defined in observable table." ) + return None + class CheckOverridesMatchPlaceholders(ValidationTask): """A task to check that the number of observable/noise parameters - in the measurements match the number of placeholders in the observables.""" + in the measurements matches the number of placeholders in the observables. 
+ """ def run(self, problem: Problem) -> ValidationIssue | None: observable_parameters_count = { @@ -320,18 +336,20 @@ def run(self, problem: Problem) -> ValidationIssue | None: if messages: return ValidationError("\n".join(messages)) + return None + class CheckPosLogMeasurements(ValidationTask): """Check that measurements for observables with log-transformation are positive.""" def run(self, problem: Problem) -> ValidationIssue | None: - from .core import NoiseDistribution as nd + from .core import NoiseDistribution as ND # noqa: N813 log_observables = { o.id for o in problem.observable_table.observables - if o.noise_distribution in [nd.LOG_NORMAL, nd.LOG_LAPLACE] + if o.noise_distribution in [ND.LOG_NORMAL, ND.LOG_LAPLACE] } if log_observables: for m in problem.measurement_table.measurements: @@ -342,6 +360,8 @@ def run(self, problem: Problem) -> ValidationIssue | None: f"positive, but {m.measurement} <= 0 for {m}" ) + return None + class CheckMeasuredExperimentsDefined(ValidationTask): """A task to check that all experiments referenced by measurements @@ -369,6 +389,8 @@ def run(self, problem: Problem) -> ValidationIssue | None: + str(missing_experiments) ) + return None + class CheckValidConditionTargets(ValidationTask): """Check that all condition table targets are valid.""" @@ -418,6 +440,32 @@ def run(self, problem: Problem) -> ValidationIssue | None: f"{invalid} at time {period.time}." 
) period_targets |= condition_targets + return None + + +class CheckAllObservablesDefined(ValidationTask): + """A task to validate that all observables in the measurement table are + defined in the observable table.""" + + def run(self, problem: Problem) -> ValidationIssue | None: + if problem.measurement_df is None: + return None + + measurement_df = problem.measurement_df + observable_df = problem.observable_df + used_observables = set(measurement_df[OBSERVABLE_ID].values) + defined_observables = ( + set(observable_df.index.values) + if observable_df is not None + else set() + ) + if undefined_observables := (used_observables - defined_observables): + return ValidationError( + f"Observables {undefined_observables} are used in the" + "measurements table but are not defined in observables table." + ) + + return None class CheckUniquePrimaryKeys(ValidationTask): @@ -429,7 +477,7 @@ def run(self, problem: Problem) -> ValidationIssue | None: # check for uniqueness of all primary keys counter = Counter(c.id for c in problem.condition_table.conditions) - duplicates = {id for id, count in counter.items() if count > 1} + duplicates = {id_ for id_, count in counter.items() if count > 1} if duplicates: return ValidationError( @@ -437,7 +485,7 @@ def run(self, problem: Problem) -> ValidationIssue | None: ) counter = Counter(o.id for o in problem.observable_table.observables) - duplicates = {id for id, count in counter.items() if count > 1} + duplicates = {id_ for id_, count in counter.items() if count > 1} if duplicates: return ValidationError( @@ -445,7 +493,7 @@ def run(self, problem: Problem) -> ValidationIssue | None: ) counter = Counter(e.id for e in problem.experiment_table.experiments) - duplicates = {id for id, count in counter.items() if count > 1} + duplicates = {id_ for id_, count in counter.items() if count > 1} if duplicates: return ValidationError( @@ -453,13 +501,15 @@ def run(self, problem: Problem) -> ValidationIssue | None: ) counter = Counter(p.id for p in 
problem.parameter_table.parameters) - duplicates = {id for id, count in counter.items() if count > 1} + duplicates = {id_ for id_, count in counter.items() if count > 1} if duplicates: return ValidationError( f"Parameter table contains duplicate IDs: {duplicates}" ) + return None + class CheckObservablesDoNotShadowModelEntities(ValidationTask): """A task to check that observable IDs do not shadow model entities.""" @@ -479,6 +529,8 @@ def run(self, problem: Problem) -> ValidationIssue | None: f"Observable IDs {shadowed_entities} shadow model entities." ) + return None + class CheckExperimentTable(ValidationTask): """A task to validate the experiment table of a PEtab problem.""" @@ -498,6 +550,8 @@ def run(self, problem: Problem) -> ValidationIssue | None: if messages: return ValidationError("\n".join(messages)) + return None + class CheckExperimentConditionsExist(ValidationTask): """A task to validate that all conditions in the experiment table exist @@ -526,6 +580,8 @@ def run(self, problem: Problem) -> ValidationIssue | None: if messages: return ValidationError("\n".join(messages)) + return None + class CheckAllParametersPresentInParameterTable(ValidationTask): """Ensure all required parameters are contained in the parameter table @@ -573,6 +629,8 @@ def run(self, problem: Problem) -> ValidationIssue | None: + str(extraneous) ) + return None + class CheckValidParameterInConditionOrParameterTable(ValidationTask): """A task to check that all required and only allowed model parameters are @@ -646,9 +704,11 @@ def run(self, problem: Problem) -> ValidationIssue | None: "the condition table and the parameter table." 
) + return None + class CheckUnusedExperiments(ValidationTask): - """A task to check for experiments that are not used in the measurements + """A task to check for experiments that are not used in the measurement table.""" def run(self, problem: Problem) -> ValidationIssue | None: @@ -668,9 +728,11 @@ def run(self, problem: Problem) -> ValidationIssue | None: "measurements table." ) + return None + class CheckUnusedConditions(ValidationTask): - """A task to check for conditions that are not used in the experiments + """A task to check for conditions that are not used in the experiment table.""" def run(self, problem: Problem) -> ValidationIssue | None: @@ -692,6 +754,8 @@ def run(self, problem: Problem) -> ValidationIssue | None: "experiments table." ) + return None + class CheckVisualizationTable(ValidationTask): """A task to validate the visualization table of a PEtab problem.""" @@ -708,6 +772,68 @@ def run(self, problem: Problem) -> ValidationIssue | None: message="Visualization table is invalid.", ) + return None + + +class CheckPriorDistribution(ValidationTask): + """A task to validate the prior distribution of a PEtab problem.""" + + _num_pars = { + PriorDistribution.CAUCHY: 2, + PriorDistribution.CHI_SQUARED: 1, + PriorDistribution.EXPONENTIAL: 1, + PriorDistribution.GAMMA: 2, + PriorDistribution.LAPLACE: 2, + PriorDistribution.LOG10_NORMAL: 2, + PriorDistribution.LOG_LAPLACE: 2, + PriorDistribution.LOG_NORMAL: 2, + PriorDistribution.LOG_UNIFORM: 2, + PriorDistribution.NORMAL: 2, + PriorDistribution.RAYLEIGH: 1, + PriorDistribution.UNIFORM: 2, + } + + def run(self, problem: Problem) -> ValidationIssue | None: + messages = [] + for parameter in problem.parameter_table.parameters: + if parameter.prior_distribution is None: + continue + + if parameter.prior_distribution not in PRIOR_DISTRIBUTIONS: + messages.append( + f"Prior distribution `{parameter.prior_distribution}' " + f"for parameter `{parameter.id}' is not valid." 
+ ) + continue + + if ( + exp_num_par := self._num_pars[parameter.prior_distribution] + ) != len(parameter.prior_parameters): + messages.append( + f"Prior distribution `{parameter.prior_distribution}' " + f"for parameter `{parameter.id}' requires " + f"{exp_num_par} parameters, but got " + f"{len(parameter.prior_parameters)} " + f"({parameter.prior_parameters})." + ) + + # TODO: check distribution parameter domains more specifically + try: + if parameter.estimate: + # .prior_dist fails for non-estimated parameters + _ = parameter.prior_dist.sample(1) + except Exception as e: + messages.append( + f"Prior parameters `{parameter.prior_parameters}' " + f"for parameter `{parameter.id}' are invalid " + f"(hint: {e})." + ) + + if messages: + return ValidationError("\n".join(messages)) + + return None + def get_valid_parameters_for_parameter_table( problem: Problem, @@ -752,7 +878,7 @@ def get_valid_parameters_for_parameter_table( if mapping.model_id and mapping.model_id in parameter_ids.keys(): parameter_ids[mapping.petab_id] = None - # add output parameters from observables table + # add output parameters from observable table output_parameters = get_output_parameters(problem) for p in output_parameters: if p not in invalid: @@ -781,7 +907,7 @@ def get_required_parameters_for_parameter_table( problem: Problem, ) -> Set[str]: """ - Get set of parameters which need to go into the parameter table + Get the set of parameters that need to go into the parameter table Arguments: problem: The PEtab problem @@ -965,4 +1091,9 @@ def get_placeholders( # TODO: atomize checks, update to long condition table, re-enable # CheckVisualizationTable(), # TODO validate mapping table + CheckValidParameterInConditionOrParameterTable(), + CheckAllObservablesDefined(), + CheckAllParametersPresentInParameterTable(), + CheckValidConditionTargets(), + CheckPriorDistribution(), ] diff --git a/petab/v2/petab1to2.py b/petab/v2/petab1to2.py index c788f116..bc7398fc 100644 --- a/petab/v2/petab1to2.py 
+++ b/petab/v2/petab1to2.py @@ -455,4 +455,21 @@ def update_prior(row): errors="ignore", ) + # if uniform, we need to explicitly set the parameters + def update_prior_pars(row): + prior_type = row.get(v2.C.PRIOR_DISTRIBUTION) + prior_pars = row.get(v2.C.PRIOR_PARAMETERS) + + if prior_type in (v2.C.UNIFORM, v2.C.LOG_UNIFORM) and pd.isna( + prior_pars + ): + return ( + f"{row[v2.C.LOWER_BOUND]}{v2.C.PARAMETER_SEPARATOR}" + f"{row[v2.C.UPPER_BOUND]}" + ) + + return prior_pars + + df[v2.C.PRIOR_PARAMETERS] = df.apply(update_prior_pars, axis=1) + return df diff --git a/petab/v2/problem.py b/petab/v2/problem.py index 01903b16..52baf724 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -1121,8 +1121,8 @@ def model_dump(self, **kwargs) -> dict[str, Any]: 'id': 'par', 'lb': 0.0, 'nominal_value': None, - 'prior_distribution': None, - 'prior_parameters': [], + 'prior_distribution': '', + 'prior_parameters': '', 'ub': 1.0}]} """ res = { diff --git a/tests/v2/test_core.py b/tests/v2/test_core.py index 074c0d2d..2aba25e4 100644 --- a/tests/v2/test_core.py +++ b/tests/v2/test_core.py @@ -212,7 +212,7 @@ def test_period(): with pytest.raises(ValidationError, match="got inf"): ExperimentPeriod(time="inf", condition_ids=["p1"]) - with pytest.raises(ValidationError, match="Invalid ID"): + with pytest.raises(ValidationError, match="Invalid conditionId"): ExperimentPeriod(time=1, condition_ids=["1_condition"]) with pytest.raises(ValidationError, match="type=missing"): From 0e90fd9522dfca8536feaedef43980fcb61c0a52 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 28 Apr 2025 20:16:56 +0200 Subject: [PATCH 052/141] Handle `observableTransformation` in `petab.v1.simulate.sample_noise` (#383) Previously, `petab.v1.simulate.sample_noise` silently ignored `observableTransformation`. For example, in case of observableTransformation=log and noiseDistribution=normal, it incorrectly sampled from a normal instead of a log-normal distribution. 
Fixes https://github.com/PEtab-dev/libpetab-python/issues/382. --------- Co-authored-by: Dilan Pathirana <59329744+dilpath@users.noreply.github.com> --- petab/v1/simulate.py | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/petab/v1/simulate.py b/petab/v1/simulate.py index 334929ad..46001a72 100644 --- a/petab/v1/simulate.py +++ b/petab/v1/simulate.py @@ -241,20 +241,39 @@ def sample_noise( simulated_value, ) - # default noise distribution is petab.C.NORMAL - noise_distribution = petab_problem.observable_df.loc[ + observable_row = petab_problem.observable_df.loc[ measurement_row[petab.C.OBSERVABLE_ID] - ].get(petab.C.NOISE_DISTRIBUTION, petab.C.NORMAL) + ] + # default noise distribution is petab.C.NORMAL + noise_distribution = observable_row.get( + petab.C.NOISE_DISTRIBUTION, petab.C.NORMAL + ) # an empty noise distribution column in an observables table can result in # `noise_distribution == float('nan')` if pd.isna(noise_distribution): noise_distribution = petab.C.NORMAL + observable_transformation = observable_row.get( + petab.C.OBSERVABLE_TRANSFORMATION, petab.C.LIN + ) + transform = lambda x: x # noqa: E731 + # observableTransformation=log -> the log of the simulated value is + # distributed according to `noise_distribution` + if observable_transformation == petab.C.LOG: + simulated_value = np.log(simulated_value) + transform = np.exp + elif observable_transformation == petab.C.LOG10: + simulated_value = np.log10(simulated_value) + transform = lambda x: np.power(10, x) # noqa: E731 + # below is e.g.: `np.random.normal(loc=simulation, scale=noise_value)` simulated_value_with_noise = getattr(rng, noise_distribution)( loc=simulated_value, scale=noise_value * noise_scaling_factor ) + # apply observable transformation, ensure `float` type + simulated_value_with_noise = float(transform(simulated_value_with_noise)) + if zero_bounded and np.sign(simulated_value) != np.sign( simulated_value_with_noise ): From 
7a8ad8a2e9c93bd24c44d8d96b40055eee9ebd78 Mon Sep 17 00:00:00 2001 From: Paul Jonas Jost <70631928+PaulJonasJost@users.noreply.github.com> Date: Wed, 7 May 2025 17:34:51 +0200 Subject: [PATCH 053/141] Plot without vis spec without ids_per_plot (#386) * If not providing "ids_per_plot" it automatically assumes that "group_by='observable'". This should be fixed by creating the ids_per_plot based on the selected group_by. * Update petab/v1/visualize/plotting.py Co-authored-by: Daniel Weindl --------- Co-authored-by: Daniel Weindl --- petab/v1/visualize/plotting.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/petab/v1/visualize/plotting.py b/petab/v1/visualize/plotting.py index 17db6d7d..8ff813a3 100644 --- a/petab/v1/visualize/plotting.py +++ b/petab/v1/visualize/plotting.py @@ -861,9 +861,15 @@ def parse_from_id_list( """ if ids_per_plot is None: # this is the default case. If no grouping is specified, - # all observables are plotted. One observable per plot. - unique_obs_list = self._data_df[OBSERVABLE_ID].unique() - ids_per_plot = [[obs_id] for obs_id in unique_obs_list] + # each group_by category will be plotted on a separate plot + unique_ids_list = self._data_df[ + { + "dataset": DATASET_ID, + "observable": OBSERVABLE_ID, + "simulation": SIMULATION_CONDITION_ID, + }[group_by] + ].unique() + ids_per_plot = [[id_] for id_ in unique_ids_list] if group_by == "dataset" and DATASET_ID not in self._data_df: raise ValueError( From 3bc6777017bb8850ee99391c68500b1f06d2884c Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 8 May 2025 13:31:30 +0200 Subject: [PATCH 054/141] Fix parameter scale in sample_from_prior (#385) Return sampled parameters on the parameter scale. This broke in #329 & #335.
--- petab/v1/sampling.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/petab/v1/sampling.py b/petab/v1/sampling.py index c99a1eef..035fe7aa 100644 --- a/petab/v1/sampling.py +++ b/petab/v1/sampling.py @@ -33,7 +33,7 @@ def sample_from_prior( bounds=tuple(bounds), transformation=scaling, ) - return prior.sample(shape=(n_starts,)) + return prior.sample(shape=(n_starts,), x_scaled=True) def sample_parameter_startpoints( From ef795230ed71d8d5d51e7ee997a884fe6830b2b0 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Wed, 2 Jul 2025 07:24:01 +0200 Subject: [PATCH 055/141] v2: Update to new observable placeholder specification (#393) Adapt to the changes in https://github.com/PEtab-dev/PEtab/pull/625. Placeholders are now listed explicitly. Closes https://github.com/PEtab-dev/libpetab-python/issues/390. --- petab/v2/C.py | 4 +++ petab/v2/core.py | 60 +++++++++++++++++++++---------------------- petab/v2/petab1to2.py | 40 +++++++++++++++++++++++++++++ tests/v2/test_core.py | 22 ++++++---------- 4 files changed, 82 insertions(+), 44 deletions(-) diff --git a/petab/v2/C.py b/petab/v2/C.py index fc1f6fa2..99abf343 100644 --- a/petab/v2/C.py +++ b/petab/v2/C.py @@ -145,10 +145,14 @@ OBSERVABLE_NAME = "observableName" #: Observable formula column in the observable table OBSERVABLE_FORMULA = "observableFormula" +#: Observable placeholders column in the observable table +OBSERVABLE_PLACEHOLDERS = "observablePlaceholders" #: Noise formula column in the observable table NOISE_FORMULA = "noiseFormula" #: Noise distribution column in the observable table NOISE_DISTRIBUTION = "noiseDistribution" +#: Noise placeholders column in the observable table +NOISE_PLACEHOLDERS = "noisePlaceholders" #: Mandatory columns of observable table OBSERVABLE_DF_REQUIRED_COLS = [ diff --git a/petab/v2/core.py b/petab/v2/core.py index 1ee74ace..a847b196 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -2,12 +2,11 @@ from __future__ import annotations -import re from 
collections.abc import Sequence from enum import Enum from itertools import chain from pathlib import Path -from typing import Annotated, Literal +from typing import Annotated import numpy as np import pandas as pd @@ -192,6 +191,14 @@ class Observable(BaseModel): noise_distribution: NoiseDistribution = Field( alias=C.NOISE_DISTRIBUTION, default=NoiseDistribution.NORMAL ) + #: Placeholder symbols for the observable formula. + observable_placeholders: list[sp.Symbol] = Field( + alias=C.OBSERVABLE_PLACEHOLDERS, default=[] + ) + #: Placeholder symbols for the noise formula. + noise_placeholders: list[sp.Symbol] = Field( + alias=C.NOISE_PLACEHOLDERS, default=[] + ) #: :meta private: model_config = ConfigDict( @@ -221,37 +228,24 @@ def _sympify(cls, v): return sympify_petab(v) - def _placeholders( - self, type_: Literal["observable", "noise"] - ) -> set[sp.Symbol]: - formula = ( - self.formula - if type_ == "observable" - else self.noise_formula - if type_ == "noise" - else None - ) - if formula is None or formula.is_number: - return set() - - if not (free_syms := formula.free_symbols): - return set() + @field_validator( + "observable_placeholders", "noise_placeholders", mode="before" + ) + @classmethod + def _sympify_id_list(cls, v): + if v is None: + return [] - # TODO: add field validator to check for 1-based consecutive numbering - t = f"{re.escape(type_)}Parameter" - o = re.escape(self.id) - pattern = re.compile(rf"(?:^|\W)({t}\d+_{o})(?=\W|$)") - return {s for s in free_syms if pattern.match(str(s))} + if isinstance(v, float) and np.isnan(v): + return [] - @property - def observable_placeholders(self) -> set[sp.Symbol]: - """Placeholder symbols for the observable formula.""" - return self._placeholders("observable") + if isinstance(v, str): + v = v.split(C.PARAMETER_SEPARATOR) + elif not isinstance(v, Sequence): + v = [v] - @property - def noise_placeholders(self) -> set[sp.Symbol]: - """Placeholder symbols for the noise formula.""" - return 
self._placeholders("noise") + v = [pid.strip() for pid in v] + return [sympify_petab(_valid_petab_id(pid)) for pid in v if pid] class ObservableTable(BaseModel): @@ -289,6 +283,12 @@ def to_df(self) -> pd.DataFrame: noise = record[C.NOISE_FORMULA] record[C.OBSERVABLE_FORMULA] = petab_math_str(obs) record[C.NOISE_FORMULA] = petab_math_str(noise) + record[C.OBSERVABLE_PLACEHOLDERS] = C.PARAMETER_SEPARATOR.join( + map(str, record[C.OBSERVABLE_PLACEHOLDERS]) + ) + record[C.NOISE_PLACEHOLDERS] = C.PARAMETER_SEPARATOR.join( + map(str, record[C.NOISE_PLACEHOLDERS]) + ) return pd.DataFrame(records).set_index([C.OBSERVABLE_ID]) @classmethod diff --git a/petab/v2/petab1to2.py b/petab/v2/petab1to2.py index bc7398fc..3869307f 100644 --- a/petab/v2/petab1to2.py +++ b/petab/v2/petab1to2.py @@ -2,6 +2,7 @@ from __future__ import annotations +import re import shutil from contextlib import suppress from itertools import chain @@ -14,6 +15,7 @@ from pandas.io.common import get_handle, is_url from .. import v1, v2 +from ..v1.math import sympify_petab from ..v1.yaml import get_path_prefix, load_yaml, validate from ..versions import get_major_version from .models import MODEL_TYPE_SBML @@ -351,6 +353,7 @@ def v1v2_observable_df(observable_df: pd.DataFrame) -> pd.DataFrame: Perform all updates that can be done solely on the observable table: * drop observableTransformation, update noiseDistribution + * update placeholder parameters """ df = observable_df.copy().reset_index() @@ -388,6 +391,43 @@ def update_noise_dist(row): df[v2.C.NOISE_DISTRIBUTION] = df.apply(update_noise_dist, axis=1) df.drop(columns=[v1.C.OBSERVABLE_TRANSFORMATION], inplace=True) + def extract_placeholders(row: pd.Series, type_: str) -> str: + """Extract placeholders from observable formula.""" + if type_ == "observable": + formula = row[v1.C.OBSERVABLE_FORMULA] + elif type_ == "noise": + formula = row[v1.C.NOISE_FORMULA] + else: + raise ValueError(f"Unknown placeholder type: {type_}") + + if pd.isna(formula): + 
return "" + + t = f"{re.escape(type_)}Parameter" + o = re.escape(row[v1.C.OBSERVABLE_ID]) + + pattern = re.compile(rf"(?:^|\W)({t}\d+_{o})(?=\W|$)") + + expr = sympify_petab(formula) + # for 10+ placeholders, the current lexicographical sorting will result + # in incorrect ordering of the placeholder IDs, so that they don't + # align with the overrides in the measurement table, but who does + # that anyway? + return v2.C.PARAMETER_SEPARATOR.join( + sorted( + str(sym) + for sym in expr.free_symbols + if sym.is_Symbol and pattern.match(str(sym)) + ) + ) + + df[v2.C.OBSERVABLE_PLACEHOLDERS] = df.apply( + extract_placeholders, args=("observable",), axis=1 + ) + df[v2.C.NOISE_PLACEHOLDERS] = df.apply( + extract_placeholders, args=("noise",), axis=1 + ) + return df diff --git a/tests/v2/test_core.py b/tests/v2/test_core.py index 2aba25e4..2d55f219 100644 --- a/tests/v2/test_core.py +++ b/tests/v2/test_core.py @@ -160,28 +160,22 @@ def test_observable(): assert Observable(id="obs1", formula="x + y", non_petab=1).non_petab == 1 o = Observable(id="obs1", formula=x + y) - assert o.observable_placeholders == set() - assert o.noise_placeholders == set() + assert o.observable_placeholders == [] + assert o.noise_placeholders == [] o = Observable( id="obs1", formula="observableParameter1_obs1", noise_formula="noiseParameter1_obs1", + observable_placeholders="observableParameter1_obs1", + noise_placeholders="noiseParameter1_obs1", ) - assert o.observable_placeholders == { + assert o.observable_placeholders == [ sp.Symbol("observableParameter1_obs1", real=True), - } - assert o.noise_placeholders == { + ] + assert o.noise_placeholders == [ sp.Symbol("noiseParameter1_obs1", real=True) - } - - # TODO: this should raise an error - # (numbering is not consecutive / not starting from 1) - # TODO: clarify if observableParameter0_obs1 would be allowed - # as regular parameter - # - # with pytest.raises(ValidationError): - # Observable(id="obs1", formula="observableParameter2_obs1") + ] def 
test_change(): From 3f49363034a21b7adc9824de892cfc7cc40b058a Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 3 Jul 2025 13:34:14 +0200 Subject: [PATCH 056/141] v2: prettify petablint output (#400) Previously, the raw `ValidationError` tracebacks were shown, which is not what a user wants to see. Related to #369. --- petab/petablint.py | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/petab/petablint.py b/petab/petablint.py index 244b7536..702eb533 100755 --- a/petab/petablint.py +++ b/petab/petablint.py @@ -6,6 +6,7 @@ import logging import sys +import pydantic from colorama import Fore from colorama import init as init_colorama from jsonschema.exceptions import ValidationError as SchemaValidationError @@ -179,12 +180,24 @@ def main(): case 2: from petab.v2.lint import lint_problem - validation_issues = lint_problem(args.yaml_file_name) - if validation_issues: - validation_issues.log(logger=logger) + try: + validation_issues = lint_problem(args.yaml_file_name) + if validation_issues: + # Handle petab.v2.lint.ValidationTask issues + validation_issues.log(logger=logger) + sys.exit(1) + logger.info("PEtab format check completed successfully.") + sys.exit(0) + except pydantic.ValidationError as e: + # Handle Pydantic validation errors + for err in e.errors(): + loc = ", ".join(str(loc) for loc in err["loc"]) + msg = err["msg"] + # TODO: include model info here once available + # https://github.com/pydantic/pydantic/issues/7224 + logger.error(f"Error in field(s) `{loc}`: {msg}") sys.exit(1) - logger.info("PEtab format check completed successfully.") - sys.exit(0) + case _: logger.error( "The provided PEtab files are of unsupported version " From c6d4cabafc82698ed1ddcfe5adca62c4abecdaa7 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 3 Jul 2025 13:49:23 +0200 Subject: [PATCH 057/141] Prettify linter output (#401) Prettify linter output in case of schema violations in the problem yaml file.
Previously, the messages were rather confusing. Also fix an error message and a bug in the default schema choice. Related to #369. --- petab/petablint.py | 15 ++++++++++++++- petab/v1/yaml.py | 6 ++---- 2 files changed, 16 insertions(+), 5 deletions(-) diff --git a/petab/petablint.py b/petab/petablint.py index 702eb533..030f4545 100755 --- a/petab/petablint.py +++ b/petab/petablint.py @@ -161,10 +161,23 @@ def main(): try: validate(args.yaml_file_name) except SchemaValidationError as e: + path = "" + if e.absolute_path: + # construct a path to the error location inside the YAML file + path = list(e.absolute_path) + path = ( + f" at {path[0]}" + + "".join(f"[{str(p)}]" for p in path[1:]) + + ": " + ) logger.error( - f"Provided YAML file does not adhere to PEtab schema: {e}" + "Provided YAML file does not adhere to the PEtab schema" + f"{path}: {e.args[0]}" ) sys.exit(1) + except ValueError as e: + logger.error(e) + sys.exit(1) if petab.is_composite_problem(args.yaml_file_name): # TODO: further checking: diff --git a/petab/v1/yaml.py b/petab/v1/yaml.py index b8330028..0c092049 100644 --- a/petab/v1/yaml.py +++ b/petab/v1/yaml.py @@ -77,16 +77,14 @@ def validate_yaml_syntax( # but let's still use the latest PEtab schema for full validation version = yaml_config.get(FORMAT_VERSION, None) version = ( - parse_version(version)[:2] - if version - else list(SCHEMAS.values())[-1] + parse_version(version)[:2] if version else list(SCHEMAS.keys())[-1] ) try: schema = SCHEMAS[version] except KeyError as e: raise ValueError( - "Unknown PEtab version given in problem " + "No or unknown PEtab version given in problem " f"specification: {version}" ) from e schema = load_yaml(schema) From b0f4c4f1f307849741d6f17a42b783b417ad4128 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 3 Jul 2025 13:52:45 +0200 Subject: [PATCH 058/141] v2: Remove redundant CheckAllObservablesDefined (#399) There is already `CheckMeasuredObservablesDefined`. 
Co-authored-by: Dilan Pathirana <59329744+dilpath@users.noreply.github.com> --- petab/v2/lint.py | 36 +++++------------------------------- 1 file changed, 5 insertions(+), 31 deletions(-) diff --git a/petab/v2/lint.py b/petab/v2/lint.py index 2558ea3c..38b7ff74 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -39,7 +39,6 @@ "CheckUnusedExperiments", "CheckObservablesDoNotShadowModelEntities", "CheckUnusedConditions", - "CheckAllObservablesDefined", "CheckPriorDistribution", "lint_problem", "default_validation_tasks", @@ -261,8 +260,9 @@ def run(self, problem: Problem) -> ValidationIssue | None: } if undefined_observables := (used_observables - defined_observables): return ValidationError( - f"Observables {undefined_observables} used in " - "measurement table but not defined in observable table." + f"Observable(s) {undefined_observables} are used in the " + "measurement table but are not defined in the observable " + "table." ) return None @@ -289,8 +289,8 @@ def run(self, problem: Problem) -> ValidationIssue | None: expected = observable_parameters_count[m.observable_id] except KeyError: messages.append( - f"Observable {m.observable_id} used in measurement " - f"table is not defined." + f"Observable {m.observable_id} is used in the measurement " + f"table but is not defined in the observable table." 
) continue @@ -443,31 +443,6 @@ def run(self, problem: Problem) -> ValidationIssue | None: return None -class CheckAllObservablesDefined(ValidationTask): - """A task to validate that all observables in the measurement table are - defined in the observable table.""" - - def run(self, problem: Problem) -> ValidationIssue | None: - if problem.measurement_df is None: - return None - - measurement_df = problem.measurement_df - observable_df = problem.observable_df - used_observables = set(measurement_df[OBSERVABLE_ID].values) - defined_observables = ( - set(observable_df.index.values) - if observable_df is not None - else set() - ) - if undefined_observables := (used_observables - defined_observables): - return ValidationError( - f"Observables {undefined_observables} are used in the" - "measurements table but are not defined in observables table." - ) - - return None - - class CheckUniquePrimaryKeys(ValidationTask): """Check that all primary keys are unique.""" @@ -1092,7 +1067,6 @@ def get_placeholders( # CheckVisualizationTable(), # TODO validate mapping table CheckValidParameterInConditionOrParameterTable(), - CheckAllObservablesDefined(), CheckAllParametersPresentInParameterTable(), CheckValidConditionTargets(), CheckPriorDistribution(), From 70ef57b0feb200e5c87f4a97b24f0ca5abb88ff5 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 3 Jul 2025 19:09:23 +0200 Subject: [PATCH 059/141] Add `v2.calculate` / fix `v1.calculate` (#395) * Mostly a copy of `v1.calculate` with a few adaptations to handle the new noise distributions and placeholder format. 
* Fix sign of residuals * Fix incorrect scaling of observables used in noise expressions for log-type-distributions --- petab/v1/calculate.py | 28 +-- petab/v2/calculate.py | 487 +++++++++++++++++++++++++++++++++++++ petab/v2/math/__init__.py | 3 + petab/v2/problem.py | 9 +- tests/v1/test_calculate.py | 47 ++-- tests/v2/test_calculate.py | 451 ++++++++++++++++++++++++++++++++++ 6 files changed, 984 insertions(+), 41 deletions(-) create mode 100644 petab/v2/calculate.py create mode 100644 petab/v2/math/__init__.py create mode 100644 tests/v2/test_calculate.py diff --git a/petab/v1/calculate.py b/petab/v1/calculate.py index 4c129b88..131d0d60 100644 --- a/petab/v1/calculate.py +++ b/petab/v1/calculate.py @@ -1,6 +1,7 @@ """Functions performing various calculations.""" import numbers +import operator from functools import reduce import numpy as np @@ -139,19 +140,17 @@ def calculate_residuals_for_table( # apply scaling observable = observable_df.loc[row[OBSERVABLE_ID]] trafo = observable.get(OBSERVABLE_TRANSFORMATION, LIN) - simulation = petab.scale(simulation, trafo) - measurement = petab.scale(measurement, trafo) + scaled_simulation = petab.scale(simulation, trafo) + scaled_measurement = petab.scale(measurement, trafo) # non-normalized residual is just the difference - residual = simulation - measurement + residual = scaled_measurement - scaled_simulation - noise_value = 1 if normalize: - # look up noise standard deviation - noise_value = evaluate_noise_formula( + # divide by standard deviation + residual /= evaluate_noise_formula( row, noise_formulas, parameter_df, simulation ) - residual /= noise_value # fill in value residual_df.loc[irow, RESIDUAL] = residual @@ -169,13 +168,10 @@ def get_symbolic_noise_formulas(observable_df) -> dict[str, sp.Expr]: """ noise_formulas = {} # iterate over observables - for row in observable_df.itertuples(): - observable_id = row.Index - if NOISE_FORMULA not in observable_df.columns: - noise_formula = None - else: - noise_formula = 
sympify_petab(row.noiseFormula) - noise_formulas[observable_id] = noise_formula + for observable_id, row in observable_df.iterrows(): + noise_formulas[observable_id] = ( + sympify_petab(row.noiseFormula) if NOISE_FORMULA in row else None + ) return noise_formulas @@ -364,7 +360,7 @@ def calculate_llh_for_table( (simulation_df[col] == row[col]) | petab.is_empty(row[col]) for col in compared_cols ] - mask = reduce(lambda x, y: x & y, masks) + mask = reduce(operator.and_, masks) simulation = simulation_df.loc[mask][SIMULATION].iloc[0] @@ -375,7 +371,7 @@ def calculate_llh_for_table( # get noise standard deviation noise_value = evaluate_noise_formula( - row, noise_formulas, parameter_df, petab.scale(simulation, scale) + row, noise_formulas, parameter_df, simulation ) # get noise distribution diff --git a/petab/v2/calculate.py b/petab/v2/calculate.py new file mode 100644 index 00000000..830c2d89 --- /dev/null +++ b/petab/v2/calculate.py @@ -0,0 +1,487 @@ +"""Functions performing various calculations.""" + +import numbers +import operator +from functools import reduce + +import numpy as np +import pandas as pd +import sympy as sp + +from petab.v1 import is_empty, split_parameter_replacement_list + +from .C import * +from .math import sympify_petab + +__all__ = [ + "calculate_residuals", + "calculate_residuals_for_table", + "get_symbolic_noise_formulas", + "evaluate_noise_formula", + "calculate_chi2", + "calculate_chi2_for_table_from_residuals", + "calculate_llh", + "calculate_llh_for_table", + "calculate_single_llh", +] + + +def calculate_residuals( + measurement_dfs: list[pd.DataFrame] | pd.DataFrame, + simulation_dfs: list[pd.DataFrame] | pd.DataFrame, + observable_dfs: list[pd.DataFrame] | pd.DataFrame, + parameter_dfs: list[pd.DataFrame] | pd.DataFrame, + normalize: bool = True, + scale: bool = True, +) -> list[pd.DataFrame]: + """Calculate residuals. + + Arguments: + measurement_dfs: + The problem measurement tables. 
+ simulation_dfs: + Simulation tables corresponding to the measurement tables. + observable_dfs: + The problem observable tables. + parameter_dfs: + The problem parameter tables. + normalize: + Whether to normalize residuals by the noise standard deviation + terms. + scale: + Whether to calculate residuals of scaled values. + + Returns: + List of DataFrames in the same structure as `measurement_dfs` + with a field `residual` instead of measurement. + """ + # convenience + if isinstance(measurement_dfs, pd.DataFrame): + measurement_dfs = [measurement_dfs] + if isinstance(simulation_dfs, pd.DataFrame): + simulation_dfs = [simulation_dfs] + if isinstance(observable_dfs, pd.DataFrame): + observable_dfs = [observable_dfs] + if isinstance(parameter_dfs, pd.DataFrame): + parameter_dfs = [parameter_dfs] + + # iterate over data frames + residual_dfs = [] + for measurement_df, simulation_df, observable_df, parameter_df in zip( + measurement_dfs, + simulation_dfs, + observable_dfs, + parameter_dfs, + strict=True, + ): + residual_df = calculate_residuals_for_table( + measurement_df, + simulation_df, + observable_df, + parameter_df, + normalize, + scale, + ) + residual_dfs.append(residual_df) + return residual_dfs + + +def calculate_residuals_for_table( + measurement_df: pd.DataFrame, + simulation_df: pd.DataFrame, + observable_df: pd.DataFrame, + parameter_df: pd.DataFrame, + normalize: bool = True, + scale: bool = True, +) -> pd.DataFrame: + """ + Calculate residuals for a single measurement table. + For the arguments, see `calculate_residuals`. 
+ """ + from petab.v1 import scale + + # below, we rely on a unique index + measurement_df = measurement_df.reset_index(drop=True) + + # create residual df as copy of measurement df, change column + residual_df = measurement_df.copy(deep=True).rename( + columns={MEASUREMENT: RESIDUAL} + ) + residual_df[RESIDUAL] = residual_df[RESIDUAL].astype("float64") + # matching columns + compared_cols = set(measurement_df.columns) & set(simulation_df.columns) + + # compute noise formulas for observables + noise_formulas = get_symbolic_noise_formulas(observable_df) + + # iterate over measurements, find corresponding simulations + for irow, row in measurement_df.iterrows(): + measurement = row[MEASUREMENT] + # look up in simulation df + masks = [ + (simulation_df[col] == row[col]) | is_empty(row[col]) + for col in compared_cols + ] + mask = reduce(operator.and_, masks) + if mask.sum() == 0: + raise ValueError( + f"Could not find simulation for measurement {row}." + ) + # if we have multiple matches, check that the rows are all identical + elif ( + mask.sum() > 1 + and simulation_df.loc[mask].drop_duplicates().shape[0] > 1 + ): + raise ValueError( + f"Multiple different simulations found for measurement " + f"{row}:\n{simulation_df.loc[mask]}" + ) + + simulation = simulation_df.loc[mask][SIMULATION].iloc[0] + if scale: + # apply scaling + observable = observable_df.loc[row[OBSERVABLE_ID]] + # for v2, the transformation is part of the noise distribution + noise_distr = observable.get(NOISE_DISTRIBUTION, NORMAL) + if noise_distr.startswith("log-"): + trafo = LOG + elif noise_distr.startswith("log10-"): + trafo = LOG10 + else: + trafo = LIN + + # scale simulation and measurement + + scaled_simulation = scale(simulation, trafo) + scaled_measurement = scale(measurement, trafo) + + # non-normalized residual is just the difference + residual = scaled_measurement - scaled_simulation + + if normalize: + # divide by standard deviation + residual /= evaluate_noise_formula( + row, 
noise_formulas, parameter_df, simulation, observable + ) + + # fill in value + residual_df.loc[irow, RESIDUAL] = residual + return residual_df + + +def get_symbolic_noise_formulas(observable_df) -> dict[str, sp.Expr]: + """Sympify noise formulas. + + Arguments: + observable_df: The observable table. + + Returns: + Dictionary of {observable_id}: {noise_formula}. + """ + noise_formulas = {} + # iterate over observables + for observable_id, row in observable_df.iterrows(): + noise_formulas[observable_id] = ( + sympify_petab(row.noiseFormula) if NOISE_FORMULA in row else None + ) + return noise_formulas + + +def evaluate_noise_formula( + measurement: pd.Series, + noise_formulas: dict[str, sp.Expr], + parameter_df: pd.DataFrame, + simulation: numbers.Number, + observable: dict, +) -> float: + """Fill in parameters for `measurement` and evaluate noise_formula. + + Arguments: + measurement: A measurement table row. + noise_formulas: The noise formulas as computed by + `get_symbolic_noise_formulas`. + parameter_df: The parameter table. + simulation: The simulation corresponding to the measurement, scaled. + observable: The observable table row corresponding to the measurement. + + Returns: + The noise value. 
+ """ + # the observable id + observable_id = measurement[OBSERVABLE_ID] + + # extract measurement specific overrides + observable_parameter_overrides = split_parameter_replacement_list( + measurement.get(OBSERVABLE_PARAMETERS, None) + ) + noise_parameter_overrides = split_parameter_replacement_list( + measurement.get(NOISE_PARAMETERS, None) + ) + observable_parameter_placeholders = observable.get( + OBSERVABLE_PLACEHOLDERS, "" + ).split(PARAMETER_SEPARATOR) + noise_parameter_placeholders = observable.get( + NOISE_PLACEHOLDERS, "" + ).split(PARAMETER_SEPARATOR) + + # fill in measurement specific parameters + overrides = { + sp.Symbol(placeholder, real=True): override + for placeholder, override in zip( + [ + p.strip() + for p in observable_parameter_placeholders + + noise_parameter_placeholders + if p.strip() + ], + observable_parameter_overrides + noise_parameter_overrides, + strict=False, + ) + } + + # fill in observables + overrides[sp.Symbol(observable_id, real=True)] = simulation + + # fill in general parameters + for row in parameter_df.itertuples(): + overrides[sp.Symbol(row.Index, real=True)] = row.nominalValue + + # replace parametric measurement specific parameters + for key, value in overrides.items(): + if not isinstance(value, numbers.Number): + # is parameter + overrides[key] = parameter_df.loc[value, NOMINAL_VALUE] + + # replace parameters by values in formula + noise_formula = noise_formulas[observable_id] + noise_value = noise_formula.subs(overrides) + + # conversion is possible if all parameters are replaced + try: + noise_value = float(noise_value) + except TypeError as e: + raise ValueError( + f"Cannot replace all parameters in noise formula {noise_value} " + f"for observable {observable_id}. " + f"Missing {noise_formula.free_symbols}. Note that model states " + "are currently not supported." 
+ ) from e + return noise_value + + +def calculate_chi2( + measurement_dfs: list[pd.DataFrame] | pd.DataFrame, + simulation_dfs: list[pd.DataFrame] | pd.DataFrame, + observable_dfs: list[pd.DataFrame] | pd.DataFrame, + parameter_dfs: list[pd.DataFrame] | pd.DataFrame, + normalize: bool = True, + scale: bool = True, +) -> float: + """Calculate the chi2 value. + + Arguments: + measurement_dfs: + The problem measurement tables. + simulation_dfs: + Simulation tables corresponding to the measurement tables. + observable_dfs: + The problem observable tables. + parameter_dfs: + The problem parameter tables. + normalize: + Whether to normalize residuals by the noise standard deviation + terms. + scale: + Whether to calculate residuals of scaled values. + + Returns: + The aggregated chi2 value. + """ + residual_dfs = calculate_residuals( + measurement_dfs, + simulation_dfs, + observable_dfs, + parameter_dfs, + normalize, + scale, + ) + chi2s = [ + calculate_chi2_for_table_from_residuals(df) for df in residual_dfs + ] + return sum(chi2s) + + +def calculate_chi2_for_table_from_residuals( + residual_df: pd.DataFrame, +) -> float: + """Compute chi2 value for a single residual table.""" + return (np.array(residual_df[RESIDUAL]) ** 2).sum() + + +def calculate_llh( + measurement_dfs: list[pd.DataFrame] | pd.DataFrame, + simulation_dfs: list[pd.DataFrame] | pd.DataFrame, + observable_dfs: list[pd.DataFrame] | pd.DataFrame, + parameter_dfs: list[pd.DataFrame] | pd.DataFrame, +) -> float: + """Calculate total log likelihood. + + Arguments: + measurement_dfs: + The problem measurement tables. + simulation_dfs: + Simulation tables corresponding to the measurement tables. + observable_dfs: + The problem observable tables. + parameter_dfs: + The problem parameter tables. + + Returns: + The log-likelihood. 
+ """ + # convenience + if isinstance(measurement_dfs, pd.DataFrame): + measurement_dfs = [measurement_dfs] + if isinstance(simulation_dfs, pd.DataFrame): + simulation_dfs = [simulation_dfs] + if isinstance(observable_dfs, pd.DataFrame): + observable_dfs = [observable_dfs] + if isinstance(parameter_dfs, pd.DataFrame): + parameter_dfs = [parameter_dfs] + + # iterate over data frames + llhs = [] + for measurement_df, simulation_df, observable_df, parameter_df in zip( + measurement_dfs, + simulation_dfs, + observable_dfs, + parameter_dfs, + strict=True, + ): + _llh = calculate_llh_for_table( + measurement_df, simulation_df, observable_df, parameter_df + ) + llhs.append(_llh) + return sum(llhs) + + +def calculate_llh_for_table( + measurement_df: pd.DataFrame, + simulation_df: pd.DataFrame, + observable_df: pd.DataFrame, + parameter_df: pd.DataFrame, +) -> float: + """Calculate log-likelihood for one set of tables. For the arguments, see + `calculate_llh`. + """ + + llhs = [] + + # matching columns + compared_cols = set(measurement_df.columns) & set(simulation_df.columns) + + # compute noise formulas for observables + noise_formulas = get_symbolic_noise_formulas(observable_df) + + # iterate over measurements, find corresponding simulations + for _, row in measurement_df.iterrows(): + measurement = row[MEASUREMENT] + + # look up in simulation df + masks = [ + (simulation_df[col] == row[col]) | is_empty(row[col]) + for col in compared_cols + ] + mask = reduce(lambda x, y: x & y, masks) + + simulation = simulation_df.loc[mask][SIMULATION].iloc[0] + + observable = observable_df.loc[row[OBSERVABLE_ID]] + + # get noise distribution + noise_distr = observable.get(NOISE_DISTRIBUTION, NORMAL) + + if noise_distr.startswith("log-"): + obs_scale = LOG + noise_distr = noise_distr.removeprefix("log-") + elif noise_distr.startswith("log10-"): + obs_scale = LOG10 + noise_distr = noise_distr.removeprefix("log10-") + else: + obs_scale = LIN + + # get noise standard deviation + 
noise_value = evaluate_noise_formula( + row, + noise_formulas, + parameter_df, + simulation, + observable, + ) + + llh = calculate_single_llh( + measurement, simulation, obs_scale, noise_distr, noise_value + ) + llhs.append(llh) + return sum(llhs) + + +def calculate_single_llh( + measurement: float, + simulation: float, + scale: str, + noise_distribution: str, + noise_value: float, +) -> float: + """Calculate a single log likelihood. + + Arguments: + measurement: The measurement value. + simulation: The simulated value. + scale: The scale on which the noise model is to be applied. + noise_distribution: The noise distribution. + noise_value: The considered noise models possess a single noise + parameter, e.g. the normal standard deviation. + + Returns: + The computed likelihood for the given values. + """ + # PEtab v2: + if noise_distribution == LOG10_NORMAL and scale == LIN: + noise_distribution = NORMAL + scale = LOG10 + elif noise_distribution == LOG_NORMAL and scale == LIN: + noise_distribution = NORMAL + scale = LOG + + # short-hand + m, s, sigma = measurement, simulation, noise_value + pi, log, log10 = np.pi, np.log, np.log10 + + # go over the possible cases + if noise_distribution == NORMAL and scale == LIN: + nllh = 0.5 * log(2 * pi * sigma**2) + 0.5 * ((s - m) / sigma) ** 2 + elif noise_distribution == NORMAL and scale == LOG: + nllh = ( + 0.5 * log(2 * pi * sigma**2 * m**2) + + 0.5 * ((log(s) - log(m)) / sigma) ** 2 + ) + elif noise_distribution == NORMAL and scale == LOG10: + nllh = ( + 0.5 * log(2 * pi * sigma**2 * m**2 * log(10) ** 2) + + 0.5 * ((log10(s) - log10(m)) / sigma) ** 2 + ) + elif noise_distribution == LAPLACE and scale == LIN: + nllh = log(2 * sigma) + abs((s - m) / sigma) + elif noise_distribution == LAPLACE and scale == LOG: + nllh = log(2 * sigma * m) + abs((log(s) - log(m)) / sigma) + elif noise_distribution == LAPLACE and scale == LOG10: + nllh = log(2 * sigma * m * log(10)) + abs( + (log10(s) - log10(m)) / sigma + ) + else: + raise 
NotImplementedError( + "Unsupported combination of noise_distribution and scale " + f"specified: {noise_distribution}, {scale}." + ) + return -nllh diff --git a/petab/v2/math/__init__.py b/petab/v2/math/__init__.py new file mode 100644 index 00000000..8a5a5559 --- /dev/null +++ b/petab/v2/math/__init__.py @@ -0,0 +1,3 @@ +"""Functions for parsing and evaluating mathematical expressions.""" + +from petab.v1.math import * # noqa: F401 diff --git a/petab/v2/problem.py b/petab/v2/problem.py index 52baf724..f6ec9c6a 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -909,6 +909,8 @@ def add_observable( formula: str, noise_formula: str | float | int = None, noise_distribution: str = None, + observable_placeholders: list[str] = None, + noise_placeholders: list[str] = None, name: str = None, **kwargs, ): @@ -919,6 +921,8 @@ def add_observable( formula: The observable formula noise_formula: The noise formula noise_distribution: The noise distribution + observable_placeholders: Placeholders for the observable formula + noise_placeholders: Placeholders for the noise formula name: The observable name kwargs: additional columns/values to add to the observable table @@ -933,7 +937,10 @@ def add_observable( record[NOISE_FORMULA] = noise_formula if noise_distribution is not None: record[NOISE_DISTRIBUTION] = noise_distribution - + if observable_placeholders is not None: + record[OBSERVABLE_PLACEHOLDERS] = observable_placeholders + if noise_placeholders is not None: + record[NOISE_PLACEHOLDERS] = noise_placeholders record.update(kwargs) self.observable_table += core.Observable(**record) diff --git a/tests/v1/test_calculate.py b/tests/v1/test_calculate.py index 526ea7c9..c13105a8 100644 --- a/tests/v1/test_calculate.py +++ b/tests/v1/test_calculate.py @@ -4,14 +4,14 @@ import pandas as pd import pytest -import petab -from petab import ( +from petab.v1 import get_observable_df, get_parameter_df +from petab.v1.C import * +from petab.v1.calculate import ( calculate_chi2, 
calculate_llh, calculate_residuals, calculate_single_llh, ) -from petab.C import * def model_simple(): @@ -43,12 +43,12 @@ def model_simple(): simulation_df[SIMULATION] = [2, 2, 19, 20] expected_residuals = { - (2 - 0) / 2, - (2 - 1) / 2, - (19 - 20) / 3, - (20 - 22) / 3, + (0 - 2) / 2, + (1 - 2) / 2, + (20 - 19) / 3, + (22 - 20) / 3, } - expected_residuals_nonorm = {2 - 0, 2 - 1, 19 - 20, 20 - 22} + expected_residuals_nonorm = {0 - 2, 1 - 2, 20 - 19, 22 - 20} expected_llh = ( -0.5 * (np.array(list(expected_residuals)) ** 2).sum() - 0.5 * np.log(2 * np.pi * np.array([2, 2, 3, 3]) ** 2).sum() @@ -56,8 +56,8 @@ def model_simple(): return ( measurement_df, - petab.get_observable_df(observable_df), - petab.get_parameter_df(parameter_df), + get_observable_df(observable_df), + get_parameter_df(parameter_df), simulation_df, expected_residuals, expected_residuals_nonorm, @@ -93,8 +93,8 @@ def model_replicates(): ) simulation_df[SIMULATION] = [2, 2] - expected_residuals = {(2 - 0) / 2, (2 - 1) / 2} - expected_residuals_nonorm = {2 - 0, 2 - 1} + expected_residuals = {(0 - 2) / 2, (1 - 2) / 2} + expected_residuals_nonorm = {0 - 2, 1 - 2} expected_llh = ( -0.5 * (np.array(list(expected_residuals)) ** 2).sum() - 0.5 * np.log(2 * np.pi * np.array([2, 2]) ** 2).sum() @@ -141,12 +141,12 @@ def model_scalings(): simulation_df[SIMULATION] = [2, 3] expected_residuals = { - (np.log(2) - np.log(0.5)) / 2, - (np.log(3) - np.log(1)) / 2, + (np.log(0.5) - np.log(2)) / 2, + (np.log(1) - np.log(3)) / 2, } expected_residuals_nonorm = { - np.log(2) - np.log(0.5), - np.log(3) - np.log(1), + np.log(0.5) - np.log(2), + np.log(1) - np.log(3), } expected_llh = ( -0.5 * (np.array(list(expected_residuals)) ** 2).sum() @@ -201,12 +201,12 @@ def model_non_numeric_overrides(): simulation_df[SIMULATION] = [2, 3] expected_residuals = { - (np.log(2) - np.log(0.5)) / (2 * 7 + 8 + 4 + np.log(2)), - (np.log(3) - np.log(1)) / (2 * 2 + 3 + 4 + np.log(3)), + (np.log(0.5) - np.log(2)) / (2 * 7 + 8 + 4 + 2), + 
(np.log(1) - np.log(3)) / (2 * 2 + 3 + 4 + 3), } expected_residuals_nonorm = { - np.log(2) - np.log(0.5), - np.log(3) - np.log(1), + np.log(0.5) - np.log(2), + np.log(1) - np.log(3), } expected_llh = ( -0.5 * (np.array(list(expected_residuals)) ** 2).sum() @@ -214,8 +214,7 @@ def model_non_numeric_overrides(): * np.log( 2 * np.pi - * np.array([2 * 7 + 8 + 4 + np.log(2), 2 * 2 + 3 + 4 + np.log(3)]) - ** 2 + * np.array([2 * 7 + 8 + 4 + 2, 2 * 2 + 3 + 4 + 3]) ** 2 * np.array([0.5, 1]) ** 2 ).sum() ) @@ -261,8 +260,8 @@ def model_custom_likelihood(): ) simulation_df[SIMULATION] = [2, 3] - expected_residuals = {(np.log(2) - np.log(0.5)) / 2, (3 - 2) / 1.5} - expected_residuals_nonorm = {np.log(2) - np.log(0.5), 3 - 2} + expected_residuals = {(np.log(0.5) - np.log(2)) / 2, (2 - 3) / 1.5} + expected_residuals_nonorm = {np.log(0.5) - np.log(2), 2 - 3} expected_llh = ( -np.abs(list(expected_residuals)).sum() - np.log(2 * np.array([2, 1.5]) * np.array([0.5, 1])).sum() diff --git a/tests/v2/test_calculate.py b/tests/v2/test_calculate.py new file mode 100644 index 00000000..cba929ae --- /dev/null +++ b/tests/v2/test_calculate.py @@ -0,0 +1,451 @@ +"""Tests related to petab.calculate.""" + +import numpy as np +import pandas as pd +import pytest + +from petab.v2 import get_observable_df, get_parameter_df +from petab.v2.C import * +from petab.v2.calculate import ( + calculate_chi2, + calculate_llh, + calculate_residuals, + calculate_single_llh, +) + + +def model_simple(): + "Simple model." 
+ measurement_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs_a", "obs_a", "obs_b", "obs_b"], + EXPERIMENT_ID: ["c0", "c1", "c0", "c1"], + TIME: [0, 10, 0, 10], + MEASUREMENT: [0, 1, 20, 22], + } + ) + + observable_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs_a", "obs_b"], + OBSERVABLE_FORMULA: ["A", "B"], + NOISE_FORMULA: [2, 3], + } + ).set_index([OBSERVABLE_ID]) + + parameter_df = pd.DataFrame( + data={PARAMETER_ID: ["par1", "par2"], NOMINAL_VALUE: [3, 4]} + ) + + simulation_df = measurement_df.copy(deep=True).rename( + columns={MEASUREMENT: SIMULATION} + ) + simulation_df[SIMULATION] = [2, 2, 19, 20] + + expected_residuals = { + (0 - 2) / 2, + (1 - 2) / 2, + (20 - 19) / 3, + (22 - 20) / 3, + } + expected_residuals_nonorm = {0 - 2, 1 - 2, 20 - 19, 22 - 20} + expected_llh = ( + -0.5 * (np.array(list(expected_residuals)) ** 2).sum() + - 0.5 * np.log(2 * np.pi * np.array([2, 2, 3, 3]) ** 2).sum() + ) + + return ( + measurement_df, + get_observable_df(observable_df), + get_parameter_df(parameter_df), + simulation_df, + expected_residuals, + expected_residuals_nonorm, + expected_llh, + ) + + +def model_replicates(): + """Model with replicates.""" + measurement_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs_a", "obs_a"], + EXPERIMENT_ID: ["c0", "c0"], + TIME: [10, 10], + MEASUREMENT: [0, 1], + } + ) + + observable_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs_a"], + OBSERVABLE_FORMULA: ["A"], + NOISE_FORMULA: [2], + } + ).set_index([OBSERVABLE_ID]) + + parameter_df = pd.DataFrame( + data={PARAMETER_ID: ["par1", "par2"], NOMINAL_VALUE: [3, 4]} + ).set_index([PARAMETER_ID]) + + simulation_df = measurement_df.copy(deep=True).rename( + columns={MEASUREMENT: SIMULATION} + ) + simulation_df[SIMULATION] = [2, 2] + + expected_residuals = {(0 - 2) / 2, (1 - 2) / 2} + expected_residuals_nonorm = {0 - 2, 1 - 2} + expected_llh = ( + -0.5 * (np.array(list(expected_residuals)) ** 2).sum() + - 0.5 * np.log(2 * np.pi * np.array([2, 2]) ** 2).sum() + ) + + return ( + 
measurement_df, + observable_df, + parameter_df, + simulation_df, + expected_residuals, + expected_residuals_nonorm, + expected_llh, + ) + + +def model_scalings(): + """Model with scalings.""" + measurement_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs_a", "obs_a"], + EXPERIMENT_ID: ["c0", "c0"], + TIME: [5, 10], + MEASUREMENT: [0.5, 1], + } + ) + + observable_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs_a"], + OBSERVABLE_FORMULA: ["A"], + NOISE_DISTRIBUTION: [LOG_NORMAL], + NOISE_FORMULA: [2], + } + ).set_index([OBSERVABLE_ID]) + + parameter_df = pd.DataFrame( + data={PARAMETER_ID: ["par1", "par2"], NOMINAL_VALUE: [3, 4]} + ).set_index([PARAMETER_ID]) + + simulation_df = measurement_df.copy(deep=True).rename( + columns={MEASUREMENT: SIMULATION} + ) + simulation_df[SIMULATION] = [2, 3] + + expected_residuals = { + (np.log(0.5) - np.log(2)) / 2, + (np.log(1) - np.log(3)) / 2, + } + expected_residuals_nonorm = { + np.log(0.5) - np.log(2), + np.log(1) - np.log(3), + } + expected_llh = ( + -0.5 * (np.array(list(expected_residuals)) ** 2).sum() + - 0.5 + * np.log( + 2 * np.pi * np.array([2, 2]) ** 2 * np.array([0.5, 1]) ** 2 + ).sum() + ) + + return ( + measurement_df, + observable_df, + parameter_df, + simulation_df, + expected_residuals, + expected_residuals_nonorm, + expected_llh, + ) + + +def model_non_numeric_overrides(): + """Model with non-numeric overrides.""" + measurement_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs_a", "obs_a"], + EXPERIMENT_ID: ["c0", "c0"], + TIME: [5, 10], + MEASUREMENT: [0.5, 1], + NOISE_PARAMETERS: ["7;8", "2;par1"], + } + ) + + observable_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs_a"], + OBSERVABLE_FORMULA: ["A"], + NOISE_DISTRIBUTION: [LOG_NORMAL], + NOISE_FORMULA: [ + "2*noiseParameter1_obs_a + " + "noiseParameter2_obs_a + par2 + obs_a" + ], + NOISE_PLACEHOLDERS: [ + "noiseParameter1_obs_a;noiseParameter2_obs_a" + ], + } + ).set_index([OBSERVABLE_ID]) + + parameter_df = pd.DataFrame( + data={PARAMETER_ID: 
["par1", "par2"], NOMINAL_VALUE: [3, 4]} + ).set_index([PARAMETER_ID]) + + simulation_df = measurement_df.copy(deep=True).rename( + columns={MEASUREMENT: SIMULATION} + ) + simulation_df[SIMULATION] = [2, 3] + + expected_residuals = { + (np.log(0.5) - np.log(2)) / (2 * 7 + 8 + 4 + 2), + (np.log(1) - np.log(3)) / (2 * 2 + 3 + 4 + 3), + } + expected_residuals_nonorm = { + np.log(0.5) - np.log(2), + np.log(1) - np.log(3), + } + expected_llh = ( + -0.5 * (np.array(list(expected_residuals)) ** 2).sum() + - 0.5 + * np.log( + 2 + * np.pi + * np.array([2 * 7 + 8 + 4 + 2, 2 * 2 + 3 + 4 + 3]) ** 2 + * np.array([0.5, 1]) ** 2 + ).sum() + ) + + return ( + measurement_df, + observable_df, + parameter_df, + simulation_df, + expected_residuals, + expected_residuals_nonorm, + expected_llh, + ) + + +def model_custom_likelihood(): + """Model with customized likelihoods.""" + measurement_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs_a", "obs_b"], + EXPERIMENT_ID: ["c0", "c0"], + TIME: [5, 10], + MEASUREMENT: [0.5, 2], + } + ) + + observable_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["obs_a", "obs_b"], + OBSERVABLE_FORMULA: ["A", "B"], + NOISE_FORMULA: [2, 1.5], + NOISE_DISTRIBUTION: [LOG_LAPLACE, LAPLACE], + } + ).set_index([OBSERVABLE_ID]) + + parameter_df = pd.DataFrame( + data={PARAMETER_ID: ["par1", "par2"], NOMINAL_VALUE: [3, 4]} + ).set_index([PARAMETER_ID]) + + simulation_df = measurement_df.copy(deep=True).rename( + columns={MEASUREMENT: SIMULATION} + ) + simulation_df[SIMULATION] = [2, 3] + + expected_residuals = {(np.log(0.5) - np.log(2)) / 2, (2 - 3) / 1.5} + expected_residuals_nonorm = {np.log(0.5) - np.log(2), 2 - 3} + expected_llh = ( + -np.abs(list(expected_residuals)).sum() + - np.log(2 * np.array([2, 1.5]) * np.array([0.5, 1])).sum() + ) + + return ( + measurement_df, + observable_df, + parameter_df, + simulation_df, + expected_residuals, + expected_residuals_nonorm, + expected_llh, + ) + + +@pytest.fixture +def models(): + """Test model collection covering 
different features.""" + return [ + model_simple(), + model_replicates(), + model_scalings(), + model_non_numeric_overrides(), + model_custom_likelihood(), + ] + + +def test_calculate_residuals(models): # pylint: disable=W0621 + """Test calculate.calculate_residuals.""" + for i_model, model in enumerate(models): + print(f"Model {i_model}") + ( + measurement_df, + observable_df, + parameter_df, + simulation_df, + expected_residuals, + _, + _, + ) = model + residual_dfs = calculate_residuals( + measurement_df, simulation_df, observable_df, parameter_df + ) + assert sorted(residual_dfs[0][RESIDUAL]) == pytest.approx( + sorted(expected_residuals) + ) + + +def test_calculate_non_normalized_residuals(models): # pylint: disable=W0621 + """Test calculate.calculate_residuals without normalization.""" + for i_model, model in enumerate(models): + print(f"Model {i_model}") + ( + measurement_df, + observable_df, + parameter_df, + simulation_df, + _, + expected_residuals_nonorm, + _, + ) = model + residual_dfs = calculate_residuals( + measurement_df, + simulation_df, + observable_df, + parameter_df, + normalize=False, + ) + assert sorted(residual_dfs[0][RESIDUAL]) == pytest.approx( + sorted(expected_residuals_nonorm) + ) + + +def test_calculate_chi2(models): # pylint: disable=W0621 + """Test calculate.calculate_chi2.""" + for i_model, model in enumerate(models): + print(f"Model {i_model}") + ( + measurement_df, + observable_df, + parameter_df, + simulation_df, + expected_residuals, + _, + _, + ) = model + chi2 = calculate_chi2( + measurement_df, simulation_df, observable_df, parameter_df + ) + + expected = sum(np.array(list(expected_residuals)) ** 2) + assert chi2 == pytest.approx(expected) + + +def test_calculate_llh(models): # pylint: disable=W0621 + """Test calculate.calculate_llh.""" + for i_model, model in enumerate(models): + print(f"Model {i_model}") + ( + measurement_df, + observable_df, + parameter_df, + simulation_df, + _, + _, + expected_llh, + ) = model + llh = 
calculate_llh( + measurement_df, simulation_df, observable_df, parameter_df + ) + assert llh == pytest.approx(expected_llh) or expected_llh is None + + +def test_calculate_single_llh(): + """Test calculate.calculate_single_llh.""" + m, s, sigma = 5.3, 4.5, 1.6 + pi, log, log10 = np.pi, np.log, np.log10 + + llh = calculate_single_llh( + measurement=m, + simulation=s, + noise_value=sigma, + noise_distribution=NORMAL, + scale=LIN, + ) + expected_llh = -0.5 * (((s - m) / sigma) ** 2 + log(2 * pi * sigma**2)) + assert llh == pytest.approx(expected_llh) + + llh = calculate_single_llh( + measurement=m, + simulation=s, + noise_value=sigma, + noise_distribution=NORMAL, + scale=LOG, + ) + expected_llh = -0.5 * ( + ((log(s) - log(m)) / sigma) ** 2 + log(2 * pi * sigma**2 * m**2) + ) + assert llh == pytest.approx(expected_llh) + + llh = calculate_single_llh( + measurement=m, + simulation=s, + noise_value=sigma, + noise_distribution=NORMAL, + scale=LOG10, + ) + expected_llh = -0.5 * ( + ((log10(s) - log10(m)) / sigma) ** 2 + + log(2 * pi * sigma**2 * m**2 * log(10) ** 2) + ) + assert llh == pytest.approx(expected_llh) + + llh = calculate_single_llh( + measurement=m, + simulation=s, + noise_value=sigma, + noise_distribution=LAPLACE, + scale=LIN, + ) + expected_llh = -abs((s - m) / sigma) - log(2 * sigma) + assert llh == pytest.approx(expected_llh) + + llh = calculate_single_llh( + measurement=m, + simulation=s, + noise_value=sigma, + noise_distribution=LAPLACE, + scale=LOG, + ) + expected_llh = -abs((log(s) - log(m)) / sigma) - log(2 * sigma * m) + assert llh == pytest.approx(expected_llh) + + llh = calculate_single_llh( + measurement=m, + simulation=s, + noise_value=sigma, + noise_distribution=LAPLACE, + scale=LOG10, + ) + expected_llh = -abs((log10(s) - log10(m)) / sigma) - log( + 2 * sigma * m * log(10) + ) + assert llh == pytest.approx(expected_llh) From 636404de9145da50a068fd671104fdc5c129cd22 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Fri, 4 Jul 2025 16:43:13 +0200 
Subject: [PATCH 060/141] Update to new main branch Switch from `develop` to `main` as main development branch. --- README.md | 4 ++-- doc/conf.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/README.md b/README.md index 7873928b..bf05c459 100644 --- a/README.md +++ b/README.md @@ -27,9 +27,9 @@ It will require Python>=3.10 to run. (We are following the Development versions of the PEtab library can be installed using - pip3 install https://github.com/PEtab-dev/libpetab-python/archive/develop.zip + pip3 install https://github.com/PEtab-dev/libpetab-python/archive/main.zip -(replace `develop` by the branch or commit you would like to install). +(replace `main` by the branch or commit you would like to install). When setting up a new parameter estimation problem, the most useful tools will be: diff --git a/doc/conf.py b/doc/conf.py index 3b378808..99838616 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -114,7 +114,7 @@ "display_github": True, "github_user": "petab-dev", "github_repo": "libpetab-python", - "github_version": "develop", + "github_version": "main", "conf_py_path": "/doc", } From 6a55afcf6a70036a9e5d56b999d508d0f5e70363 Mon Sep 17 00:00:00 2001 From: Polina Lakrisenko Date: Tue, 8 Jul 2025 15:54:51 +0200 Subject: [PATCH 061/141] fix goodness_of_fit plot and add color parameter (#402) * fix goodness_of_fit plot and add color parameter --------- Co-authored-by: Daniel Weindl --- petab/v1/measurements.py | 2 +- petab/v1/visualize/plot_residuals.py | 7 ++++++- tests/v1/test_visualization.py | 6 +++--- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/petab/v1/measurements.py b/petab/v1/measurements.py index ec7a1069..8b23907b 100644 --- a/petab/v1/measurements.py +++ b/petab/v1/measurements.py @@ -289,7 +289,7 @@ def assert_overrides_match_parameter_count( ) } if NOISE_FORMULA in observable_df.columns - else {obs_id: 0 for obs_id in observable_df.index.values} + else dict.fromkeys(observable_df.index.values, 0) ) for _, row 
in measurement_df.iterrows(): diff --git a/petab/v1/visualize/plot_residuals.py b/petab/v1/visualize/plot_residuals.py index a45fcde3..14a62f8a 100644 --- a/petab/v1/visualize/plot_residuals.py +++ b/petab/v1/visualize/plot_residuals.py @@ -134,6 +134,7 @@ def plot_goodness_of_fit( petab_problem: Problem, simulations_df: str | Path | pd.DataFrame, size: tuple = (10, 7), + color=None, ax: plt.Axes | None = None, ) -> matplotlib.axes.Axes: """ @@ -148,6 +149,9 @@ def plot_goodness_of_fit( output data file. size: Figure size. + color: + The marker colors, matches the `c` parameter of + `matplotlib.pyplot.scatter`. ax: Axis object. @@ -171,8 +175,8 @@ def plot_goodness_of_fit( parameter_dfs=petab_problem.parameter_df, )[0] slope, intercept, r_value, p_value, std_err = stats.linregress( - petab_problem.measurement_df["measurement"], simulations_df["simulation"], + petab_problem.measurement_df["measurement"], ) # x, y if ax is None: @@ -182,6 +186,7 @@ def plot_goodness_of_fit( ax.scatter( petab_problem.measurement_df["measurement"], simulations_df["simulation"], + c=color, ) ax.axis("square") diff --git a/tests/v1/test_visualization.py b/tests/v1/test_visualization.py index 0edd4b78..3c5a3a65 100644 --- a/tests/v1/test_visualization.py +++ b/tests/v1/test_visualization.py @@ -8,14 +8,14 @@ import petab from petab.C import * -from petab.visualize import ( +from petab.v1.visualize import ( plot_goodness_of_fit, plot_residuals_vs_simulation, plot_with_vis_spec, plot_without_vis_spec, ) -from petab.visualize.lint import validate_visualization_df -from petab.visualize.plotting import VisSpecParser +from petab.v1.visualize.lint import validate_visualization_df +from petab.v1.visualize.plotting import VisSpecParser # Avoid errors when plotting without X server plt.switch_backend("agg") From 50994b2feac8ea223eb163c2c4085c640705998b Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 14 Jul 2025 11:54:10 +0200 Subject: [PATCH 062/141] Fix `v2.ProblemConfig.extension` type 
(#403) According to the current schema, `extensions` is `object` instead of `list`. --- petab/v2/petab1to2.py | 2 +- petab/v2/problem.py | 9 ++++----- 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/petab/v2/petab1to2.py b/petab/v2/petab1to2.py index 3869307f..75823f15 100644 --- a/petab/v2/petab1to2.py +++ b/petab/v2/petab1to2.py @@ -280,7 +280,7 @@ def _update_yaml(yaml_config: dict) -> dict: yaml_config[v2.C.FORMAT_VERSION] = "2.0.0" # Add extensions - yaml_config[v2.C.EXTENSIONS] = [] + yaml_config[v2.C.EXTENSIONS] = {} # Move models and set IDs (filename for now) for problem in yaml_config[v2.C.PROBLEMS]: diff --git a/petab/v2/problem.py b/petab/v2/problem.py index f6ec9c6a..0667f640 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -846,7 +846,7 @@ def validate( validation_results = ValidationResultList() if self.config.extensions: - extensions = ",".join(e.name for e in self.config.extensions) + extensions = ",".join(self.config.extensions.keys()) validation_results.append( ValidationIssue( ValidationIssueSeverity.WARNING, @@ -1116,7 +1116,7 @@ def model_dump(self, **kwargs) -> dict[str, Any]: >>> p += core.Parameter(id="par", lb=0, ub=1) >>> pprint(p.model_dump()) {'conditions': [], - 'config': {'extensions': [], + 'config': {'extensions': {}, 'format_version': '2.0.0', 'parameter_file': None, 'problems': []}, @@ -1168,7 +1168,6 @@ class SubProblem(BaseModel): class ExtensionConfig(BaseModel): """The configuration of a PEtab extension.""" - name: str version: str config: dict @@ -1194,8 +1193,8 @@ class ProblemConfig(BaseModel): parameter_file: str | AnyUrl | None = None #: The list of problems in the configuration. problems: list[SubProblem] = [] - #: Extensiions used by the problem. - extensions: list[ExtensionConfig] = [] + #: Extensions used by the problem. + extensions: dict[str, ExtensionConfig] = {} def to_yaml(self, filename: str | Path): """Write the configuration to a YAML file. 
From a0ceb4ce34a3bfa7658fe7f4a7346f7f19764085 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Fri, 18 Jul 2025 20:05:08 +0200 Subject: [PATCH 063/141] v2: Adapt to updated yaml schema (#394) Basically, the `problems` list is dissolved and its contents moved one level up. See https://github.com/PEtab-dev/PEtab/pull/622/files Related to https://github.com/PEtab-dev/libpetab-python/issues/392. Closes #391. --- petab/petablint.py | 12 +- petab/schemas/petab_schema.v2.0.0.yaml | 104 ++++------ petab/v2/lint.py | 6 +- petab/v2/petab1to2.py | 277 ++++++++++++------------- petab/v2/problem.py | 110 +++++----- tests/v2/test_problem.py | 34 +-- 6 files changed, 272 insertions(+), 271 deletions(-) diff --git a/petab/petablint.py b/petab/petablint.py index 030f4545..b3c2ef87 100755 --- a/petab/petablint.py +++ b/petab/petablint.py @@ -179,14 +179,14 @@ def main(): logger.error(e) sys.exit(1) - if petab.is_composite_problem(args.yaml_file_name): - # TODO: further checking: - # https://github.com/ICB-DCM/PEtab/issues/191 - # problem = petab.CompositeProblem.from_yaml(args.yaml_file_name) - return - match get_major_version(args.yaml_file_name): case 1: + if petab.is_composite_problem(args.yaml_file_name): + # TODO: further checking: + # https://github.com/ICB-DCM/PEtab/issues/191 + # petab.CompositeProblem.from_yaml(args.yaml_file_name) + return + problem = petab.Problem.from_yaml(args.yaml_file_name) ret = petab.lint.lint_problem(problem) sys.exit(ret) diff --git a/petab/schemas/petab_schema.v2.0.0.yaml b/petab/schemas/petab_schema.v2.0.0.yaml index b4d7c358..d87a8401 100644 --- a/petab/schemas/petab_schema.v2.0.0.yaml +++ b/petab/schemas/petab_schema.v2.0.0.yaml @@ -31,71 +31,47 @@ properties: File name (absolute or relative) or URL to PEtab parameter table containing parameters of all models listed in `problems`. A single table may be split into multiple files and described as an array here. 
- problems: - type: array - description: | - One or multiple PEtab problems (sets of model, condition, observable - and measurement files). If different model and data files are - independent, they can be specified as separate PEtab problems, which - may allow more efficient handling. Files in one problem cannot refer - to models entities or data specified inside another problem. - items: - type: object - description: | - A set of PEtab model, condition, observable and measurement - files and optional visualization files. - properties: - - model_files: - type: object - description: One or multiple models - - # the model ID - patternProperties: - "^[a-zA-Z_]\\w*$": - type: object - properties: - location: - type: string - description: Model file name or URL - language: - type: string - description: | - Model language, e.g., 'sbml', 'cellml', 'bngl', 'pysb' - required: - - location - - language - additionalProperties: false - - measurement_files: - description: List of PEtab measurement files. - $ref: "#/definitions/list_of_files" - - condition_files: - description: List of PEtab condition files. - $ref: "#/definitions/list_of_files" - - experiment_files: - description: List of PEtab experiment files. - $ref: "#/definitions/list_of_files" - - observable_files: - description: List of PEtab observable files. - $ref: "#/definitions/list_of_files" - - visualization_files: - description: List of PEtab visualization files. - $ref: "#/definitions/list_of_files" - - mapping_file: + model_files: + type: object + description: One or multiple models + + # the model ID + patternProperties: + "^[a-zA-Z_]\\w*$": + type: object + properties: + location: type: string - description: Optional PEtab mapping file name or URL. 
+ description: Model file name or URL + language: + type: string + description: | + Model language, e.g., 'sbml', 'cellml', 'bngl', 'pysb' + required: + - location + - language + additionalProperties: false + + measurement_files: + description: List of PEtab measurement files. + $ref: "#/definitions/list_of_files" + + condition_files: + description: List of PEtab condition files. + $ref: "#/definitions/list_of_files" + + experiment_files: + description: List of PEtab experiment files. + $ref: "#/definitions/list_of_files" + + observable_files: + description: List of PEtab observable files. + $ref: "#/definitions/list_of_files" - required: - - model_files - - observable_files - - measurement_files + mapping_files: + description: List of PEtab mapping files. + $ref: "#/definitions/list_of_files" extensions: type: object @@ -120,4 +96,6 @@ properties: required: - format_version - parameter_file - - problems + - model_files + - observable_files + - measurement_files diff --git a/petab/v2/lint.py b/petab/v2/lint.py index 38b7ff74..2810841a 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -216,13 +216,13 @@ def run(self, problem: Problem) -> ValidationIssue | None: # TODO: we need some option for validating partial vs full problems # check for unset but required files missing_files = [] - if not config.parameter_file: + if not config.parameter_files: missing_files.append("parameters") - if not [p.measurement_files for p in config.problems]: + if not config.measurement_files: missing_files.append("measurements") - if not [p.observable_files for p in config.problems]: + if not config.observable_files: missing_files.append("observables") if missing_files: diff --git a/petab/v2/petab1to2.py b/petab/v2/petab1to2.py index 75823f15..2b6ec0e3 100644 --- a/petab/v2/petab1to2.py +++ b/petab/v2/petab1to2.py @@ -99,155 +99,145 @@ def petab_files_1to2(yaml_config: Path | str, output_dir: Path | str): file = yaml_config[v2.C.PARAMETER_FILE] v2.write_parameter_df(parameter_df, 
get_dest_path(file)) - # sub-problems - for problem_config in new_yaml_config.problems: - # copy files that don't need conversion - # (models, visualizations) - for file in chain( - (model.location for model in problem_config.model_files.values()), - problem_config.visualization_files, - ): - _copy_file(get_src_path(file), Path(get_dest_path(file))) + # copy files that don't need conversion + # (models, visualizations) + for file in chain( + (model.location for model in new_yaml_config.model_files.values()), + new_yaml_config.visualization_files, + ): + _copy_file(get_src_path(file), Path(get_dest_path(file))) + + # Update observable table + for observable_file in new_yaml_config.observable_files: + observable_df = v1.get_observable_df(get_src_path(observable_file)) + observable_df = v1v2_observable_df( + observable_df, + ) + v2.write_observable_df(observable_df, get_dest_path(observable_file)) - # Update observable table - for observable_file in problem_config.observable_files: - observable_df = v1.get_observable_df(get_src_path(observable_file)) - observable_df = v1v2_observable_df( - observable_df, - ) - v2.write_observable_df( - observable_df, get_dest_path(observable_file) - ) + # Update condition table + for condition_file in new_yaml_config.condition_files: + condition_df = v1.get_condition_df(get_src_path(condition_file)) + condition_df = v1v2_condition_df(condition_df, petab_problem.model) + v2.write_condition_df(condition_df, get_dest_path(condition_file)) - # Update condition table - for condition_file in problem_config.condition_files: - condition_df = v1.get_condition_df(get_src_path(condition_file)) - condition_df = v1v2_condition_df(condition_df, petab_problem.model) - v2.write_condition_df(condition_df, get_dest_path(condition_file)) - - # records for the experiment table to be created - experiments = [] - - def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str: - if not sim_cond_id and not preeq_cond_id: - return "" - # check 
whether the conditions will exist in the v2 condition table - sim_cond_exists = ( - petab_problem.condition_df.loc[sim_cond_id].notna().any() - ) - preeq_cond_exists = ( - preeq_cond_id - and petab_problem.condition_df.loc[preeq_cond_id].notna().any() - ) - if not sim_cond_exists and not preeq_cond_exists: - # if we have only all-NaN conditions, we don't create a new - # experiment - return "" - - if preeq_cond_id: - preeq_cond_id = f"{preeq_cond_id}_" - exp_id = f"experiment__{preeq_cond_id}__{sim_cond_id}" - if exp_id in experiments: # noqa: B023 - i = 1 - while f"{exp_id}_{i}" in experiments: # noqa: B023 - i += 1 - exp_id = f"{exp_id}_{i}" - return exp_id - - measured_experiments = ( - petab_problem.get_simulation_conditions_from_measurement_df() + # records for the experiment table to be created + experiments = [] + + def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str: + if not sim_cond_id and not preeq_cond_id: + return "" + # check whether the conditions will exist in the v2 condition table + sim_cond_exists = ( + petab_problem.condition_df.loc[sim_cond_id].notna().any() ) - for ( - _, - row, - ) in measured_experiments.iterrows(): - # generate a new experiment for each simulation / pre-eq condition - # combination - sim_cond_id = row[v1.C.SIMULATION_CONDITION_ID] - preeq_cond_id = row.get(v1.C.PREEQUILIBRATION_CONDITION_ID, "") - exp_id = create_experiment_id(sim_cond_id, preeq_cond_id) - if not exp_id: - continue - if preeq_cond_id: - experiments.append( - { - v2.C.EXPERIMENT_ID: exp_id, - v2.C.CONDITION_ID: preeq_cond_id, - v2.C.TIME: v2.C.TIME_PREEQUILIBRATION, - } - ) + preeq_cond_exists = ( + preeq_cond_id + and petab_problem.condition_df.loc[preeq_cond_id].notna().any() + ) + if not sim_cond_exists and not preeq_cond_exists: + # if we have only all-NaN conditions, we don't create a new + # experiment + return "" + + if preeq_cond_id: + preeq_cond_id = f"{preeq_cond_id}_" + exp_id = f"experiment__{preeq_cond_id}__{sim_cond_id}" + if 
exp_id in experiments: # noqa: B023 + i = 1 + while f"{exp_id}_{i}" in experiments: # noqa: B023 + i += 1 + exp_id = f"{exp_id}_{i}" + return exp_id + + measured_experiments = ( + petab_problem.get_simulation_conditions_from_measurement_df() + ) + for ( + _, + row, + ) in measured_experiments.iterrows(): + # generate a new experiment for each simulation / pre-eq condition + # combination + sim_cond_id = row[v1.C.SIMULATION_CONDITION_ID] + preeq_cond_id = row.get(v1.C.PREEQUILIBRATION_CONDITION_ID, "") + exp_id = create_experiment_id(sim_cond_id, preeq_cond_id) + if not exp_id: + continue + if preeq_cond_id: experiments.append( { v2.C.EXPERIMENT_ID: exp_id, - v2.C.CONDITION_ID: sim_cond_id, - v2.C.TIME: 0, + v2.C.CONDITION_ID: preeq_cond_id, + v2.C.TIME: v2.C.TIME_PREEQUILIBRATION, } ) - if experiments: - exp_table_path = output_dir / "experiments.tsv" - if exp_table_path.exists(): - raise ValueError( - f"Experiment table file {exp_table_path} already exists." - ) - problem_config.experiment_files.append("experiments.tsv") - v2.write_experiment_df( - v2.get_experiment_df(pd.DataFrame(experiments)), exp_table_path + experiments.append( + { + v2.C.EXPERIMENT_ID: exp_id, + v2.C.CONDITION_ID: sim_cond_id, + v2.C.TIME: 0, + } + ) + if experiments: + exp_table_path = output_dir / "experiments.tsv" + if exp_table_path.exists(): + raise ValueError( + f"Experiment table file {exp_table_path} already exists." 
) + new_yaml_config.experiment_files.append("experiments.tsv") + v2.write_experiment_df( + v2.get_experiment_df(pd.DataFrame(experiments)), exp_table_path + ) - for measurement_file in problem_config.measurement_files: - measurement_df = v1.get_measurement_df( - get_src_path(measurement_file) + for measurement_file in new_yaml_config.measurement_files: + measurement_df = v1.get_measurement_df(get_src_path(measurement_file)) + # if there is already an experiment ID column, we rename it + if v2.C.EXPERIMENT_ID in measurement_df.columns: + measurement_df.rename( + columns={v2.C.EXPERIMENT_ID: f"experiment_id_{uuid4()}"}, + inplace=True, ) - # if there is already an experiment ID column, we rename it - if v2.C.EXPERIMENT_ID in measurement_df.columns: - measurement_df.rename( - columns={v2.C.EXPERIMENT_ID: f"experiment_id_{uuid4()}"}, - inplace=True, - ) - # add pre-eq condition id if not present or convert to string - # for simplicity + # add pre-eq condition id if not present or convert to string + # for simplicity + if v1.C.PREEQUILIBRATION_CONDITION_ID in measurement_df.columns: + measurement_df.fillna( + {v1.C.PREEQUILIBRATION_CONDITION_ID: ""}, inplace=True + ) + else: + measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID] = "" + + if ( + petab_problem.condition_df is not None + and len( + set(petab_problem.condition_df.columns) - {v1.C.CONDITION_NAME} + ) + == 0 + ): + # we can't have "empty" conditions with no overrides in v2, + # therefore, we drop the respective condition ID completely + # TODO: or can we? 
+ # TODO: this needs to be checked condition-wise, not globally + measurement_df[v1.C.SIMULATION_CONDITION_ID] = "" if v1.C.PREEQUILIBRATION_CONDITION_ID in measurement_df.columns: - measurement_df.fillna( - {v1.C.PREEQUILIBRATION_CONDITION_ID: ""}, inplace=True - ) - else: measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID] = "" - - if ( - petab_problem.condition_df is not None - and len( - set(petab_problem.condition_df.columns) - - {v1.C.CONDITION_NAME} - ) - == 0 - ): - # we can't have "empty" conditions with no overrides in v2, - # therefore, we drop the respective condition ID completely - # TODO: or can we? - # TODO: this needs to be checked condition-wise, not globally - measurement_df[v1.C.SIMULATION_CONDITION_ID] = "" - if ( - v1.C.PREEQUILIBRATION_CONDITION_ID - in measurement_df.columns - ): - measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID] = "" - # condition IDs to experiment IDs - measurement_df.insert( - 0, - v2.C.EXPERIMENT_ID, - measurement_df.apply( - lambda row: create_experiment_id( - row[v1.C.SIMULATION_CONDITION_ID], - row.get(v1.C.PREEQUILIBRATION_CONDITION_ID, ""), - ), - axis=1, + # condition IDs to experiment IDs + measurement_df.insert( + 0, + v2.C.EXPERIMENT_ID, + measurement_df.apply( + lambda row: create_experiment_id( + row[v1.C.SIMULATION_CONDITION_ID], + row.get(v1.C.PREEQUILIBRATION_CONDITION_ID, ""), ), - ) - del measurement_df[v1.C.SIMULATION_CONDITION_ID] - del measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID] - v2.write_measurement_df( - measurement_df, get_dest_path(measurement_file) - ) + axis=1, + ), + ) + del measurement_df[v1.C.SIMULATION_CONDITION_ID] + del measurement_df[v1.C.PREEQUILIBRATION_CONDITION_ID] + v2.write_measurement_df( + measurement_df, get_dest_path(measurement_file) + ) # Write the new YAML file new_yaml_file = output_dir / Path(yaml_file).name @@ -283,18 +273,27 @@ def _update_yaml(yaml_config: dict) -> dict: yaml_config[v2.C.EXTENSIONS] = {} # Move models and set IDs (filename for now) - for problem 
in yaml_config[v2.C.PROBLEMS]: - problem[v2.C.MODEL_FILES] = {} - models = problem[v2.C.MODEL_FILES] + yaml_config[v2.C.MODEL_FILES] = {} + for problem in yaml_config[v1.C.PROBLEMS]: + models = {} for sbml_file in problem[v1.C.SBML_FILES]: model_id = sbml_file.split("/")[-1].split(".")[0] models[model_id] = { v2.C.MODEL_LANGUAGE: MODEL_TYPE_SBML, v2.C.MODEL_LOCATION: sbml_file, } - problem[v2.C.MODEL_FILES] = problem.get(v2.C.MODEL_FILES, {}) + yaml_config[v2.C.MODEL_FILES] |= models del problem[v1.C.SBML_FILES] + for file_type in ( + v1.C.CONDITION_FILES, + v1.C.MEASUREMENT_FILES, + v1.C.OBSERVABLE_FILES, + v1.C.VISUALIZATION_FILES, + ): + if file_type in problem: + yaml_config[file_type] = problem[file_type] + del problem[file_type] return yaml_config diff --git a/petab/v2/problem.py b/petab/v2/problem.py index 0667f640..a191942f 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -15,7 +15,7 @@ import numpy as np import pandas as pd import sympy as sp -from pydantic import AnyUrl, BaseModel, Field +from pydantic import AnyUrl, BaseModel, Field, field_validator from ..v1 import ( mapping, @@ -23,6 +23,7 @@ observables, parameter_mapping, parameters, + validate_yaml_syntax, yaml, ) from ..v1.core import concat_tables, get_visualization_df @@ -169,6 +170,8 @@ def from_yaml( else: yaml_file = None + validate_yaml_syntax(yaml_config) + def get_path(filename): if base_path is None: return filename @@ -202,7 +205,7 @@ def get_path(filename): f"{yaml_config[FORMAT_VERSION]}." ) - if yaml.is_composite_problem(yaml_config): + if len(yaml_config[MODEL_FILES]) > 1: raise ValueError( "petab.v2.Problem.from_yaml() can only be used for " "yaml files comprising a single model. 
" @@ -212,34 +215,25 @@ def get_path(filename): config = ProblemConfig( **yaml_config, base_path=base_path, filepath=yaml_file ) - problem0 = config.problems[0] - - if isinstance(config.parameter_file, list): - parameter_df = parameters.get_parameter_df( - [get_path(f) for f in config.parameter_file] - ) - else: - parameter_df = ( - parameters.get_parameter_df(get_path(config.parameter_file)) - if config.parameter_file - else None - ) + parameter_df = parameters.get_parameter_df( + [get_path(f) for f in config.parameter_files] + ) - if len(problem0.model_files or []) > 1: + if len(config.model_files or []) > 1: # TODO https://github.com/PEtab-dev/libpetab-python/issues/6 raise NotImplementedError( "Support for multiple models is not yet implemented." ) model = None - if problem0.model_files: - model_id, model_info = next(iter(problem0.model_files.items())) + if config.model_files: + model_id, model_info = next(iter(config.model_files.items())) model = model_factory( get_path(model_info.location), model_info.language, model_id=model_id, ) - measurement_files = [get_path(f) for f in problem0.measurement_files] + measurement_files = [get_path(f) for f in config.measurement_files] # If there are multiple tables, we will merge them measurement_df = ( concat_tables(measurement_files, measurements.get_measurement_df) @@ -247,7 +241,7 @@ def get_path(filename): else None ) - condition_files = [get_path(f) for f in problem0.condition_files] + condition_files = [get_path(f) for f in config.condition_files] # If there are multiple tables, we will merge them condition_df = ( concat_tables(condition_files, conditions.get_condition_df) @@ -255,7 +249,7 @@ def get_path(filename): else None ) - experiment_files = [get_path(f) for f in problem0.experiment_files] + experiment_files = [get_path(f) for f in config.experiment_files] # If there are multiple tables, we will merge them experiment_df = ( concat_tables(experiment_files, experiments.get_experiment_df) @@ -263,9 +257,8 @@ def 
get_path(filename): else None ) - visualization_files = [ - get_path(f) for f in problem0.visualization_files - ] + # TODO: remove in v2?! + visualization_files = [get_path(f) for f in config.visualization_files] # If there are multiple tables, we will merge them visualization_df = ( concat_tables(visualization_files, get_visualization_df) @@ -273,7 +266,7 @@ def get_path(filename): else None ) - observable_files = [get_path(f) for f in problem0.observable_files] + observable_files = [get_path(f) for f in config.observable_files] # If there are multiple tables, we will merge them observable_df = ( concat_tables(observable_files, observables.get_observable_df) @@ -281,7 +274,7 @@ def get_path(filename): else None ) - mapping_files = [get_path(f) for f in problem0.mapping_files] + mapping_files = [get_path(f) for f in config.mapping_files] # If there are multiple tables, we will merge them mapping_df = ( concat_tables(mapping_files, mapping.get_mapping_df) @@ -1116,10 +1109,16 @@ def model_dump(self, **kwargs) -> dict[str, Any]: >>> p += core.Parameter(id="par", lb=0, ub=1) >>> pprint(p.model_dump()) {'conditions': [], - 'config': {'extensions': {}, + 'config': {'condition_files': [], + 'experiment_files': [], + 'extensions': {}, 'format_version': '2.0.0', - 'parameter_file': None, - 'problems': []}, + 'mapping_files': [], + 'measurement_files': [], + 'model_files': {}, + 'observable_files': [], + 'parameter_file': [], + 'visualization_files': []}, 'experiments': [], 'mappings': [], 'measurements': [], @@ -1133,7 +1132,9 @@ def model_dump(self, **kwargs) -> dict[str, Any]: 'ub': 1.0}]} """ res = { - "config": (self.config or ProblemConfig()).model_dump(**kwargs), + "config": (self.config or ProblemConfig()).model_dump( + **kwargs, by_alias=True + ), } res |= self.mapping_table.model_dump(**kwargs) res |= self.condition_table.model_dump(**kwargs) @@ -1152,19 +1153,6 @@ class ModelFile(BaseModel): language: str -class SubProblem(BaseModel): - """A `problems` object in 
the PEtab problem configuration.""" - - # TODO: consider changing str to Path - model_files: dict[str, ModelFile] | None = {} - measurement_files: list[str | AnyUrl] = [] - condition_files: list[str | AnyUrl] = [] - experiment_files: list[str | AnyUrl] = [] - observable_files: list[str | AnyUrl] = [] - visualization_files: list[str | AnyUrl] = [] - mapping_files: list[str | AnyUrl] = [] - - class ExtensionConfig(BaseModel): """The configuration of a PEtab extension.""" @@ -1190,11 +1178,39 @@ class ProblemConfig(BaseModel): #: The PEtab format version. format_version: str = "2.0.0" #: The path to the parameter file, relative to ``base_path``. - parameter_file: str | AnyUrl | None = None - #: The list of problems in the configuration. - problems: list[SubProblem] = [] + # TODO https://github.com/PEtab-dev/PEtab/pull/641: + # rename to parameter_files in yaml for consistency with other files? + # always a list? + parameter_files: list[str | AnyUrl] = Field( + default=[], alias=PARAMETER_FILE + ) + + # TODO: consider changing str to Path + model_files: dict[str, ModelFile] | None = {} + measurement_files: list[str | AnyUrl] = [] + condition_files: list[str | AnyUrl] = [] + experiment_files: list[str | AnyUrl] = [] + observable_files: list[str | AnyUrl] = [] + visualization_files: list[str | AnyUrl] = [] + mapping_files: list[str | AnyUrl] = [] + #: Extensions used by the problem. - extensions: dict[str, ExtensionConfig] = {} + extensions: list[ExtensionConfig] | dict = {} + + # convert parameter_file to list + @field_validator( + "parameter_files", + mode="before", + ) + def _convert_parameter_file(cls, v): + """Convert parameter_file to a list.""" + if isinstance(v, str): + return [v] + if isinstance(v, list): + return v + raise ValueError( + "parameter_files must be a string or a list of strings." + ) def to_yaml(self, filename: str | Path): """Write the configuration to a YAML file. 
@@ -1204,7 +1220,7 @@ def to_yaml(self, filename: str | Path): """ from ..v1.yaml import write_yaml - write_yaml(self.model_dump(), filename) + write_yaml(self.model_dump(by_alias=True), filename) @property def format_version_tuple(self) -> tuple[int, int, int, str]: diff --git a/tests/v2/test_problem.py b/tests/v2/test_problem.py index 7d5b6e1c..db169363 100644 --- a/tests/v2/test_problem.py +++ b/tests/v2/test_problem.py @@ -28,18 +28,27 @@ def test_load_remote(): """Test loading remote files""" + from jsonschema.exceptions import ValidationError + yaml_url = ( "https://raw.githubusercontent.com/PEtab-dev/petab_test_suite" "/update_v2/petabtests/cases/v2.0.0/sbml/0010/_0010.yaml" ) - petab_problem = Problem.from_yaml(yaml_url) - assert ( - petab_problem.measurement_df is not None - and not petab_problem.measurement_df.empty - ) + try: + petab_problem = Problem.from_yaml(yaml_url) + + assert ( + petab_problem.measurement_df is not None + and not petab_problem.measurement_df.empty + ) - assert petab_problem.validate() == [] + assert petab_problem.validate() == [] + except ValidationError: + # FIXME: Until v2 is finalized, the format of the tests will often be + # out of sync with the schema. + # Ignore validation errors for now. 
+ pass def test_auto_upgrade(): @@ -58,13 +67,12 @@ def test_problem_from_yaml_multiple_files(): """ yaml_config = """ format_version: 2.0.0 - parameter_file: - problems: - - condition_files: [conditions1.tsv, conditions2.tsv] - measurement_files: [measurements1.tsv, measurements2.tsv] - observable_files: [observables1.tsv, observables2.tsv] - model_files: - experiment_files: [experiments1.tsv, experiments2.tsv] + parameter_file: [] + condition_files: [conditions1.tsv, conditions2.tsv] + measurement_files: [measurements1.tsv, measurements2.tsv] + observable_files: [observables1.tsv, observables2.tsv] + model_files: {} + experiment_files: [experiments1.tsv, experiments2.tsv] """ with tempfile.TemporaryDirectory() as tmpdir: yaml_path = Path(tmpdir, "problem.yaml") From 314c4959944a7f90aad9912de5dc7714260260b7 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Sat, 19 Jul 2025 11:59:29 +0200 Subject: [PATCH 064/141] Fix `__all__` warning Fixes: ``` DeprecationWarning: Accessing `petab.__all__` is deprecated and will be removed in the next major release. Please use `petab.v1.__all__` instead. ``` --- petab/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/petab/__init__.py b/petab/__init__.py index dd30d186..031ca811 100644 --- a/petab/__init__.py +++ b/petab/__init__.py @@ -26,7 +26,7 @@ def __getattr__(name): return importlib.import_module("petab.v1") if name == "v2": return importlib.import_module("petab.v2") - if name != "__path__": + if name not in ("__path__", "__all__"): warn( f"Accessing `petab.{name}` is deprecated and will be removed in " f"the next major release. Please use `petab.v1.{name}` instead.", @@ -37,7 +37,7 @@ def __getattr__(name): def v1getattr(name, module): - if name != "__path__": + if name not in ("__path__", "__all__"): warn( f"Accessing `petab.{name}` is deprecated and will be removed in " f"the next major release. 
Please use `petab.v1.{name}` instead.", From 0a0b92ff1fd760daa367da3899d1e5387674e948 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Sat, 19 Jul 2025 12:32:41 +0200 Subject: [PATCH 065/141] v2: Add experiments->events converter (#387) Add functionality to convert PEtab v2 experiments/conditions to SBML events. This should make it easier to implement v2 support in other tools. All condition table changes are converted to event assignments. Only two indicator variables have to be set for each experiment. Closes #370. --------- Co-authored-by: Dilan Pathirana <59329744+dilpath@users.noreply.github.com> --- doc/modules.rst | 1 + petab/v2/__init__.py | 1 + petab/v2/converters.py | 408 +++++++++++++++++++++++++++++++++ petab/v2/core.py | 16 ++ petab/v2/models/_sbml_utils.py | 51 +++++ petab/v2/problem.py | 2 +- tests/v2/test_converters.py | 76 ++++++ 7 files changed, 554 insertions(+), 1 deletion(-) create mode 100644 petab/v2/converters.py create mode 100644 petab/v2/models/_sbml_utils.py create mode 100644 tests/v2/test_converters.py diff --git a/doc/modules.rst b/doc/modules.rst index 627ba9d8..6dacba5a 100644 --- a/doc/modules.rst +++ b/doc/modules.rst @@ -32,6 +32,7 @@ API Reference petab.v1.yaml petab.v2 petab.v2.C + petab.v2.converters petab.v2.core petab.v2.experiments petab.v2.lint diff --git a/petab/v2/__init__.py b/petab/v2/__init__.py index 4f8d28ea..68069010 100644 --- a/petab/v2/__init__.py +++ b/petab/v2/__init__.py @@ -37,6 +37,7 @@ models, # noqa: F401, E402 ) from .conditions import * # noqa: F403, F401, E402 +from .core import * # noqa: F401, E402 from .experiments import ( # noqa: F401, E402 get_experiment_df, write_experiment_df, diff --git a/petab/v2/converters.py b/petab/v2/converters.py new file mode 100644 index 00000000..f0736087 --- /dev/null +++ b/petab/v2/converters.py @@ -0,0 +1,408 @@ +"""Conversion of PEtab problems.""" + +from __future__ import annotations + +import warnings +from copy import deepcopy + +import libsbml +from sbmlmath 
import sbml_math_to_sympy, set_math + +from .core import Change, Condition, Experiment, ExperimentPeriod +from .models._sbml_utils import add_sbml_parameter, check +from .models.sbml_model import SbmlModel +from .problem import Problem + +__all__ = ["ExperimentsToEventsConverter"] + + +class ExperimentsToEventsConverter: + """Convert PEtab experiments to SBML events. + + For an SBML-model-based PEtab problem, this class converts the PEtab + experiments to events as far as possible. + + If the model already contains events, PEtab events are added with a higher + priority than the existing events to guarantee that PEtab condition changes + are applied before any pre-existing assignments. + + The PEtab problem must not contain any identifiers starting with + ``_petab``. + + All periods and condition changes that are represented by events + will be removed from the condition table. + Each experiment will have at most one period with a start time of ``-inf`` + and one period with a finite start time. The associated changes with + these periods are only the pre-equilibration indicator + (if necessary), and the experiment indicator parameter. + """ + + #: ID of the parameter that indicates whether the model is in + # the pre-equilibration phase (1) or not (0). + PREEQ_INDICATOR = "_petab_preequilibration_indicator" + + #: The condition ID of the condition that sets the + #: pre-equilibration indicator to 1. + CONDITION_ID_PREEQ_ON = "_petab_preequilibration_on" + + #: The condition ID of the condition that sets the + #: pre-equilibration indicator to 0. + CONDITION_ID_PREEQ_OFF = "_petab_preequilibration_off" + + def __init__(self, problem: Problem, default_priority: float = None): + """Initialize the converter. + + :param problem: The PEtab problem to convert. + This will not be modified. + :param default_priority: The priority value to apply to any events that + preexist in the model and do not have a priority set. 
+ + In SBML, for event assignments that are to be applied at the same + simulation time, the order of event execution is determined by the + priority of the respective events. + If no priority is set, the order is undefined. + See SBML specs for details. + To ensure that the PEtab condition-start-events are executed before + any other events, all events should have a priority set. + """ + if not isinstance(problem.model, SbmlModel): + raise ValueError("Only SBML models are supported.") + + self._original_problem = problem + self._new_problem = deepcopy(self._original_problem) + + self._model: libsbml.Model = self._new_problem.model.sbml_model + self._preeq_indicator = self.PREEQ_INDICATOR + + # The maximum event priority that was found in the unprocessed model. + self._max_event_priority = None + # The priority that will be used for the PEtab events. + self._petab_event_priority = None + self._default_priority = default_priority + self._preprocess() + + def _get_experiment_indicator_condition_id( + self, experiment_id: str + ) -> str: + """Get the condition ID for the experiment indicator parameter.""" + return f"_petab_experiment_condition_{experiment_id}" + + def _preprocess(self) -> None: + """Check whether we can handle the given problem and store some model + information.""" + model = self._model + if model.getLevel() < 3: + # try to upgrade the SBML model + if not model.getSBMLDocument().setLevelAndVersion(3, 2): + raise ValueError( + "Cannot handle SBML models with SBML level < 3, " + "because they do not support initial values for event " + "triggers and automatic upconversion of the model failed." 
+ ) + + # Apply default priority to all events that do not have a priority + if self._default_priority is not None: + for event in model.getListOfEvents(): + if ( + not event.getPriority() + or event.getPriority().getMath() is None + ): + priority = event.createPriority() + priority.setMath( + libsbml.parseL3Formula(str(self._default_priority)) + ) + + # Collect event priorities + event_priorities = { + ev.getId() or str(ev): sbml_math_to_sympy(ev.getPriority()) + for ev in model.getListOfEvents() + if ev.getPriority() and ev.getPriority().getMath() is not None + } + + # Check for non-constant event priorities and track the maximum + # priority used so far. + for e, priority in event_priorities.items(): + if priority.free_symbols: + # We'd need to find the maximum priority of all events, + # which is challenging/impossible to do in general. + raise NotImplementedError( + f"Event `{e}` has a non-constant priority: {priority}. " + "This is currently not supported." + ) + self._max_event_priority = max( + self._max_event_priority or 0, float(priority) + ) + + self._petab_event_priority = ( + self._max_event_priority + 1 + if self._max_event_priority is not None + else None + ) + + for event in model.getListOfEvents(): + # Check for undefined event priorities and warn + if (prio := event.getPriority()) and prio.getMath() is None: + warnings.warn( + f"Event `{event.getId()}` has no priority set. " + "Make sure that this event cannot trigger at the time of " + "a PEtab condition change, otherwise the behavior is " + "undefined. To avoid this warning, see the " + "`default_priority` parameter of " + f"{self.__class__.__name__}.", + stacklevel=1, + ) + + # Check for useValuesFromTrigger time + if event.getUseValuesFromTriggerTime(): + # Non-PEtab-condition-change events must be executed *after* + # PEtab condition changes have been applied, based on the + # updated model state. This would be violated by + # useValuesFromTriggerTime=true. 
+ warnings.warn( + f"Event `{event.getId()}` has " + "`useValuesFromTriggerTime=true'. " + "Make sure that this event cannot trigger at the time of " + "a PEtab condition change, or consider changing " + "`useValuesFromTriggerTime' to `false'. Otherwise " + "simulation results may be incorrect.", + stacklevel=1, + ) + + def convert(self) -> Problem: + """Convert the PEtab experiments to SBML events. + + :return: The converted PEtab problem. + """ + + self._add_preequilibration_indicator() + + for experiment in self._new_problem.experiment_table.experiments: + self._convert_experiment(experiment) + + self._add_indicators_to_conditions() + + validation_results = self._new_problem.validate() + validation_results.log() + + return self._new_problem + + def _convert_experiment(self, experiment: Experiment) -> None: + """Convert a single experiment to SBML events.""" + model = self._model + experiment.sort_periods() + has_preequilibration = experiment.has_preequilibration + + # add experiment indicator + exp_ind_id = self.get_experiment_indicator(experiment.id) + if model.getElementBySId(exp_ind_id) is not None: + raise ValueError( + f"The model has entity with ID `{exp_ind_id}`. " + "IDs starting with `petab_` are reserved for " + f"{self.__class__.__name__} and should not be used in the " + "model." + ) + add_sbml_parameter(model, id_=exp_ind_id, constant=False, value=0) + kept_periods = [] + for i_period, period in enumerate(experiment.periods): + if period.is_preequilibration: + # pre-equilibration cannot be represented in SBML, + # so we need to keep this period in the Problem. 
+ kept_periods.append(period) + elif i_period == int(has_preequilibration): + # we always keep the first non-pre-equilibration period + # to set the indicator parameters + kept_periods.append(period) + elif not period.condition_ids: + # no condition, no changes, no need for an event, + # no need to keep the period unless it's the pre-equilibration + # or the only non-equilibration period (handled above) + continue + + ev = self._create_period_start_event( + experiment=experiment, + i_period=i_period, + period=period, + ) + self._create_event_assignments_for_period( + ev, + [ + self._new_problem.condition_table[condition_id] + for condition_id in period.condition_ids + ], + ) + + if len(kept_periods) > 2: + raise AssertionError("Expected at most two periods to be kept.") + + # add conditions that set the indicator parameters + for period in kept_periods: + period.condition_ids = [ + self._get_experiment_indicator_condition_id(experiment.id), + self.CONDITION_ID_PREEQ_ON + if period.is_preequilibration + else self.CONDITION_ID_PREEQ_OFF, + ] + + experiment.periods = kept_periods + + def _create_period_start_event( + self, experiment: Experiment, i_period: int, period: ExperimentPeriod + ) -> libsbml.Event: + """Create an event that triggers at the start of a period.""" + + # TODO: for now, add separate events for each experiment x period, + # this could be optimized to reuse events + + ev = self._model.createEvent() + check(ev.setId(f"_petab_event_{experiment.id}_{i_period}")) + check(ev.setUseValuesFromTriggerTime(True)) + trigger = ev.createTrigger() + check(trigger.setInitialValue(False)) # may trigger at t=0 + check(trigger.setPersistent(True)) + if self._petab_event_priority is not None: + priority = ev.createPriority() + set_math(priority, self._petab_event_priority) + + exp_ind_id = self.get_experiment_indicator(experiment.id) + + # Create trigger expressions + # Since handling of == and !=, and distinguishing < and <= + # (and > and >=), is a bit tricky in 
terms of root-finding, + # we use these slightly more convoluted expressions. + # (assuming that the indicator parameters are {0, 1}) + if period.is_preequilibration: + trig_math = libsbml.parseL3Formula( + f"({exp_ind_id} > 0.5) && ({self._preeq_indicator} > 0.5)" + ) + else: + trig_math = libsbml.parseL3Formula( + f"({exp_ind_id} > 0.5) " + f"&& ({self._preeq_indicator} < 0.5) " + f"&& (time >= {period.time})" + ) + check(trigger.setMath(trig_math)) + + return ev + + def _add_preequilibration_indicator( + self, + ) -> None: + """Add an indicator parameter for the pre-equilibration to the SBML + model.""" + par_id = self._preeq_indicator + if self._model.getElementBySId(par_id) is not None: + raise ValueError( + f"Entity with ID {par_id} already exists in the SBML model." + ) + + # add the pre-steady-state indicator parameter + add_sbml_parameter(self._model, id_=par_id, value=0, constant=False) + + @staticmethod + def get_experiment_indicator(experiment_id: str) -> str: + """The ID of the experiment indicator parameter. + + The experiment indicator parameter is used to identify the + experiment in the SBML model. It is a parameter that is set + to 1 for the current experiment and 0 for all other + experiments. The parameter is used in the event trigger + to determine whether the event should be triggered. + + :param experiment_id: The ID of the experiment for which to create + the experiment indicator parameter ID. 
+ """ + return f"_petab_experiment_indicator_{experiment_id}" + + @staticmethod + def _create_event_assignments_for_period( + event: libsbml.Event, conditions: list[Condition] + ) -> None: + """Create an event assignments for a given period.""" + for condition in conditions: + for change in condition.changes: + ExperimentsToEventsConverter._change_to_event_assignment( + change, event + ) + + @staticmethod + def _change_to_event_assignment( + change: Change, event: libsbml.Event + ) -> None: + """Convert a PEtab ``Change`` to an SBML event assignment.""" + sbml_model = event.getModel() + + ea = event.createEventAssignment() + ea.setVariable(change.target_id) + set_math(ea, change.target_value) + + # target needs const=False, and target may not exist yet + # (e.g., in case of output parameters added in the observable + # table) + target = sbml_model.getElementBySId(change.target_id) + if target is None: + add_sbml_parameter( + sbml_model, id_=change.target_id, constant=False, value=0 + ) + else: + # We can safely change the `constant` attribute of the target. + # "Constant" does not imply "boundary condition" in SBML. + target.setConstant(False) + + # the target value may depend on parameters that are only + # introduced in the PEtab parameter table - those need + # to be added to the model + for sym in change.target_value.free_symbols: + if sbml_model.getElementBySId(sym.name) is None: + add_sbml_parameter( + sbml_model, id_=sym.name, constant=True, value=0 + ) + + def _add_indicators_to_conditions(self) -> None: + """After converting the experiments to events, add the indicator + parameters for the pre-equilibration period and for the different + experiments to the remaining conditions. 
+ Then remove all other conditions.""" + problem = self._new_problem + + # create conditions for indicator parameters + problem.condition_table.conditions.append( + Condition( + id=self.CONDITION_ID_PREEQ_ON, + changes=[ + Change(target_id=self._preeq_indicator, target_value=1) + ], + ) + ) + problem.condition_table.conditions.append( + Condition( + id=self.CONDITION_ID_PREEQ_OFF, + changes=[ + Change(target_id=self._preeq_indicator, target_value=0) + ], + ) + ) + # add conditions for the experiment indicators + for experiment in problem.experiment_table.experiments: + cond_id = self._get_experiment_indicator_condition_id( + experiment.id + ) + changes = [ + Change( + target_id=self.get_experiment_indicator(experiment.id), + target_value=1, + ) + ] + problem.condition_table.conditions.append( + Condition( + id=cond_id, + changes=changes, + ) + ) + + # All changes have been encoded in event assignments and can be + # removed. Only keep the conditions setting our indicators. + problem.condition_table.conditions = [ + condition + for condition in problem.condition_table.conditions + if condition.id.startswith("_petab") + ] diff --git a/petab/v2/core.py b/petab/v2/core.py index a847b196..193be335 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -114,6 +114,8 @@ class NoiseDistribution(str, Enum): LOG_NORMAL = C.LOG_NORMAL #: Log-Laplace distribution LOG_LAPLACE = C.LOG_LAPLACE + #: Log10-Normal + LOG10_NORMAL = C.LOG10_NORMAL class PriorDistribution(str, Enum): @@ -519,6 +521,11 @@ def _validate_ids(cls, condition_ids): raise ValueError(f"Invalid {C.CONDITION_ID}: `{condition_id}'") return condition_ids + @property + def is_preequilibration(self) -> bool: + """Check if this period is a preequilibration period.""" + return self.time == C.TIME_PREEQUILIBRATION + class Experiment(BaseModel): """An experiment or a timecourse defined by an ID and a set of different @@ -553,6 +560,15 @@ def __iadd__(self, other: ExperimentPeriod) -> Experiment: 
self.periods.append(other) return self + @property + def has_preequilibration(self) -> bool: + """Check if the experiment has preequilibration enabled.""" + return any(period.is_preequilibration for period in self.periods) + + def sort_periods(self) -> None: + """Sort the periods of the experiment by time.""" + self.periods.sort(key=lambda period: period.time) + class ExperimentTable(BaseModel): """PEtab experiments table.""" diff --git a/petab/v2/models/_sbml_utils.py b/petab/v2/models/_sbml_utils.py new file mode 100644 index 00000000..cbccde2b --- /dev/null +++ b/petab/v2/models/_sbml_utils.py @@ -0,0 +1,51 @@ +"""Private utility functions for SBML handling.""" + +import libsbml + +retval_to_str = { + getattr(libsbml, attr): attr + for attr in ( + "LIBSBML_DUPLICATE_OBJECT_ID", + "LIBSBML_INDEX_EXCEEDS_SIZE", + "LIBSBML_INVALID_ATTRIBUTE_VALUE", + "LIBSBML_INVALID_OBJECT", + "LIBSBML_INVALID_XML_OPERATION", + "LIBSBML_LEVEL_MISMATCH", + "LIBSBML_NAMESPACES_MISMATCH", + "LIBSBML_OPERATION_FAILED", + "LIBSBML_UNEXPECTED_ATTRIBUTE", + "LIBSBML_PKG_UNKNOWN", + "LIBSBML_PKG_VERSION_MISMATCH", + "LIBSBML_PKG_CONFLICTED_VERSION", + ) +} + + +def check(res: int): + """Check the return value of a libsbml function that returns a status code. + + :param res: The return value to check. + :raises RuntimeError: If the return value indicates an error. 
+ """ + if res != libsbml.LIBSBML_OPERATION_SUCCESS: + raise RuntimeError(f"libsbml error: {retval_to_str.get(res, res)}") + + +def add_sbml_parameter( + model: libsbml.Model, + id_: str, + value: float = None, + constant: bool = None, +) -> libsbml.Parameter: + """Add a parameter to the SBML model.""" + param = model.createParameter() + + check(param.setId(id_)) + + if value is not None: + check(param.setValue(value)) + + if constant is not None: + check(param.setConstant(constant)) + + return param diff --git a/petab/v2/problem.py b/petab/v2/problem.py index a191942f..97684241 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -838,7 +838,7 @@ def validate( ) validation_results = ValidationResultList() - if self.config.extensions: + if self.config and self.config.extensions: extensions = ",".join(self.config.extensions.keys()) validation_results.append( ValidationIssue( diff --git a/tests/v2/test_converters.py b/tests/v2/test_converters.py new file mode 100644 index 00000000..76ba6a86 --- /dev/null +++ b/tests/v2/test_converters.py @@ -0,0 +1,76 @@ +from math import inf + +from petab.v2 import Change, Condition, Experiment, ExperimentPeriod, Problem +from petab.v2.converters import ExperimentsToEventsConverter +from petab.v2.models.sbml_model import SbmlModel + + +def test_experiments_to_events_converter(): + """Test the ExperimentsToEventsConverter.""" + ant_model = """ + species X = 0 + X' = 1 + """ + problem = Problem() + problem.model = SbmlModel.from_antimony(ant_model) + problem.add_condition("c1", X=1) + problem.add_condition("c2", X=2) + problem.add_experiment("e1", -inf, "c1", 10, "c2") + + converter = ExperimentsToEventsConverter(problem) + converted = converter.convert() + assert converted.validate().has_errors() is False + + assert isinstance(converted.model, SbmlModel) + sbml_model = converted.model.sbml_model + + assert sbml_model.getNumEvents() == 2 + assert converted.condition_table.conditions == [ + Condition( + 
id="_petab_preequilibration_on", + changes=[ + Change( + target_id="_petab_preequilibration_indicator", + target_value=1, + ) + ], + ), + Condition( + id="_petab_preequilibration_off", + changes=[ + Change( + target_id="_petab_preequilibration_indicator", + target_value=0, + ) + ], + ), + Condition( + id="_petab_experiment_condition_e1", + changes=[ + Change( + target_id="_petab_experiment_indicator_e1", target_value=1 + ) + ], + ), + ] + assert converted.experiment_table.experiments == [ + Experiment( + id="e1", + periods=[ + ExperimentPeriod( + time=-inf, + condition_ids=[ + "_petab_experiment_condition_e1", + "_petab_preequilibration_on", + ], + ), + ExperimentPeriod( + time=10.0, + condition_ids=[ + "_petab_experiment_condition_e1", + "_petab_preequilibration_off", + ], + ), + ], + ), + ] From 153f2bdefa036cec5489db567973f64e9a10731d Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 21 Jul 2025 09:08:12 +0200 Subject: [PATCH 066/141] CI: Update branch name --- tests/v2/test_problem.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/v2/test_problem.py b/tests/v2/test_problem.py index db169363..e616193d 100644 --- a/tests/v2/test_problem.py +++ b/tests/v2/test_problem.py @@ -32,7 +32,7 @@ def test_load_remote(): yaml_url = ( "https://raw.githubusercontent.com/PEtab-dev/petab_test_suite" - "/update_v2/petabtests/cases/v2.0.0/sbml/0010/_0010.yaml" + "/main/petabtests/cases/v2.0.0/sbml/0010/_0010.yaml" ) try: From fb36efa568f60a58731f0100a2fdac29f36edb7c Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 21 Jul 2025 16:09:13 +0200 Subject: [PATCH 067/141] v2: Don't merge tables when creating `Problem` (#405) PEtab allows spreading conditions/observables/measurements/... across multiple tables. So far, the different tables of a certain type are merged when creating a `Problem`. This is convenient for simulation, but pretty inconvenient when loading/modifying/saving the problem, where one usually wants to maintain the old structure. 
This replaces `Problem.${type}_table: ${type}Table` by `Problem.${type}_tables: list[${type}Table]` and introduces a `Problem.${type}s` property that combines them on demand. Closes #404. --- petab/v2/converters.py | 56 ++--- petab/v2/lint.py | 141 +++++------- petab/v2/problem.py | 436 +++++++++++++++++++++++------------- tests/v2/test_conversion.py | 4 +- tests/v2/test_converters.py | 4 +- 5 files changed, 367 insertions(+), 274 deletions(-) diff --git a/petab/v2/converters.py b/petab/v2/converters.py index f0736087..67c7efda 100644 --- a/petab/v2/converters.py +++ b/petab/v2/converters.py @@ -8,7 +8,13 @@ import libsbml from sbmlmath import sbml_math_to_sympy, set_math -from .core import Change, Condition, Experiment, ExperimentPeriod +from .core import ( + Change, + Condition, + ConditionTable, + Experiment, + ExperimentPeriod, +) from .models._sbml_utils import add_sbml_parameter, check from .models.sbml_model import SbmlModel from .problem import Problem @@ -176,7 +182,7 @@ def convert(self) -> Problem: self._add_preequilibration_indicator() - for experiment in self._new_problem.experiment_table.experiments: + for experiment in self._new_problem.experiments: self._convert_experiment(experiment) self._add_indicators_to_conditions() @@ -226,7 +232,7 @@ def _convert_experiment(self, experiment: Experiment) -> None: self._create_event_assignments_for_period( ev, [ - self._new_problem.condition_table[condition_id] + self._new_problem[condition_id] for condition_id in period.condition_ids ], ) @@ -365,24 +371,18 @@ def _add_indicators_to_conditions(self) -> None: problem = self._new_problem # create conditions for indicator parameters - problem.condition_table.conditions.append( - Condition( - id=self.CONDITION_ID_PREEQ_ON, - changes=[ - Change(target_id=self._preeq_indicator, target_value=1) - ], - ) + problem += Condition( + id=self.CONDITION_ID_PREEQ_ON, + changes=[Change(target_id=self._preeq_indicator, target_value=1)], ) -
problem.condition_table.conditions.append( - Condition( - id=self.CONDITION_ID_PREEQ_OFF, - changes=[ - Change(target_id=self._preeq_indicator, target_value=0) - ], - ) + + problem += Condition( + id=self.CONDITION_ID_PREEQ_OFF, + changes=[Change(target_id=self._preeq_indicator, target_value=0)], ) + # add conditions for the experiment indicators - for experiment in problem.experiment_table.experiments: + for experiment in problem.experiments: cond_id = self._get_experiment_indicator_condition_id( experiment.id ) @@ -392,17 +392,19 @@ def _add_indicators_to_conditions(self) -> None: target_value=1, ) ] - problem.condition_table.conditions.append( - Condition( - id=cond_id, - changes=changes, - ) + problem += Condition( + id=cond_id, + changes=changes, ) # All changes have been encoded in event assignments and can be # removed. Only keep the conditions setting our indicators. - problem.condition_table.conditions = [ - condition - for condition in problem.condition_table.conditions - if condition.id.startswith("_petab") + problem.condition_tables = [ + ConditionTable( + conditions=[ + condition + for condition in problem.conditions + if condition.id.startswith("_petab") + ] + ) ] diff --git a/petab/v2/lint.py b/petab/v2/lint.py index 2810841a..a8ea848e 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -252,12 +252,8 @@ class CheckMeasuredObservablesDefined(ValidationTask): are defined.""" def run(self, problem: Problem) -> ValidationIssue | None: - used_observables = { - m.observable_id for m in problem.measurement_table.measurements - } - defined_observables = { - o.id for o in problem.observable_table.observables - } + used_observables = {m.observable_id for m in problem.measurements} + defined_observables = {o.id for o in problem.observables} if undefined_observables := (used_observables - defined_observables): return ValidationError( f"Observable(s) {undefined_observables} are used in the " @@ -275,15 +271,14 @@ class 
CheckOverridesMatchPlaceholders(ValidationTask): def run(self, problem: Problem) -> ValidationIssue | None: observable_parameters_count = { - o.id: len(o.observable_placeholders) - for o in problem.observable_table.observables + o.id: len(o.observable_placeholders) for o in problem.observables } noise_parameters_count = { - o.id: len(o.noise_placeholders) - for o in problem.observable_table.observables + o.id: len(o.noise_placeholders) for o in problem.observables } messages = [] - for m in problem.measurement_table.measurements: + observables = {o.id: o for o in problem.observables} + for m in problem.measurements: # check observable parameters try: expected = observable_parameters_count[m.observable_id] @@ -297,7 +292,7 @@ def run(self, problem: Problem) -> ValidationIssue | None: actual = len(m.observable_parameters) if actual != expected: - formula = problem.observable_table[m.observable_id].formula + formula = observables[m.observable_id].formula messages.append( f"Mismatch of observable parameter overrides for " f"{m.observable_id} ({formula})" @@ -323,9 +318,7 @@ def run(self, problem: Problem) -> ValidationIssue | None: "noiseParameters column." 
) else: - formula = problem.observable_table[ - m.observable_id - ].noise_formula + formula = observables[m.observable_id].noise_formula messages.append( f"Mismatch of noise parameter overrides for " f"{m.observable_id} ({formula})" @@ -348,11 +341,11 @@ def run(self, problem: Problem) -> ValidationIssue | None: log_observables = { o.id - for o in problem.observable_table.observables + for o in problem.observables if o.noise_distribution in [ND.LOG_NORMAL, ND.LOG_LAPLACE] } if log_observables: - for m in problem.measurement_table.measurements: + for m in problem.measurements: if m.measurement <= 0 and m.observable_id in log_observables: return ValidationError( "Measurements with observable " @@ -374,14 +367,12 @@ def run(self, problem: Problem) -> ValidationIssue | None: # to conditions, otherwise it should maximally be a warning used_experiments = { m.experiment_id - for m in problem.measurement_table.measurements + for m in problem.measurements if m.experiment_id is not None } # check that measured experiments exist - available_experiments = { - e.id for e in problem.experiment_table.experiments - } + available_experiments = {e.id for e in problem.experiments} if missing_experiments := (used_experiments - available_experiments): return ValidationError( "Measurement table references experiments that " @@ -403,14 +394,12 @@ def run(self, problem: Problem) -> ValidationIssue | None: ) allowed_targets |= set(get_output_parameters(problem)) allowed_targets |= { - m.petab_id - for m in problem.mapping_table.mappings - if m.model_id is not None + m.petab_id for m in problem.mappings if m.model_id is not None } used_targets = { change.target_id - for cond in problem.condition_table.conditions + for cond in problem.conditions for change in cond.changes } @@ -421,7 +410,7 @@ def run(self, problem: Problem) -> ValidationIssue | None: # Check that changes of simultaneously applied conditions don't # intersect - for experiment in problem.experiment_table.experiments: + for 
experiment in problem.experiments: for period in experiment.periods: if not period.condition_ids: continue @@ -429,7 +418,7 @@ def run(self, problem: Problem) -> ValidationIssue | None: for condition_id in period.condition_ids: condition_targets = { change.target_id - for cond in problem.condition_table.conditions + for cond in problem.conditions if cond.id == condition_id for change in cond.changes } @@ -451,7 +440,7 @@ def run(self, problem: Problem) -> ValidationIssue | None: # -- replaces CheckObservablesDoNotShadowModelEntities # check for uniqueness of all primary keys - counter = Counter(c.id for c in problem.condition_table.conditions) + counter = Counter(c.id for c in problem.conditions) duplicates = {id_ for id_, count in counter.items() if count > 1} if duplicates: @@ -459,7 +448,7 @@ def run(self, problem: Problem) -> ValidationIssue | None: f"Condition table contains duplicate IDs: {duplicates}" ) - counter = Counter(o.id for o in problem.observable_table.observables) + counter = Counter(o.id for o in problem.observables) duplicates = {id_ for id_, count in counter.items() if count > 1} if duplicates: @@ -467,7 +456,7 @@ def run(self, problem: Problem) -> ValidationIssue | None: f"Observable table contains duplicate IDs: {duplicates}" ) - counter = Counter(e.id for e in problem.experiment_table.experiments) + counter = Counter(e.id for e in problem.experiments) duplicates = {id_ for id_, count in counter.items() if count > 1} if duplicates: @@ -475,7 +464,7 @@ def run(self, problem: Problem) -> ValidationIssue | None: f"Experiment table contains duplicate IDs: {duplicates}" ) - counter = Counter(p.id for p in problem.parameter_table.parameters) + counter = Counter(p.id for p in problem.parameters) duplicates = {id_ for id_, count in counter.items() if count > 1} if duplicates: @@ -491,12 +480,12 @@ class CheckObservablesDoNotShadowModelEntities(ValidationTask): # TODO: all PEtab entity IDs must be disjoint from the model entity IDs def run(self, 
problem: Problem) -> ValidationIssue | None: - if not problem.observable_table.observables or problem.model is None: + if not problem.observables or problem.model is None: return None shadowed_entities = [ o.id - for o in problem.observable_table.observables + for o in problem.observables if problem.model.has_entity_with_id(o.id) ] if shadowed_entities: @@ -512,7 +501,7 @@ class CheckExperimentTable(ValidationTask): def run(self, problem: Problem) -> ValidationIssue | None: messages = [] - for experiment in problem.experiment_table.experiments: + for experiment in problem.experiments: # Check that there are no duplicate timepoints counter = Counter(period.time for period in experiment.periods) duplicates = {time for time, count in counter.items() if count > 1} @@ -534,10 +523,8 @@ class CheckExperimentConditionsExist(ValidationTask): def run(self, problem: Problem) -> ValidationIssue | None: messages = [] - available_conditions = { - c.id for c in problem.condition_table.conditions - } - for experiment in problem.experiment_table.experiments: + available_conditions = {c.id for c in problem.conditions} + for experiment in problem.experiments: missing_conditions = ( set( chain.from_iterable( @@ -569,7 +556,7 @@ def run(self, problem: Problem) -> ValidationIssue | None: required = get_required_parameters_for_parameter_table(problem) allowed = get_valid_parameters_for_parameter_table(problem) - actual = {p.id for p in problem.parameter_table.parameters} + actual = {p.id for p in problem.parameters} missing = required - actual extraneous = actual - allowed @@ -577,7 +564,7 @@ def run(self, problem: Problem) -> ValidationIssue | None: # the mapping table if missing: model_to_petab_mapping = {} - for m in problem.mapping_table.mappings: + for m in problem.mappings: if m.model_id in model_to_petab_mapping: model_to_petab_mapping[m.model_id].append(m.petab_id) else: @@ -620,7 +607,7 @@ def run(self, problem: Problem) -> ValidationIssue | None: ) allowed_in_condition_cols |= 
{ m.petab_id - for m in problem.mapping_table.mappings + for m in problem.mappings if not pd.isna(m.model_id) and ( # mapping table entities mapping to already allowed parameters @@ -636,12 +623,10 @@ def run(self, problem: Problem) -> ValidationIssue | None: entities_in_condition_table = { change.target_id - for cond in problem.condition_table.conditions + for cond in problem.conditions for change in cond.changes } - entities_in_parameter_table = { - p.id for p in problem.parameter_table.parameters - } + entities_in_parameter_table = {p.id for p in problem.parameters} disallowed_in_condition = { x @@ -689,12 +674,10 @@ class CheckUnusedExperiments(ValidationTask): def run(self, problem: Problem) -> ValidationIssue | None: used_experiments = { m.experiment_id - for m in problem.measurement_table.measurements + for m in problem.measurements if m.experiment_id is not None } - available_experiments = { - e.id for e in problem.experiment_table.experiments - } + available_experiments = {e.id for e in problem.experiments} unused_experiments = available_experiments - used_experiments if unused_experiments: @@ -713,14 +696,10 @@ class CheckUnusedConditions(ValidationTask): def run(self, problem: Problem) -> ValidationIssue | None: used_conditions = set( chain.from_iterable( - p.condition_ids - for e in problem.experiment_table.experiments - for p in e.periods + p.condition_ids for e in problem.experiments for p in e.periods ) ) - available_conditions = { - c.id for c in problem.condition_table.conditions - } + available_conditions = {c.id for c in problem.conditions} unused_conditions = available_conditions - used_conditions if unused_conditions: @@ -770,7 +749,7 @@ class CheckPriorDistribution(ValidationTask): def run(self, problem: Problem) -> ValidationIssue | None: messages = [] - for parameter in problem.parameter_table.parameters: + for parameter in problem.parameters: if parameter.prior_distribution is None: continue @@ -837,7 +816,7 @@ def 
get_valid_parameters_for_parameter_table( # condition table targets invalid |= { change.target_id - for cond in problem.condition_table.conditions + for cond in problem.conditions for change in cond.changes } @@ -849,7 +828,7 @@ def get_valid_parameters_for_parameter_table( if p not in invalid ) - for mapping in problem.mapping_table.mappings: + for mapping in problem.mappings: if mapping.model_id and mapping.model_id in parameter_ids.keys(): parameter_ids[mapping.petab_id] = None @@ -866,14 +845,15 @@ def append_overrides(overrides): if isinstance(p, sp.Symbol) and (str_p := str(p)) not in invalid: parameter_ids[str_p] = None - for measurement in problem.measurement_table.measurements: + for measurement in problem.measurements: # we trust that the number of overrides matches append_overrides(measurement.observable_parameters) append_overrides(measurement.noise_parameters) # Append parameter overrides from condition table - for p in problem.condition_table.free_symbols: - parameter_ids[str(p)] = None + for ct in problem.condition_tables: + for p in ct.free_symbols: + parameter_ids[str(p)] = None return set(parameter_ids.keys()) @@ -895,7 +875,7 @@ def get_required_parameters_for_parameter_table( parameter_ids = set() condition_targets = { change.target_id - for cond in problem.condition_table.conditions + for cond in problem.conditions for change in cond.changes } @@ -908,7 +888,7 @@ def append_overrides(overrides): and (str_p := str(p)) not in condition_targets ) - for m in problem.measurement_table.measurements: + for m in problem.measurements: # we trust that the number of overrides matches append_overrides(m.observable_parameters) append_overrides(m.noise_parameters) @@ -916,7 +896,7 @@ def append_overrides(overrides): # TODO remove `observable_ids` when # `get_output_parameters` is updated for PEtab v2/v1.1, where # observable IDs are allowed in observable formulae - observable_ids = {o.id for o in problem.observable_table.observables} + observable_ids = {o.id 
for o in problem.observables} # Add output parameters except for placeholders for formula_type, placeholder_sources in ( @@ -951,7 +931,8 @@ def append_overrides(overrides): # model parameter_ids.update( str(p) - for p in problem.condition_table.free_symbols + for ct in problem.condition_tables + for p in ct.free_symbols if not problem.model.has_entity_with_id(str(p)) ) @@ -981,13 +962,9 @@ def get_output_parameters( """ formulas = [] if observables: - formulas.extend( - o.formula for o in problem.observable_table.observables - ) + formulas.extend(o.formula for o in problem.observables) if noise: - formulas.extend( - o.noise_formula for o in problem.observable_table.observables - ) + formulas.extend(o.noise_formula for o in problem.observables) output_parameters = OrderedDict() for formula in formulas: @@ -1001,17 +978,15 @@ def get_output_parameters( continue # does it map to a model entity? - - if ( - (mapped := problem.mapping_table.get(sym)) is not None - and mapped.model_id is not None - and problem.model.symbol_allowed_in_observable_formula( - mapped.model_id - ) - ): - continue - - output_parameters[sym] = None + for mapping in problem.mappings: + if mapping.petab_id == sym and mapping.model_id is not None: + if problem.model.symbol_allowed_in_observable_formula( + mapping.model_id + ): + break + else: + # no mapping to a model entity, so it is an output parameter + output_parameters[sym] = None return list(output_parameters.keys()) @@ -1036,7 +1011,7 @@ def get_placeholders( # collect placeholder parameters overwritten by # {observable,noise}Parameters placeholders = [] - for o in problem.observable_table.observables: + for o in problem.observables: if observables: placeholders.extend(map(str, o.observable_placeholders)) if noise: diff --git a/petab/v2/problem.py b/petab/v2/problem.py index 97684241..a41a53b5 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -7,6 +7,7 @@ import tempfile import traceback from collections.abc import Sequence +from 
itertools import chain from math import nan from numbers import Number from pathlib import Path @@ -18,9 +19,6 @@ from pydantic import AnyUrl, BaseModel, Field, field_validator from ..v1 import ( - mapping, - measurements, - observables, parameter_mapping, parameters, validate_yaml_syntax, @@ -32,7 +30,7 @@ from ..v1.yaml import get_path_prefix from ..v2.C import * # noqa: F403 from ..versions import parse_version -from . import conditions, core, experiments +from . import core if TYPE_CHECKING: from ..v2.lint import ValidationResultList, ValidationTask @@ -63,12 +61,13 @@ class Problem: def __init__( self, model: Model = None, - condition_table: core.ConditionTable = None, - experiment_table: core.ExperimentTable = None, - observable_table: core.ObservableTable = None, - measurement_table: core.MeasurementTable = None, - parameter_table: core.ParameterTable = None, - mapping_table: core.MappingTable = None, + condition_tables: list[core.ConditionTable] = None, + experiment_tables: list[core.ExperimentTable] = None, + observable_tables: list[core.ObservableTable] = None, + measurement_tables: list[core.MeasurementTable] = None, + parameter_tables: list[core.ParameterTable] = None, + mapping_tables: list[core.MappingTable] = None, + # TODO: remove visualization_df: pd.DataFrame = None, config: ProblemConfig = None, ): @@ -80,41 +79,43 @@ def __init__( default_validation_tasks.copy() ) - self.observable_table = observable_table or core.ObservableTable( - observables=[] - ) - self.condition_table = condition_table or core.ConditionTable( - conditions=[] - ) - self.experiment_table = experiment_table or core.ExperimentTable( - experiments=[] - ) - self.measurement_table = measurement_table or core.MeasurementTable( - measurements=[] - ) - self.mapping_table = mapping_table or core.MappingTable(mappings=[]) - self.parameter_table = parameter_table or core.ParameterTable( - parameters=[] - ) + self.observable_tables = observable_tables or [ + 
core.ObservableTable(observables=[]) + ] + self.condition_tables = condition_tables or [ + core.ConditionTable(conditions=[]) + ] + self.experiment_tables = experiment_tables or [ + core.ExperimentTable(experiments=[]) + ] + self.measurement_tables = measurement_tables or [ + core.MeasurementTable(measurements=[]) + ] + self.mapping_tables = mapping_tables or [ + core.MappingTable(mappings=[]) + ] + self.parameter_tables = parameter_tables or [ + core.ParameterTable(parameters=[]) + ] self.visualization_df = visualization_df def __str__(self): model = f"with model ({self.model})" if self.model else "without model" - ne = len(self.experiment_table.experiments) + ne = len(self.experiments) experiments = f"{ne} experiments" - nc = len(self.condition_table.conditions) + nc = len(self.conditions) conditions = f"{nc} conditions" - no = len(self.observable_table.observables) + no = len(self.observables) observables = f"{no} observables" - nm = len(self.measurement_table.measurements) + nm = len(self.measurements) measurements = f"{nm} measurements" - nest = self.parameter_table.n_estimated + nest = sum(pt.n_estimated for pt in self.parameter_tables) parameters = f"{nest} estimated parameters" return ( @@ -130,15 +131,15 @@ def __getitem__(self, key): Accessing model entities is not currently not supported. 
""" - for table in ( - self.condition_table, - self.experiment_table, - self.observable_table, - self.measurement_table, - self.parameter_table, - self.mapping_table, + for table_list in ( + self.condition_tables, + self.experiment_tables, + self.observable_tables, + self.measurement_tables, + self.parameter_tables, + self.mapping_tables, ): - if table is not None: + for table in table_list: try: return table[key] except KeyError: @@ -215,9 +216,10 @@ def get_path(filename): config = ProblemConfig( **yaml_config, base_path=base_path, filepath=yaml_file ) - parameter_df = parameters.get_parameter_df( - [get_path(f) for f in config.parameter_files] - ) + parameter_tables = [ + core.ParameterTable.from_tsv(get_path(f)) + for f in config.parameter_files + ] if len(config.model_files or []) > 1: # TODO https://github.com/PEtab-dev/libpetab-python/issues/6 @@ -233,27 +235,30 @@ def get_path(filename): model_id=model_id, ) - measurement_files = [get_path(f) for f in config.measurement_files] - # If there are multiple tables, we will merge them - measurement_df = ( - concat_tables(measurement_files, measurements.get_measurement_df) - if measurement_files + measurement_tables = ( + [ + core.MeasurementTable.from_tsv(get_path(f)) + for f in config.measurement_files + ] + if config.measurement_files else None ) - condition_files = [get_path(f) for f in config.condition_files] - # If there are multiple tables, we will merge them - condition_df = ( - concat_tables(condition_files, conditions.get_condition_df) - if condition_files + condition_tables = ( + [ + core.ConditionTable.from_tsv(get_path(f)) + for f in config.condition_files + ] + if config.condition_files else None ) - experiment_files = [get_path(f) for f in config.experiment_files] - # If there are multiple tables, we will merge them - experiment_df = ( - concat_tables(experiment_files, experiments.get_experiment_df) - if experiment_files + experiment_tables = ( + [ + core.ExperimentTable.from_tsv(get_path(f)) + for 
f in config.experiment_files + ] + if config.experiment_files else None ) @@ -266,32 +271,34 @@ def get_path(filename): else None ) - observable_files = [get_path(f) for f in config.observable_files] - # If there are multiple tables, we will merge them - observable_df = ( - concat_tables(observable_files, observables.get_observable_df) - if observable_files + observable_tables = ( + [ + core.ObservableTable.from_tsv(get_path(f)) + for f in config.observable_files + ] + if config.observable_files else None ) - mapping_files = [get_path(f) for f in config.mapping_files] - # If there are multiple tables, we will merge them - mapping_df = ( - concat_tables(mapping_files, mapping.get_mapping_df) - if mapping_files + mapping_tables = ( + [ + core.MappingTable.from_tsv(get_path(f)) + for f in config.mapping_files + ] + if config.mapping_files else None ) - return Problem.from_dfs( - condition_df=condition_df, - experiment_df=experiment_df, - measurement_df=measurement_df, - parameter_df=parameter_df, - observable_df=observable_df, + return Problem( + config=config, model=model, + condition_tables=condition_tables, + experiment_tables=experiment_tables, + observable_tables=observable_tables, + measurement_tables=measurement_tables, + parameter_tables=parameter_tables, + mapping_tables=mapping_tables, visualization_df=visualization_df, - mapping_df=mapping_df, - config=config, ) @staticmethod @@ -330,12 +337,12 @@ def from_dfs( return Problem( model=model, - condition_table=condition_table, - experiment_table=experiment_table, - observable_table=observable_table, - measurement_table=measurement_table, - parameter_table=parameter_table, - mapping_table=mapping_table, + condition_tables=[condition_table], + experiment_tables=[experiment_table], + observable_tables=[observable_table], + measurement_tables=[measurement_table], + parameter_tables=[parameter_table], + mapping_tables=[mapping_table], visualization_df=visualization_df, config=config, ) @@ -398,73 +405,142 @@ def 
get_problem(problem: str | Path | Problem) -> Problem: @property def condition_df(self) -> pd.DataFrame | None: - """Condition table as DataFrame.""" - # TODO: return empty df? - return self.condition_table.to_df() if self.condition_table else None + """Combined condition tables as DataFrame.""" + conditions = self.conditions + return ( + core.ConditionTable(conditions=conditions).to_df() + if conditions + else None + ) @condition_df.setter def condition_df(self, value: pd.DataFrame): - self.condition_table = core.ConditionTable.from_df(value) + self.condition_tables = [core.ConditionTable.from_df(value)] @property def experiment_df(self) -> pd.DataFrame | None: """Experiment table as DataFrame.""" - return self.experiment_table.to_df() if self.experiment_table else None + return ( + core.ExperimentTable(experiments=experiments).to_df() + if (experiments := self.experiments) + else None + ) @experiment_df.setter def experiment_df(self, value: pd.DataFrame): - self.experiment_table = core.ExperimentTable.from_df(value) + self.experiment_tables = [core.ExperimentTable.from_df(value)] @property def measurement_df(self) -> pd.DataFrame | None: - """Measurement table as DataFrame.""" + """Combined measurement tables as DataFrame.""" + measurements = self.measurements return ( - self.measurement_table.to_df() if self.measurement_table else None + core.MeasurementTable(measurements=measurements).to_df() + if measurements + else None ) @measurement_df.setter def measurement_df(self, value: pd.DataFrame): - self.measurement_table = core.MeasurementTable.from_df(value) + self.measurement_tables = [core.MeasurementTable.from_df(value)] @property def parameter_df(self) -> pd.DataFrame | None: - """Parameter table as DataFrame.""" - return self.parameter_table.to_df() if self.parameter_table else None + """Combined parameter tables as DataFrame.""" + parameters = self.parameters + return ( + core.ParameterTable(parameters=parameters).to_df() + if parameters + else None + ) 
@parameter_df.setter def parameter_df(self, value: pd.DataFrame): - self.parameter_table = core.ParameterTable.from_df(value) + self.parameter_tables = [core.ParameterTable.from_df(value)] @property def observable_df(self) -> pd.DataFrame | None: - """Observable table as DataFrame.""" - return self.observable_table.to_df() if self.observable_table else None + """Combined observable tables as DataFrame.""" + observables = self.observables + return ( + core.ObservableTable(observables=observables).to_df() + if observables + else None + ) @observable_df.setter def observable_df(self, value: pd.DataFrame): - self.observable_table = core.ObservableTable.from_df(value) + self.observable_tables = [core.ObservableTable.from_df(value)] @property def mapping_df(self) -> pd.DataFrame | None: - """Mapping table as DataFrame.""" - return self.mapping_table.to_df() if self.mapping_table else None + """Combined mapping tables as DataFrame.""" + mappings = self.mappings + return ( + core.MappingTable(mappings=mappings).to_df() if mappings else None + ) @mapping_df.setter def mapping_df(self, value: pd.DataFrame): - self.mapping_table = core.MappingTable.from_df(value) + self.mapping_tables = [core.MappingTable.from_df(value)] + + @property + def conditions(self) -> list[core.Condition]: + """List of conditions in the condition table(s).""" + return list( + chain.from_iterable(ct.conditions for ct in self.condition_tables) + ) + + @property + def experiments(self) -> list[core.Experiment]: + """List of experiments in the experiment table(s).""" + return list( + chain.from_iterable( + et.experiments for et in self.experiment_tables + ) + ) + + @property + def observables(self) -> list[core.Observable]: + """List of observables in the observable table(s).""" + return list( + chain.from_iterable( + ot.observables for ot in self.observable_tables + ) + ) + + @property + def measurements(self) -> list[core.Measurement]: + """List of measurements in the measurement table(s).""" + return 
list( + chain.from_iterable( + mt.measurements for mt in self.measurement_tables + ) + ) + + @property + def parameters(self) -> list[core.Parameter]: + """List of parameters in the parameter table(s).""" + return list( + chain.from_iterable(pt.parameters for pt in self.parameter_tables) + ) + + @property + def mappings(self) -> list[core.Mapping]: + """List of mappings in the mapping table(s).""" + return list( + chain.from_iterable(mt.mappings for mt in self.mapping_tables) + ) def get_optimization_parameters(self) -> list[str]: """ Get the list of optimization parameter IDs from parameter table. - Arguments: - parameter_df: PEtab parameter DataFrame - Returns: A list of IDs of parameters selected for optimization (i.e., those with estimate = True). """ - return [p.id for p in self.parameter_table.parameters if p.estimate] + return [p.id for p in self.parameters if p.estimate] def get_optimization_parameter_scales(self) -> dict[str, str]: """ @@ -479,7 +555,7 @@ def get_observable_ids(self) -> list[str]: """ Returns dictionary of observable ids. """ - return [o.id for o in self.observable_table.observables] + return [o.id for o in self.observables] def _apply_mask(self, v: list, free: bool = True, fixed: bool = True): """Apply mask of only free or only fixed values. @@ -521,7 +597,7 @@ def get_x_ids(self, free: bool = True, fixed: bool = True): ------- The parameter IDs. """ - v = [p.id for p in self.parameter_table.parameters] + v = [p.id for p in self.parameters] return self._apply_mask(v, free=free, fixed=fixed) @property @@ -561,7 +637,7 @@ def get_x_nominal( """ v = [ p.nominal_value if p.nominal_value is not None else nan - for p in self.parameter_table.parameters + for p in self.parameters ] if scaled: @@ -624,10 +700,7 @@ def get_lb( ------- The lower parameter bounds. 
""" - v = [ - p.lb if p.lb is not None else nan - for p in self.parameter_table.parameters - ] + v = [p.lb if p.lb is not None else nan for p in self.parameters] if scaled: v = list( parameters.map_scale(v, self.parameter_df[PARAMETER_SCALE]) @@ -664,10 +737,7 @@ def get_ub( ------- The upper parameter bounds. """ - v = [ - p.ub if p.ub is not None else nan - for p in self.parameter_table.parameters - ] + v = [p.ub if p.ub is not None else nan for p in self.parameters] if scaled: v = list( parameters.map_scale(v, self.parameter_df[PARAMETER_SCALE]) @@ -687,20 +757,12 @@ def ub_scaled(self) -> list: @property def x_free_indices(self) -> list[int]: """Parameter table estimated parameter indices.""" - return [ - i - for i, p in enumerate(self.parameter_table.parameters) - if p.estimate - ] + return [i for i, p in enumerate(self.parameters) if p.estimate] @property def x_fixed_indices(self) -> list[int]: """Parameter table non-estimated parameter indices.""" - return [ - i - for i, p in enumerate(self.parameter_table.parameters) - if not p.estimate - ] + return [i for i, p in enumerate(self.parameters) if not p.estimate] # TODO remove in v2? def get_optimization_to_simulation_parameter_mapping(self, **kwargs): @@ -725,11 +787,7 @@ def get_priors(self) -> dict[str, Distribution]: :returns: The prior distributions for the estimated parameters. 
""" - return { - p.id: p.prior_dist - for p in self.parameter_table.parameters - if p.estimate - } + return {p.id: p.prior_dist for p in self.parameters if p.estimate} def sample_parameter_startpoints(self, n_starts: int = 100, **kwargs): """Create 2D array with starting points for optimization""" @@ -810,15 +868,12 @@ def n_estimated(self) -> int: @property def n_measurements(self) -> int: """Number of measurements.""" - return len(self.measurement_table.measurements) + return sum(len(mt.measurements) for mt in self.measurement_tables) @property def n_priors(self) -> int: """Number of priors.""" - return sum( - p.prior_distribution is not None - for p in self.parameter_table.parameters - ) + return sum(p.prior_distribution is not None for p in self.parameters) def validate( self, validation_tasks: list[ValidationTask] = None @@ -872,9 +927,14 @@ def add_condition( ): """Add a simulation condition to the problem. + If there are more than one condition tables, the condition + is added to the last one. + Arguments: id_: The condition id - name: The condition name + name: The condition name. If given, this will be added to the + last mapping table. If no mapping table exists, + a new mapping table will be created. kwargs: Entities to be added to the condition table in the form `target_id=target_value`. """ @@ -885,16 +945,13 @@ def add_condition( core.Change(target_id=target_id, target_value=target_value) for target_id, target_value in kwargs.items() ] - self.condition_table.conditions.append( + if not self.condition_tables: + self.condition_tables.append(core.ConditionTable(conditions=[])) + self.condition_tables[-1].conditions.append( core.Condition(id=id_, changes=changes) ) if name is not None: - self.mapping_table.mappings.append( - core.Mapping( - petab_id=id_, - name=name, - ) - ) + self.add_mapping(petab_id=id_, name=name) def add_observable( self, @@ -909,6 +966,9 @@ def add_observable( ): """Add an observable to the problem. 
+ If there are more than one observable tables, the observable + is added to the last one. + Arguments: id_: The observable id formula: The observable formula @@ -936,7 +996,10 @@ def add_observable( record[NOISE_PLACEHOLDERS] = noise_placeholders record.update(kwargs) - self.observable_table += core.Observable(**record) + if not self.observable_tables: + self.observable_tables.append(core.ObservableTable(observables=[])) + + self.observable_tables[-1] += core.Observable(**record) def add_parameter( self, @@ -952,6 +1015,9 @@ def add_parameter( ): """Add a parameter to the problem. + If there are more than one parameter tables, the parameter + is added to the last one. + Arguments: id_: The parameter id estimate: Whether the parameter is estimated @@ -986,7 +1052,10 @@ def add_parameter( record[PRIOR_PARAMETERS] = prior_pars record.update(kwargs) - self.parameter_table += core.Parameter(**record) + if not self.parameter_tables: + self.parameter_tables.append(core.ParameterTable(parameters=[])) + + self.parameter_tables[-1] += core.Parameter(**record) def add_measurement( self, @@ -999,6 +1068,9 @@ def add_measurement( ): """Add a measurement to the problem. + If there are more than one measurement tables, the measurement + is added to the last one. + Arguments: obs_id: The observable ID experiment_id: The experiment ID @@ -1016,7 +1088,12 @@ def add_measurement( ): noise_parameters = [noise_parameters] - self.measurement_table.measurements.append( + if not self.measurement_tables: + self.measurement_tables.append( + core.MeasurementTable(measurements=[]) + ) + + self.measurement_tables[-1].measurements.append( core.Measurement( observable_id=obs_id, experiment_id=experiment_id, @@ -1027,20 +1104,31 @@ def add_measurement( ) ) - def add_mapping(self, petab_id: str, model_id: str, name: str = None): + def add_mapping( + self, petab_id: str, model_id: str = None, name: str = None + ): """Add a mapping table entry to the problem. 
+ If there are more than one mapping tables, the mapping + is added to the last one. + Arguments: petab_id: The new PEtab-compatible ID mapping to `model_id` model_id: The ID of some entity in the model + name: A name (any string) for the entity referenced by `petab_id`. """ - self.mapping_table.mappings.append( + if not self.mapping_tables: + self.mapping_tables.append(core.MappingTable(mappings=[])) + self.mapping_tables[-1].mappings.append( core.Mapping(petab_id=petab_id, model_id=model_id, name=name) ) def add_experiment(self, id_: str, *args): """Add an experiment to the problem. + If there are more than one experiment tables, the experiment + is added to the last one. + :param id_: The experiment ID. :param args: Timepoints and associated conditions: ``time_1, condition_id_1, time_2, condition_id_2, ...``. @@ -1060,7 +1148,9 @@ def add_experiment(self, id_: str, *args): for i in range(0, len(args), 2) ] - self.experiment_table.experiments.append( + if not self.experiment_tables: + self.experiment_tables.append(core.ExperimentTable(experiments=[])) + self.experiment_tables[-1].experiments.append( core.Experiment(id=id_, periods=periods) ) @@ -1075,15 +1165,35 @@ def __iadd__(self, other): ) if isinstance(other, Observable): - self.observable_table += other + if not self.observable_tables: + self.observable_tables.append( + core.ObservableTable(observables=[]) + ) + self.observable_tables[-1] += other elif isinstance(other, Parameter): - self.parameter_table += other + if not self.parameter_tables: + self.parameter_tables.append( + core.ParameterTable(parameters=[]) + ) + self.parameter_tables[-1] += other elif isinstance(other, Measurement): - self.measurement_table += other + if not self.measurement_tables: + self.measurement_tables.append( + core.MeasurementTable(measurements=[]) + ) + self.measurement_tables[-1] += other elif isinstance(other, Condition): - self.condition_table += other + if not self.condition_tables: + self.condition_tables.append( + 
core.ConditionTable(conditions=[]) + ) + self.condition_tables[-1] += other elif isinstance(other, Experiment): - self.experiment_table += other + if not self.experiment_tables: + self.experiment_tables.append( + core.ExperimentTable(experiments=[]) + ) + self.experiment_tables[-1] += other else: raise ValueError( f"Cannot add object of type {type(other)} to Problem." @@ -1136,13 +1246,19 @@ def model_dump(self, **kwargs) -> dict[str, Any]: **kwargs, by_alias=True ), } - res |= self.mapping_table.model_dump(**kwargs) - res |= self.condition_table.model_dump(**kwargs) - res |= self.experiment_table.model_dump(**kwargs) - res |= self.observable_table.model_dump(**kwargs) - res |= self.measurement_table.model_dump(**kwargs) - res |= self.parameter_table.model_dump(**kwargs) - + for field, table_list in ( + ("conditions", self.condition_tables), + ("experiments", self.experiment_tables), + ("observables", self.observable_tables), + ("measurements", self.measurement_tables), + ("parameters", self.parameter_tables), + ("mappings", self.mapping_tables), + ): + res[field] = ( + [table.model_dump(**kwargs) for table in table_list] + if table_list + else [] + ) return res diff --git a/tests/v2/test_conversion.py b/tests/v2/test_conversion.py index 43e14662..6bcbb22c 100644 --- a/tests/v2/test_conversion.py +++ b/tests/v2/test_conversion.py @@ -15,7 +15,7 @@ def test_petab1to2_remote(): problem = petab1to2(yaml_url) assert isinstance(problem, Problem) - assert len(problem.measurement_table.measurements) + assert len(problem.measurements) try: @@ -45,4 +45,4 @@ def test_benchmark_collection(problem_id): except NotImplementedError as e: pytest.skip(str(e)) assert isinstance(problem, Problem) - assert len(problem.measurement_table.measurements) + assert len(problem.measurements) diff --git a/tests/v2/test_converters.py b/tests/v2/test_converters.py index 76ba6a86..8cdbaddf 100644 --- a/tests/v2/test_converters.py +++ b/tests/v2/test_converters.py @@ -25,7 +25,7 @@ def 
test_experiments_to_events_converter(): sbml_model = converted.model.sbml_model assert sbml_model.getNumEvents() == 2 - assert converted.condition_table.conditions == [ + assert converted.conditions == [ Condition( id="_petab_preequilibration_on", changes=[ @@ -53,7 +53,7 @@ def test_experiments_to_events_converter(): ], ), ] - assert converted.experiment_table.experiments == [ + assert converted.experiments == [ Experiment( id="e1", periods=[ From c94bb1b5618f3044ffd20bb8917faed5cbe65456 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 21 Jul 2025 16:27:20 +0200 Subject: [PATCH 068/141] v2: parameter_file -> parameter_files (#406) Adapt to https://github.com/PEtab-dev/PEtab/pull/641. --- petab/schemas/petab_schema.v2.0.0.yaml | 16 ++++++++-------- petab/v2/C.py | 4 ++-- petab/v2/petab1to2.py | 14 ++++++++++++-- petab/v2/problem.py | 2 +- tests/v2/test_problem.py | 2 +- 5 files changed, 24 insertions(+), 14 deletions(-) diff --git a/petab/schemas/petab_schema.v2.0.0.yaml b/petab/schemas/petab_schema.v2.0.0.yaml index d87a8401..7f1b7443 100644 --- a/petab/schemas/petab_schema.v2.0.0.yaml +++ b/petab/schemas/petab_schema.v2.0.0.yaml @@ -23,14 +23,14 @@ properties: - type: integer description: Version of the PEtab format - parameter_file: - oneOf: - - type: string - - type: array + parameter_files: + type: array description: | - File name (absolute or relative) or URL to PEtab parameter table - containing parameters of all models listed in `problems`. A single - table may be split into multiple files and described as an array here. + List of PEtab parameter files. + items: + type: string + description: | + File name (absolute or relative) or URL to a PEtab parameter table. 
model_files: type: object @@ -95,7 +95,7 @@ properties: required: - format_version - - parameter_file + - parameter_files - model_files - observable_files - measurement_files diff --git a/petab/v2/C.py b/petab/v2/C.py index 99abf343..02bee5dd 100644 --- a/petab/v2/C.py +++ b/petab/v2/C.py @@ -344,8 +344,8 @@ # YAML #: PEtab version key in the YAML file FORMAT_VERSION = "format_version" -#: Parameter file key in the YAML file -PARAMETER_FILE = "parameter_file" +#: Parameter files key in the YAML file +PARAMETER_FILES = "parameter_files" #: Problems key in the YAML file PROBLEMS = "problems" #: Model files key in the YAML file diff --git a/petab/v2/petab1to2.py b/petab/v2/petab1to2.py index 2b6ec0e3..5a5ac385 100644 --- a/petab/v2/petab1to2.py +++ b/petab/v2/petab1to2.py @@ -96,8 +96,9 @@ def petab_files_1to2(yaml_config: Path | str, output_dir: Path | str): # parameter table parameter_df = v1v2_parameter_df(petab_problem.parameter_df.copy()) - file = yaml_config[v2.C.PARAMETER_FILE] - v2.write_parameter_df(parameter_df, get_dest_path(file)) + v2.write_parameter_df( + parameter_df, get_dest_path(new_yaml_config.parameter_files[0]) + ) # copy files that don't need conversion # (models, visualizations) @@ -294,6 +295,15 @@ def _update_yaml(yaml_config: dict) -> dict: if file_type in problem: yaml_config[file_type] = problem[file_type] del problem[file_type] + del yaml_config[v1.C.PROBLEMS] + + # parameter_file -> parameter_files + if not isinstance( + (par_files := yaml_config.pop(v1.C.PARAMETER_FILE, [])), list + ): + par_files = [par_files] + yaml_config[v2.C.PARAMETER_FILES] = par_files + return yaml_config diff --git a/petab/v2/problem.py b/petab/v2/problem.py index a41a53b5..6c657cc3 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -1298,7 +1298,7 @@ class ProblemConfig(BaseModel): # rename to parameter_files in yaml for consistency with other files? # always a list? 
parameter_files: list[str | AnyUrl] = Field( - default=[], alias=PARAMETER_FILE + default=[], alias=PARAMETER_FILES ) # TODO: consider changing str to Path diff --git a/tests/v2/test_problem.py b/tests/v2/test_problem.py index e616193d..73cc3988 100644 --- a/tests/v2/test_problem.py +++ b/tests/v2/test_problem.py @@ -67,7 +67,7 @@ def test_problem_from_yaml_multiple_files(): """ yaml_config = """ format_version: 2.0.0 - parameter_file: [] + parameter_files: [] condition_files: [conditions1.tsv, conditions2.tsv] measurement_files: [measurements1.tsv, measurements2.tsv] observable_files: [observables1.tsv, observables2.tsv] From bf4055c1c4307ba3e164080acc720dcd9adf145b Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 21 Jul 2025 16:53:25 +0200 Subject: [PATCH 069/141] Remove vis info from petab.v2 (#407) Remove visualization tables until there is progress on https://github.com/PEtab-dev/libpetab-python/issues/398. --- petab/v2/C.py | 110 ------------------------------------------ petab/v2/lint.py | 20 -------- petab/v2/petab1to2.py | 10 ++-- petab/v2/problem.py | 24 +-------- 4 files changed, 4 insertions(+), 160 deletions(-) diff --git a/petab/v2/C.py b/petab/v2/C.py index 02bee5dd..05e8d3dd 100644 --- a/petab/v2/C.py +++ b/petab/v2/C.py @@ -233,114 +233,6 @@ #: Supported noise distributions NOISE_DISTRIBUTIONS = [NORMAL, LAPLACE, LOG_NORMAL, LOG_LAPLACE] - -# VISUALIZATION - -#: Plot ID column in the visualization table -PLOT_ID = "plotId" -#: Plot name column in the visualization table -PLOT_NAME = "plotName" -#: Value for plot type 'simulation' in the visualization table -PLOT_TYPE_SIMULATION = "plotTypeSimulation" -#: Value for plot type 'data' in the visualization table -PLOT_TYPE_DATA = "plotTypeData" -#: X values column in the visualization table -X_VALUES = "xValues" -#: X offset column in the visualization table -X_OFFSET = "xOffset" -#: X label column in the visualization table -X_LABEL = "xLabel" -#: X scale column in the visualization table 
-X_SCALE = "xScale" -#: Y values column in the visualization table -Y_VALUES = "yValues" -#: Y offset column in the visualization table -Y_OFFSET = "yOffset" -#: Y label column in the visualization table -Y_LABEL = "yLabel" -#: Y scale column in the visualization table -Y_SCALE = "yScale" -#: Legend entry column in the visualization table -LEGEND_ENTRY = "legendEntry" - -#: Mandatory columns of visualization table -VISUALIZATION_DF_REQUIRED_COLS = [PLOT_ID] - -#: Optional columns of visualization table -VISUALIZATION_DF_OPTIONAL_COLS = [ - PLOT_NAME, - PLOT_TYPE_SIMULATION, - PLOT_TYPE_DATA, - X_VALUES, - X_OFFSET, - X_LABEL, - X_SCALE, - Y_VALUES, - Y_OFFSET, - Y_LABEL, - Y_SCALE, - LEGEND_ENTRY, - DATASET_ID, -] - -#: Visualization table columns -VISUALIZATION_DF_COLS = [ - *VISUALIZATION_DF_REQUIRED_COLS, - *VISUALIZATION_DF_OPTIONAL_COLS, -] - -#: Visualization table columns that contain subplot specifications -VISUALIZATION_DF_SUBPLOT_LEVEL_COLS = [ - PLOT_ID, - PLOT_NAME, - PLOT_TYPE_SIMULATION, - PLOT_TYPE_DATA, - X_LABEL, - X_SCALE, - Y_LABEL, - Y_SCALE, -] - -#: Visualization table columns that contain single plot specifications -VISUALIZATION_DF_SINGLE_PLOT_LEVEL_COLS = [ - X_VALUES, - X_OFFSET, - Y_VALUES, - Y_OFFSET, - LEGEND_ENTRY, - DATASET_ID, -] - -#: Plot type value in the visualization table for line plot -LINE_PLOT = "LinePlot" -#: Plot type value in the visualization table for bar plot -BAR_PLOT = "BarPlot" -#: Plot type value in the visualization table for scatter plot -SCATTER_PLOT = "ScatterPlot" -#: Supported plot types -PLOT_TYPES_SIMULATION = [LINE_PLOT, BAR_PLOT, SCATTER_PLOT] - -#: Supported xScales -X_SCALES = [LIN, LOG, LOG10] - -#: Supported yScales -Y_SCALES = [LIN, LOG, LOG10] - - -#: Plot type "data" value in the visualization table for mean and standard -# deviation -MEAN_AND_SD = "MeanAndSD" -#: Plot type "data" value in the visualization table for mean and standard -# error -MEAN_AND_SEM = "MeanAndSEM" -#: Plot type "data" value 
in the visualization table for replicates -REPLICATE = "replicate" -#: Plot type "data" value in the visualization table for provided noise values -PROVIDED = "provided" -#: Supported settings for handling replicates -PLOT_TYPES_DATA = [MEAN_AND_SD, MEAN_AND_SEM, REPLICATE, PROVIDED] - - # YAML #: PEtab version key in the YAML file FORMAT_VERSION = "format_version" @@ -388,8 +280,6 @@ SIMULATION = "simulation" #: Residual value column in the residual table RESIDUAL = "residual" -#: ??? -NOISE_VALUE = "noiseValue" #: separator for multiple parameter values (bounds, observableParameters, ...) PARAMETER_SEPARATOR = ";" diff --git a/petab/v2/lint.py b/petab/v2/lint.py index a8ea848e..3a3350e7 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -35,7 +35,6 @@ "CheckExperimentConditionsExist", "CheckAllParametersPresentInParameterTable", "CheckValidParameterInConditionOrParameterTable", - "CheckVisualizationTable", "CheckUnusedExperiments", "CheckObservablesDoNotShadowModelEntities", "CheckUnusedConditions", @@ -711,24 +710,6 @@ def run(self, problem: Problem) -> ValidationIssue | None: return None -class CheckVisualizationTable(ValidationTask): - """A task to validate the visualization table of a PEtab problem.""" - - def run(self, problem: Problem) -> ValidationIssue | None: - if problem.visualization_df is None: - return None - - from ..v1.visualize.lint import validate_visualization_df - - if validate_visualization_df(problem): - return ValidationIssue( - level=ValidationIssueSeverity.ERROR, - message="Visualization table is invalid.", - ) - - return None - - class CheckPriorDistribution(ValidationTask): """A task to validate the prior distribution of a PEtab problem.""" @@ -1039,7 +1020,6 @@ def get_placeholders( CheckUnusedExperiments(), CheckUnusedConditions(), # TODO: atomize checks, update to long condition table, re-enable - # CheckVisualizationTable(), # TODO validate mapping table CheckValidParameterInConditionOrParameterTable(), 
CheckAllParametersPresentInParameterTable(), diff --git a/petab/v2/petab1to2.py b/petab/v2/petab1to2.py index 5a5ac385..4b040df7 100644 --- a/petab/v2/petab1to2.py +++ b/petab/v2/petab1to2.py @@ -5,7 +5,6 @@ import re import shutil from contextlib import suppress -from itertools import chain from pathlib import Path from tempfile import TemporaryDirectory from urllib.parse import urlparse @@ -100,11 +99,9 @@ def petab_files_1to2(yaml_config: Path | str, output_dir: Path | str): parameter_df, get_dest_path(new_yaml_config.parameter_files[0]) ) - # copy files that don't need conversion - # (models, visualizations) - for file in chain( - (model.location for model in new_yaml_config.model_files.values()), - new_yaml_config.visualization_files, + # copy files that don't need conversion: models + for file in ( + model.location for model in new_yaml_config.model_files.values() ): _copy_file(get_src_path(file), Path(get_dest_path(file))) @@ -290,7 +287,6 @@ def _update_yaml(yaml_config: dict) -> dict: v1.C.CONDITION_FILES, v1.C.MEASUREMENT_FILES, v1.C.OBSERVABLE_FILES, - v1.C.VISUALIZATION_FILES, ): if file_type in problem: yaml_config[file_type] = problem[file_type] diff --git a/petab/v2/problem.py b/petab/v2/problem.py index 6c657cc3..7f49d279 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -24,7 +24,6 @@ validate_yaml_syntax, yaml, ) -from ..v1.core import concat_tables, get_visualization_df from ..v1.distributions import Distribution from ..v1.models.model import Model, model_factory from ..v1.yaml import get_path_prefix @@ -53,8 +52,6 @@ class Problem: - observable table - mapping table - Optionally, it may contain visualization tables. - See also :doc:`petab:v2/documentation_data_format`. 
""" @@ -67,8 +64,6 @@ def __init__( measurement_tables: list[core.MeasurementTable] = None, parameter_tables: list[core.ParameterTable] = None, mapping_tables: list[core.MappingTable] = None, - # TODO: remove - visualization_df: pd.DataFrame = None, config: ProblemConfig = None, ): from ..v2.lint import default_validation_tasks @@ -98,8 +93,6 @@ def __init__( core.ParameterTable(parameters=[]) ] - self.visualization_df = visualization_df - def __str__(self): model = f"with model ({self.model})" if self.model else "without model" @@ -262,15 +255,6 @@ def get_path(filename): else None ) - # TODO: remove in v2?! - visualization_files = [get_path(f) for f in config.visualization_files] - # If there are multiple tables, we will merge them - visualization_df = ( - concat_tables(visualization_files, get_visualization_df) - if visualization_files - else None - ) - observable_tables = ( [ core.ObservableTable.from_tsv(get_path(f)) @@ -298,7 +282,6 @@ def get_path(filename): measurement_tables=measurement_tables, parameter_tables=parameter_tables, mapping_tables=mapping_tables, - visualization_df=visualization_df, ) @staticmethod @@ -308,7 +291,6 @@ def from_dfs( experiment_df: pd.DataFrame = None, measurement_df: pd.DataFrame = None, parameter_df: pd.DataFrame = None, - visualization_df: pd.DataFrame = None, observable_df: pd.DataFrame = None, mapping_df: pd.DataFrame = None, config: ProblemConfig = None, @@ -322,7 +304,6 @@ def from_dfs( measurement_df: PEtab measurement table parameter_df: PEtab parameter table observable_df: PEtab observable table - visualization_df: PEtab visualization table mapping_df: PEtab mapping table model: The underlying model config: The PEtab problem configuration @@ -343,7 +324,6 @@ def from_dfs( measurement_tables=[measurement_table], parameter_tables=[parameter_table], mapping_tables=[mapping_table], - visualization_df=visualization_df, config=config, ) @@ -1227,8 +1207,7 @@ def model_dump(self, **kwargs) -> dict[str, Any]: 
'measurement_files': [], 'model_files': {}, 'observable_files': [], - 'parameter_file': [], - 'visualization_files': []}, + 'parameter_file': []}, 'experiments': [], 'mappings': [], 'measurements': [], @@ -1307,7 +1286,6 @@ class ProblemConfig(BaseModel): condition_files: list[str | AnyUrl] = [] experiment_files: list[str | AnyUrl] = [] observable_files: list[str | AnyUrl] = [] - visualization_files: list[str | AnyUrl] = [] mapping_files: list[str | AnyUrl] = [] #: Extensions used by the problem. From eeb18ace4ead1b84463198c29eb50ce75338ba77 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Tue, 22 Jul 2025 06:12:10 +0200 Subject: [PATCH 070/141] v2: Remove parameter scale (#408) `parameterScale` no longer exists in PEtab v2. --- petab/v2/C.py | 3 -- petab/v2/problem.py | 126 ++------------------------------------------ 2 files changed, 3 insertions(+), 126 deletions(-) diff --git a/petab/v2/C.py b/petab/v2/C.py index 05e8d3dd..9630ceff 100644 --- a/petab/v2/C.py +++ b/petab/v2/C.py @@ -69,8 +69,6 @@ PARAMETER_ID = "parameterId" #: Parameter name column in the parameter table PARAMETER_NAME = "parameterName" -#: Parameter scale column in the parameter table -PARAMETER_SCALE = "parameterScale" #: Lower bound column in the parameter table LOWER_BOUND = "lowerBound" #: Upper bound column in the parameter table @@ -87,7 +85,6 @@ #: Mandatory columns of parameter table PARAMETER_DF_REQUIRED_COLS = [ PARAMETER_ID, - PARAMETER_SCALE, LOWER_BOUND, UPPER_BOUND, ESTIMATE, diff --git a/petab/v2/problem.py b/petab/v2/problem.py index 7f49d279..0b935818 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -20,7 +20,6 @@ from ..v1 import ( parameter_mapping, - parameters, validate_yaml_syntax, yaml, ) @@ -522,15 +521,6 @@ def get_optimization_parameters(self) -> list[str]: """ return [p.id for p in self.parameters if p.estimate] - def get_optimization_parameter_scales(self) -> dict[str, str]: - """ - Return list of optimization parameter scaling strings. 
- - See :py:func:`petab.parameters.get_optimization_parameters`. - """ - # TODO: to be removed in v2? - return parameters.get_optimization_parameter_scaling(self.parameter_df) - def get_observable_ids(self) -> list[str]: """ Returns dictionary of observable ids. @@ -595,9 +585,7 @@ def x_fixed_ids(self) -> list[str]: """Parameter table parameter IDs, for fixed parameters.""" return self.get_x_ids(free=False) - def get_x_nominal( - self, free: bool = True, fixed: bool = True, scaled: bool = False - ) -> list: + def get_x_nominal(self, free: bool = True, fixed: bool = True) -> list: """Generic function to get parameter nominal values. Parameters @@ -607,9 +595,6 @@ def get_x_nominal( fixed: Whether to return fixed parameters, i.e. parameters not to estimate. - scaled: - Whether to scale the values according to the parameter scale, - or return them on linear scale. Returns ------- @@ -620,10 +605,6 @@ def get_x_nominal( for p in self.parameters ] - if scaled: - v = list( - parameters.map_scale(v, self.parameter_df[PARAMETER_SCALE]) - ) return self._apply_mask(v, free=free, fixed=fixed) @property @@ -641,28 +622,7 @@ def x_nominal_fixed(self) -> list: """Parameter table nominal values, for fixed parameters.""" return self.get_x_nominal(free=False) - @property - def x_nominal_scaled(self) -> list: - """Parameter table nominal values with applied parameter scaling""" - return self.get_x_nominal(scaled=True) - - @property - def x_nominal_free_scaled(self) -> list: - """Parameter table nominal values with applied parameter scaling, - for free parameters. - """ - return self.get_x_nominal(fixed=False, scaled=True) - - @property - def x_nominal_fixed_scaled(self) -> list: - """Parameter table nominal values with applied parameter scaling, - for fixed parameters. 
- """ - return self.get_x_nominal(free=False, scaled=True) - - def get_lb( - self, free: bool = True, fixed: bool = True, scaled: bool = False - ): + def get_lb(self, free: bool = True, fixed: bool = True): """Generic function to get lower parameter bounds. Parameters @@ -672,19 +632,12 @@ def get_lb( fixed: Whether to return fixed parameters, i.e. parameters not to estimate. - scaled: - Whether to scale the values according to the parameter scale, - or return them on linear scale. Returns ------- The lower parameter bounds. """ v = [p.lb if p.lb is not None else nan for p in self.parameters] - if scaled: - v = list( - parameters.map_scale(v, self.parameter_df[PARAMETER_SCALE]) - ) return self._apply_mask(v, free=free, fixed=fixed) @property @@ -692,14 +645,7 @@ def lb(self) -> list: """Parameter table lower bounds.""" return self.get_lb() - @property - def lb_scaled(self) -> list: - """Parameter table lower bounds with applied parameter scaling""" - return self.get_lb(scaled=True) - - def get_ub( - self, free: bool = True, fixed: bool = True, scaled: bool = False - ): + def get_ub(self, free: bool = True, fixed: bool = True): """Generic function to get upper parameter bounds. Parameters @@ -709,19 +655,12 @@ def get_ub( fixed: Whether to return fixed parameters, i.e. parameters not to estimate. - scaled: - Whether to scale the values according to the parameter scale, - or return them on linear scale. Returns ------- The upper parameter bounds. 
""" v = [p.ub if p.ub is not None else nan for p in self.parameters] - if scaled: - v = list( - parameters.map_scale(v, self.parameter_df[PARAMETER_SCALE]) - ) return self._apply_mask(v, free=free, fixed=fixed) @property @@ -729,11 +668,6 @@ def ub(self) -> list: """Parameter table upper bounds""" return self.get_ub() - @property - def ub_scaled(self) -> list: - """Parameter table upper bounds with applied parameter scaling""" - return self.get_ub(scaled=True) - @property def x_free_indices(self) -> list[int]: """Parameter table estimated parameter indices.""" @@ -790,56 +724,6 @@ def sample_parameter_startpoints_dict( ) ] - # TODO: remove in v2? - def unscale_parameters( - self, - x_dict: dict[str, float], - ) -> dict[str, float]: - """Unscale parameter values. - - Parameters - ---------- - x_dict: - Keys are parameter IDs in the PEtab problem, values are scaled - parameter values. - - Returns - ------- - The unscaled parameter values. - """ - return { - parameter_id: parameters.unscale( - parameter_value, - self.parameter_df[PARAMETER_SCALE][parameter_id], - ) - for parameter_id, parameter_value in x_dict.items() - } - - # TODO: remove in v2? - def scale_parameters( - self, - x_dict: dict[str, float], - ) -> dict[str, float]: - """Scale parameter values. - - Parameters - ---------- - x_dict: - Keys are parameter IDs in the PEtab problem, values are unscaled - parameter values. - - Returns - ------- - The scaled parameter values. 
- """ - return { - parameter_id: parameters.scale( - parameter_value, - self.parameter_df[PARAMETER_SCALE][parameter_id], - ) - for parameter_id, parameter_value in x_dict.items() - } - @property def n_estimated(self) -> int: """The number of estimated parameters.""" @@ -986,7 +870,6 @@ def add_parameter( id_: str, estimate: bool | str = True, nominal_value: Number | None = None, - scale: str = None, lb: Number = None, ub: Number = None, prior_dist: str = None, @@ -1002,7 +885,6 @@ def add_parameter( id_: The parameter id estimate: Whether the parameter is estimated nominal_value: The nominal value of the parameter - scale: The parameter scale lb: The lower bound of the parameter ub: The upper bound of the parameter prior_dist: The type of the prior distribution @@ -1016,8 +898,6 @@ def add_parameter( record[ESTIMATE] = estimate if nominal_value is not None: record[NOMINAL_VALUE] = nominal_value - if scale is not None: - record[PARAMETER_SCALE] = scale if lb is not None: record[LOWER_BOUND] = lb if ub is not None: From f9bad6abe10a04f61b12818e67bb638343a98434 Mon Sep 17 00:00:00 2001 From: Paul Jonas Jost <70631928+PaulJonasJost@users.noreply.github.com> Date: Tue, 22 Jul 2025 07:09:59 +0200 Subject: [PATCH 071/141] Allow empty string "" in columns to be overridden with default values in priors (#384) The check only looked for whether it was not a string, but empty string values that are passed as `""` were not caught here. This should fix the error. --------- Co-authored-by: Daniel Weindl --- petab/v1/priors.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/petab/v1/priors.py b/petab/v1/priors.py index 6531fa0e..b8bf8dcb 100644 --- a/petab/v1/priors.py +++ b/petab/v1/priors.py @@ -277,9 +277,11 @@ def from_par_dict( at the bounds. **deprecated**. :return: A distribution object. 
""" - dist_type = d.get(f"{type_}PriorType", C.PARAMETER_SCALE_UNIFORM) - if not isinstance(dist_type, str) and np.isnan(dist_type): - dist_type = C.PARAMETER_SCALE_UNIFORM + dist_type = C.PARAMETER_SCALE_UNIFORM + if (_table_dist_type := d.get(f"{type_}PriorType")) and ( + isinstance(_table_dist_type, str) or not np.isnan(_table_dist_type) + ): + dist_type = _table_dist_type pscale = d.get(C.PARAMETER_SCALE, C.LIN) params = d.get(f"{type_}PriorParameters", None) From 11db39d833656d457987bd8d7800da181a6f443d Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Tue, 22 Jul 2025 11:01:53 +0200 Subject: [PATCH 072/141] v2: Paths as pathlib.Path & validate assignment (#410) For petab.v2 pydantic models, change path attributes to `pathlib.Path`, and validate assignments. --- petab/v1/yaml.py | 2 +- petab/v2/core.py | 29 ++++++++++++++++----- petab/v2/problem.py | 54 +++++++++++++++++++++++++++++++--------- tests/v2/test_problem.py | 27 ++++++++++++++++++++ 4 files changed, 93 insertions(+), 19 deletions(-) diff --git a/petab/v1/yaml.py b/petab/v1/yaml.py index 0c092049..cefc594c 100644 --- a/petab/v1/yaml.py +++ b/petab/v1/yaml.py @@ -242,7 +242,7 @@ def write_yaml(yaml_config: dict[str, Any], filename: str | Path) -> None: """ Path(filename).parent.mkdir(parents=True, exist_ok=True) with open(filename, "w") as outfile: - yaml.dump( + yaml.safe_dump( yaml_config, outfile, default_flow_style=False, sort_keys=False ) diff --git a/petab/v2/core.py b/petab/v2/core.py index 193be335..6532a52c 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -204,7 +204,10 @@ class Observable(BaseModel): #: :meta private: model_config = ConfigDict( - arbitrary_types_allowed=True, populate_by_name=True, extra="allow" + arbitrary_types_allowed=True, + populate_by_name=True, + extra="allow", + validate_assignment=True, ) @field_validator( @@ -344,6 +347,7 @@ class Change(BaseModel): populate_by_name=True, use_enum_values=True, extra="allow", + validate_assignment=True, ) 
@field_validator("target_value", mode="before") @@ -385,7 +389,9 @@ class Condition(BaseModel): changes: list[Change] #: :meta private: - model_config = ConfigDict(populate_by_name=True, extra="allow") + model_config = ConfigDict( + populate_by_name=True, extra="allow", validate_assignment=True + ) def __add__(self, other: Change) -> Condition: """Add a change to the set.""" @@ -503,7 +509,9 @@ class ExperimentPeriod(BaseModel): condition_ids: list[str] = Field(default_factory=list) #: :meta private: - model_config = ConfigDict(populate_by_name=True, extra="allow") + model_config = ConfigDict( + populate_by_name=True, extra="allow", validate_assignment=True + ) @field_validator("condition_ids", mode="before") @classmethod @@ -544,7 +552,10 @@ class Experiment(BaseModel): #: :meta private: model_config = ConfigDict( - arbitrary_types_allowed=True, populate_by_name=True, extra="allow" + arbitrary_types_allowed=True, + populate_by_name=True, + extra="allow", + validate_assignment=True, ) def __add__(self, other: ExperimentPeriod) -> Experiment: @@ -682,7 +693,10 @@ class Measurement(BaseModel): #: :meta private: model_config = ConfigDict( - arbitrary_types_allowed=True, populate_by_name=True, extra="allow" + arbitrary_types_allowed=True, + populate_by_name=True, + extra="allow", + validate_assignment=True, ) @field_validator( @@ -806,7 +820,9 @@ class Mapping(BaseModel): ) #: :meta private: - model_config = ConfigDict(populate_by_name=True, extra="allow") + model_config = ConfigDict( + populate_by_name=True, extra="allow", validate_assignment=True + ) class MappingTable(BaseModel): @@ -909,6 +925,7 @@ class Parameter(BaseModel): populate_by_name=True, use_enum_values=True, extra="allow", + validate_assignment=True, ) @field_validator("id") diff --git a/petab/v2/problem.py b/petab/v2/problem.py index 0b935818..93362f62 100644 --- a/petab/v2/problem.py +++ b/petab/v2/problem.py @@ -16,7 +16,13 @@ import numpy as np import pandas as pd import sympy as sp -from pydantic 
import AnyUrl, BaseModel, Field, field_validator +from pydantic import ( + AnyUrl, + BaseModel, + ConfigDict, + Field, + field_validator, +) from ..v1 import ( parameter_mapping, @@ -1124,9 +1130,13 @@ def model_dump(self, **kwargs) -> dict[str, Any]: class ModelFile(BaseModel): """A file in the PEtab problem configuration.""" - location: str | AnyUrl + location: AnyUrl | Path language: str + model_config = ConfigDict( + validate_assignment=True, + ) + class ExtensionConfig(BaseModel): """The configuration of a PEtab extension.""" @@ -1139,13 +1149,13 @@ class ProblemConfig(BaseModel): """The PEtab problem configuration.""" #: The path to the PEtab problem configuration. - filepath: str | AnyUrl | None = Field( + filepath: AnyUrl | Path | None = Field( None, description="The path to the PEtab problem configuration.", exclude=True, ) #: The base path to resolve relative paths. - base_path: str | AnyUrl | None = Field( + base_path: AnyUrl | Path | None = Field( None, description="The base path to resolve relative paths.", exclude=True, @@ -1156,21 +1166,24 @@ class ProblemConfig(BaseModel): # TODO https://github.com/PEtab-dev/PEtab/pull/641: # rename to parameter_files in yaml for consistency with other files? # always a list? - parameter_files: list[str | AnyUrl] = Field( + parameter_files: list[AnyUrl | Path] = Field( default=[], alias=PARAMETER_FILES ) - # TODO: consider changing str to Path model_files: dict[str, ModelFile] | None = {} - measurement_files: list[str | AnyUrl] = [] - condition_files: list[str | AnyUrl] = [] - experiment_files: list[str | AnyUrl] = [] - observable_files: list[str | AnyUrl] = [] - mapping_files: list[str | AnyUrl] = [] + measurement_files: list[AnyUrl | Path] = [] + condition_files: list[AnyUrl | Path] = [] + experiment_files: list[AnyUrl | Path] = [] + observable_files: list[AnyUrl | Path] = [] + mapping_files: list[AnyUrl | Path] = [] #: Extensions used by the problem. 
extensions: list[ExtensionConfig] | dict = {} + model_config = ConfigDict( + validate_assignment=True, + ) + # convert parameter_file to list @field_validator( "parameter_files", @@ -1194,7 +1207,24 @@ def to_yaml(self, filename: str | Path): """ from ..v1.yaml import write_yaml - write_yaml(self.model_dump(by_alias=True), filename) + data = self.model_dump(by_alias=True) + # convert Paths to strings for YAML serialization + for key in ( + "measurement_files", + "condition_files", + "experiment_files", + "observable_files", + "mapping_files", + "parameter_files", + ): + data[key] = list(map(str, data[key])) + + for model_id in data.get("model_files", {}): + data["model_files"][model_id][MODEL_LOCATION] = str( + data["model_files"][model_id]["location"] + ) + + write_yaml(data, filename) @property def format_version_tuple(self) -> tuple[int, int, int, str]: diff --git a/tests/v2/test_problem.py b/tests/v2/test_problem.py index 73cc3988..580c691a 100644 --- a/tests/v2/test_problem.py +++ b/tests/v2/test_problem.py @@ -4,6 +4,7 @@ import numpy as np import pandas as pd from pandas.testing import assert_frame_equal +from pydantic import AnyUrl import petab.v2 as petab from petab.v2 import Problem @@ -198,3 +199,29 @@ def test_sample_startpoint_shape(): n_starts = 10 sp = problem.sample_parameter_startpoints(n_starts=n_starts) assert sp.shape == (n_starts, 2) + + +def test_problem_config_paths(): + """Test handling of URLS and local paths in ProblemConfig.""" + + pc = petab.ProblemConfig( + parameter_files=["https://example.com/params.tsv"], + condition_files=["conditions.tsv"], + measurement_files=["measurements.tsv"], + observable_files=["observables.tsv"], + experiment_files=["experiments.tsv"], + ) + assert isinstance(pc.parameter_files[0], AnyUrl) + assert isinstance(pc.condition_files[0], Path) + assert isinstance(pc.measurement_files[0], Path) + assert isinstance(pc.observable_files[0], Path) + assert isinstance(pc.experiment_files[0], Path) + + # Auto-convert to 
Path on assignment + pc.parameter_files = ["foo.tsv"] + assert isinstance(pc.parameter_files[0], Path) + + # We can't easily intercept mutations to the list: + # pc.parameter_files[0] = "foo.tsv" + # assert isinstance(pc.parameter_files[0], Path) + # see also https://github.com/pydantic/pydantic/issues/8575 From af3e7076829fb5f54d3327343a5a4d80fde647ef Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Wed, 23 Jul 2025 11:06:21 +0200 Subject: [PATCH 073/141] v2: merge .core and .problem (#414) Move `v2.problem.*` to `v2.core`. Keep all pydantic models in the same module. Avoid unnecessary import complexity. --- petab/v2/__init__.py | 1 - petab/v2/converters.py | 2 +- petab/v2/core.py | 1199 +++++++++++++++++++++++++++++++++++- petab/v2/lint.py | 3 +- petab/v2/problem.py | 1233 -------------------------------------- tests/v2/test_core.py | 223 ++++++- tests/v2/test_problem.py | 227 ------- 7 files changed, 1422 insertions(+), 1466 deletions(-) delete mode 100644 petab/v2/problem.py delete mode 100644 tests/v2/test_problem.py diff --git a/petab/v2/__init__.py b/petab/v2/__init__.py index 68069010..c897c9c4 100644 --- a/petab/v2/__init__.py +++ b/petab/v2/__init__.py @@ -44,4 +44,3 @@ ) from .lint import lint_problem # noqa: F401, E402 from .models import MODEL_TYPE_PYSB, MODEL_TYPE_SBML, Model # noqa: F401, E402 -from .problem import Problem, ProblemConfig # noqa: F401, E402 diff --git a/petab/v2/converters.py b/petab/v2/converters.py index 67c7efda..f6d185b5 100644 --- a/petab/v2/converters.py +++ b/petab/v2/converters.py @@ -14,10 +14,10 @@ ConditionTable, Experiment, ExperimentPeriod, + Problem, ) from .models._sbml_utils import add_sbml_parameter, check from .models.sbml_model import SbmlModel -from .problem import Problem __all__ = ["ExperimentsToEventsConverter"] diff --git a/petab/v2/core.py b/petab/v2/core.py index 6532a52c..4b8a64aa 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -2,17 +2,24 @@ from __future__ import annotations +import logging 
+import os +import tempfile +import traceback from collections.abc import Sequence from enum import Enum from itertools import chain +from math import nan +from numbers import Number from pathlib import Path -from typing import Annotated +from typing import TYPE_CHECKING, Annotated, Any import numpy as np import pandas as pd import sympy as sp from pydantic import ( AfterValidator, + AnyUrl, BaseModel, BeforeValidator, ConfigDict, @@ -24,12 +31,26 @@ ) from typing_extensions import Self +from ..v1 import ( + parameter_mapping, + validate_yaml_syntax, + yaml, +) from ..v1.distributions import * from ..v1.lint import is_valid_identifier from ..v1.math import petab_math_str, sympify_petab +from ..v1.models.model import Model, model_factory +from ..v1.yaml import get_path_prefix +from ..versions import parse_version from . import C, get_observable_df +if TYPE_CHECKING: + from ..v2.lint import ValidationResultList, ValidationTask + + __all__ = [ + "Problem", + "ProblemConfig", "Observable", "ObservableTable", "NoiseDistribution", @@ -1115,3 +1136,1179 @@ def __getitem__(self, item) -> Parameter: def n_estimated(self) -> int: """Number of estimated parameters.""" return sum(p.estimate for p in self.parameters) + + +"""PEtab v2 problems.""" + + +class Problem: + """ + PEtab parameter estimation problem + + A PEtab parameter estimation problem as defined by + + - model + - condition table + - experiment table + - measurement table + - parameter table + - observable table + - mapping table + + See also :doc:`petab:v2/documentation_data_format`. 
+ """ + + def __init__( + self, + model: Model = None, + condition_tables: list[ConditionTable] = None, + experiment_tables: list[ExperimentTable] = None, + observable_tables: list[ObservableTable] = None, + measurement_tables: list[MeasurementTable] = None, + parameter_tables: list[ParameterTable] = None, + mapping_tables: list[MappingTable] = None, + config: ProblemConfig = None, + ): + from ..v2.lint import default_validation_tasks + + self.config = config + self.model: Model | None = model + self.validation_tasks: list[ValidationTask] = ( + default_validation_tasks.copy() + ) + + self.observable_tables = observable_tables or [ + ObservableTable(observables=[]) + ] + self.condition_tables = condition_tables or [ + ConditionTable(conditions=[]) + ] + self.experiment_tables = experiment_tables or [ + ExperimentTable(experiments=[]) + ] + self.measurement_tables = measurement_tables or [ + MeasurementTable(measurements=[]) + ] + self.mapping_tables = mapping_tables or [MappingTable(mappings=[])] + self.parameter_tables = parameter_tables or [ + ParameterTable(parameters=[]) + ] + + def __str__(self): + model = f"with model ({self.model})" if self.model else "without model" + + ne = len(self.experiments) + experiments = f"{ne} experiments" + + nc = len(self.conditions) + conditions = f"{nc} conditions" + + no = len(self.observables) + observables = f"{no} observables" + + nm = len(self.measurements) + measurements = f"{nm} measurements" + + nest = sum(pt.n_estimated for pt in self.parameter_tables) + parameters = f"{nest} estimated parameters" + + return ( + f"PEtab Problem {model}, {conditions}, {experiments}, " + f"{observables}, {measurements}, {parameters}" + ) + + def __getitem__(self, key): + """Get PEtab entity by ID. + + This allows accessing PEtab entities such as conditions, experiments, + observables, and parameters by their ID. + + Accessing model entities is not currently not supported. 
+ """ + for table_list in ( + self.condition_tables, + self.experiment_tables, + self.observable_tables, + self.measurement_tables, + self.parameter_tables, + self.mapping_tables, + ): + for table in table_list: + try: + return table[key] + except KeyError: + pass + + raise KeyError( + f"Entity with ID '{key}' not found in the PEtab problem" + ) + + @staticmethod + def from_yaml( + yaml_config: dict | Path | str, base_path: str | Path = None + ) -> Problem: + """ + Factory method to load model and tables as specified by YAML file. + + Arguments: + yaml_config: PEtab configuration as dictionary or YAML file name + base_path: Base directory or URL to resolve relative paths + """ + if isinstance(yaml_config, Path): + yaml_config = str(yaml_config) + + if isinstance(yaml_config, str): + yaml_file = yaml_config + if base_path is None: + base_path = get_path_prefix(yaml_file) + yaml_config = yaml.load_yaml(yaml_file) + else: + yaml_file = None + + validate_yaml_syntax(yaml_config) + + def get_path(filename): + if base_path is None: + return filename + return f"{base_path}/{filename}" + + if (format_version := parse_version(yaml_config[C.FORMAT_VERSION]))[ + 0 + ] != 2: + # If we got a path to a v1 yaml file, try to auto-upgrade + from tempfile import TemporaryDirectory + + from .petab1to2 import petab1to2 + + if format_version[0] == 1 and yaml_file: + logging.debug( + "Auto-upgrading problem from PEtab 1.0 to PEtab 2.0" + ) + with TemporaryDirectory() as tmpdirname: + try: + petab1to2(yaml_file, output_dir=tmpdirname) + except Exception as e: + raise ValueError( + "Failed to auto-upgrade PEtab 1.0 problem to " + "PEtab 2.0" + ) from e + return Problem.from_yaml( + Path(tmpdirname) / Path(yaml_file).name + ) + raise ValueError( + "Provided PEtab files are of unsupported version " + f"{yaml_config[C.FORMAT_VERSION]}." 
+ ) + + if len(yaml_config[C.MODEL_FILES]) > 1: + raise ValueError( + "petab.v2.Problem.from_yaml() can only be used for " + "yaml files comprising a single model. " + "Consider using " + "petab.v2.CompositeProblem.from_yaml() instead." + ) + config = ProblemConfig( + **yaml_config, base_path=base_path, filepath=yaml_file + ) + parameter_tables = [ + ParameterTable.from_tsv(get_path(f)) + for f in config.parameter_files + ] + + if len(config.model_files or []) > 1: + # TODO https://github.com/PEtab-dev/libpetab-python/issues/6 + raise NotImplementedError( + "Support for multiple models is not yet implemented." + ) + model = None + if config.model_files: + model_id, model_info = next(iter(config.model_files.items())) + model = model_factory( + get_path(model_info.location), + model_info.language, + model_id=model_id, + ) + + measurement_tables = ( + [ + MeasurementTable.from_tsv(get_path(f)) + for f in config.measurement_files + ] + if config.measurement_files + else None + ) + + condition_tables = ( + [ + ConditionTable.from_tsv(get_path(f)) + for f in config.condition_files + ] + if config.condition_files + else None + ) + + experiment_tables = ( + [ + ExperimentTable.from_tsv(get_path(f)) + for f in config.experiment_files + ] + if config.experiment_files + else None + ) + + observable_tables = ( + [ + ObservableTable.from_tsv(get_path(f)) + for f in config.observable_files + ] + if config.observable_files + else None + ) + + mapping_tables = ( + [MappingTable.from_tsv(get_path(f)) for f in config.mapping_files] + if config.mapping_files + else None + ) + + return Problem( + config=config, + model=model, + condition_tables=condition_tables, + experiment_tables=experiment_tables, + observable_tables=observable_tables, + measurement_tables=measurement_tables, + parameter_tables=parameter_tables, + mapping_tables=mapping_tables, + ) + + @staticmethod + def from_dfs( + model: Model = None, + condition_df: pd.DataFrame = None, + experiment_df: pd.DataFrame = None, + 
measurement_df: pd.DataFrame = None, + parameter_df: pd.DataFrame = None, + observable_df: pd.DataFrame = None, + mapping_df: pd.DataFrame = None, + config: ProblemConfig = None, + ): + """ + Construct a PEtab problem from dataframes. + + Parameters: + condition_df: PEtab condition table + experiment_df: PEtab experiment table + measurement_df: PEtab measurement table + parameter_df: PEtab parameter table + observable_df: PEtab observable table + mapping_df: PEtab mapping table + model: The underlying model + config: The PEtab problem configuration + """ + + observable_table = ObservableTable.from_df(observable_df) + condition_table = ConditionTable.from_df(condition_df) + experiment_table = ExperimentTable.from_df(experiment_df) + measurement_table = MeasurementTable.from_df(measurement_df) + mapping_table = MappingTable.from_df(mapping_df) + parameter_table = ParameterTable.from_df(parameter_df) + + return Problem( + model=model, + condition_tables=[condition_table], + experiment_tables=[experiment_table], + observable_tables=[observable_table], + measurement_tables=[measurement_table], + parameter_tables=[parameter_table], + mapping_tables=[mapping_table], + config=config, + ) + + @staticmethod + def from_combine(filename: Path | str) -> Problem: + """Read PEtab COMBINE archive (http://co.mbine.org/documents/archive). + + See also :py:func:`petab.v2.create_combine_archive`. + + Arguments: + filename: Path to the PEtab-COMBINE archive + + Returns: + A :py:class:`petab.v2.Problem` instance. + """ + # function-level import, because module-level import interfered with + # other SWIG interfaces + try: + import libcombine + except ImportError as e: + raise ImportError( + "To use PEtab's COMBINE functionality, libcombine " + "(python-libcombine) must be installed." 
+ ) from e + + archive = libcombine.CombineArchive() + if archive.initializeFromArchive(str(filename)) is None: + raise ValueError(f"Invalid Combine Archive: {filename}") + + with tempfile.TemporaryDirectory() as tmpdirname: + archive.extractTo(tmpdirname) + problem = Problem.from_yaml( + os.path.join(tmpdirname, archive.getMasterFile().getLocation()) + ) + archive.cleanUp() + + return problem + + @staticmethod + def get_problem(problem: str | Path | Problem) -> Problem: + """Get a PEtab problem from a file or a problem object. + + Arguments: + problem: Path to a PEtab problem file or a PEtab problem object. + + Returns: + A PEtab problem object. + """ + if isinstance(problem, Problem): + return problem + + if isinstance(problem, str | Path): + return Problem.from_yaml(problem) + + raise TypeError( + "The argument `problem` must be a path to a PEtab problem file " + "or a PEtab problem object." + ) + + @property + def condition_df(self) -> pd.DataFrame | None: + """Combined condition tables as DataFrame.""" + conditions = self.conditions + return ( + ConditionTable(conditions=conditions).to_df() + if conditions + else None + ) + + @condition_df.setter + def condition_df(self, value: pd.DataFrame): + self.condition_tables = [ConditionTable.from_df(value)] + + @property + def experiment_df(self) -> pd.DataFrame | None: + """Experiment table as DataFrame.""" + return ( + ExperimentTable(experiments=experiments).to_df() + if (experiments := self.experiments) + else None + ) + + @experiment_df.setter + def experiment_df(self, value: pd.DataFrame): + self.experiment_tables = [ExperimentTable.from_df(value)] + + @property + def measurement_df(self) -> pd.DataFrame | None: + """Combined measurement tables as DataFrame.""" + measurements = self.measurements + return ( + MeasurementTable(measurements=measurements).to_df() + if measurements + else None + ) + + @measurement_df.setter + def measurement_df(self, value: pd.DataFrame): + self.measurement_tables = 
[MeasurementTable.from_df(value)] + + @property + def parameter_df(self) -> pd.DataFrame | None: + """Combined parameter tables as DataFrame.""" + parameters = self.parameters + return ( + ParameterTable(parameters=parameters).to_df() + if parameters + else None + ) + + @parameter_df.setter + def parameter_df(self, value: pd.DataFrame): + self.parameter_tables = [ParameterTable.from_df(value)] + + @property + def observable_df(self) -> pd.DataFrame | None: + """Combined observable tables as DataFrame.""" + observables = self.observables + return ( + ObservableTable(observables=observables).to_df() + if observables + else None + ) + + @observable_df.setter + def observable_df(self, value: pd.DataFrame): + self.observable_tables = [ObservableTable.from_df(value)] + + @property + def mapping_df(self) -> pd.DataFrame | None: + """Combined mapping tables as DataFrame.""" + mappings = self.mappings + return MappingTable(mappings=mappings).to_df() if mappings else None + + @mapping_df.setter + def mapping_df(self, value: pd.DataFrame): + self.mapping_tables = [MappingTable.from_df(value)] + + @property + def conditions(self) -> list[Condition]: + """List of conditions in the condition table(s).""" + return list( + chain.from_iterable(ct.conditions for ct in self.condition_tables) + ) + + @property + def experiments(self) -> list[Experiment]: + """List of experiments in the experiment table(s).""" + return list( + chain.from_iterable( + et.experiments for et in self.experiment_tables + ) + ) + + @property + def observables(self) -> list[Observable]: + """List of observables in the observable table(s).""" + return list( + chain.from_iterable( + ot.observables for ot in self.observable_tables + ) + ) + + @property + def measurements(self) -> list[Measurement]: + """List of measurements in the measurement table(s).""" + return list( + chain.from_iterable( + mt.measurements for mt in self.measurement_tables + ) + ) + + @property + def parameters(self) -> list[Parameter]: + 
"""List of parameters in the parameter table(s).""" + return list( + chain.from_iterable(pt.parameters for pt in self.parameter_tables) + ) + + @property + def mappings(self) -> list[Mapping]: + """List of mappings in the mapping table(s).""" + return list( + chain.from_iterable(mt.mappings for mt in self.mapping_tables) + ) + + def get_optimization_parameters(self) -> list[str]: + """ + Get the list of optimization parameter IDs from parameter table. + + Returns: + A list of IDs of parameters selected for optimization + (i.e., those with estimate = True). + """ + return [p.id for p in self.parameters if p.estimate] + + def get_observable_ids(self) -> list[str]: + """ + Returns dictionary of observable ids. + """ + return [o.id for o in self.observables] + + def _apply_mask(self, v: list, free: bool = True, fixed: bool = True): + """Apply mask of only free or only fixed values. + + Parameters + ---------- + v: + The full vector the mask is to be applied to. + free: + Whether to return free parameters, i.e., parameters to estimate. + fixed: + Whether to return fixed parameters, i.e., parameters not to + estimate. + + Returns + ------- + The reduced vector with applied mask. + """ + if not free and not fixed: + return [] + if not free: + return [v[ix] for ix in self.x_fixed_indices] + if not fixed: + return [v[ix] for ix in self.x_free_indices] + return v + + def get_x_ids(self, free: bool = True, fixed: bool = True): + """Generic function to get parameter ids. + + Parameters + ---------- + free: + Whether to return free parameters, i.e. parameters to estimate. + fixed: + Whether to return fixed parameters, i.e. parameters not to + estimate. + + Returns + ------- + The parameter IDs. 
+ """ + v = [p.id for p in self.parameters] + return self._apply_mask(v, free=free, fixed=fixed) + + @property + def x_ids(self) -> list[str]: + """Parameter table parameter IDs""" + return self.get_x_ids() + + @property + def x_free_ids(self) -> list[str]: + """Parameter table parameter IDs, for free parameters.""" + return self.get_x_ids(fixed=False) + + @property + def x_fixed_ids(self) -> list[str]: + """Parameter table parameter IDs, for fixed parameters.""" + return self.get_x_ids(free=False) + + def get_x_nominal(self, free: bool = True, fixed: bool = True) -> list: + """Generic function to get parameter nominal values. + + Parameters + ---------- + free: + Whether to return free parameters, i.e. parameters to estimate. + fixed: + Whether to return fixed parameters, i.e. parameters not to + estimate. + + Returns + ------- + The parameter nominal values. + """ + v = [ + p.nominal_value if p.nominal_value is not None else nan + for p in self.parameters + ] + + return self._apply_mask(v, free=free, fixed=fixed) + + @property + def x_nominal(self) -> list: + """Parameter table nominal values""" + return self.get_x_nominal() + + @property + def x_nominal_free(self) -> list: + """Parameter table nominal values, for free parameters.""" + return self.get_x_nominal(fixed=False) + + @property + def x_nominal_fixed(self) -> list: + """Parameter table nominal values, for fixed parameters.""" + return self.get_x_nominal(free=False) + + def get_lb(self, free: bool = True, fixed: bool = True): + """Generic function to get lower parameter bounds. + + Parameters + ---------- + free: + Whether to return free parameters, i.e. parameters to estimate. + fixed: + Whether to return fixed parameters, i.e. parameters not to + estimate. + + Returns + ------- + The lower parameter bounds. 
+ """ + v = [p.lb if p.lb is not None else nan for p in self.parameters] + return self._apply_mask(v, free=free, fixed=fixed) + + @property + def lb(self) -> list: + """Parameter table lower bounds.""" + return self.get_lb() + + def get_ub(self, free: bool = True, fixed: bool = True): + """Generic function to get upper parameter bounds. + + Parameters + ---------- + free: + Whether to return free parameters, i.e. parameters to estimate. + fixed: + Whether to return fixed parameters, i.e. parameters not to + estimate. + + Returns + ------- + The upper parameter bounds. + """ + v = [p.ub if p.ub is not None else nan for p in self.parameters] + return self._apply_mask(v, free=free, fixed=fixed) + + @property + def ub(self) -> list: + """Parameter table upper bounds""" + return self.get_ub() + + @property + def x_free_indices(self) -> list[int]: + """Parameter table estimated parameter indices.""" + return [i for i, p in enumerate(self.parameters) if p.estimate] + + @property + def x_fixed_indices(self) -> list[int]: + """Parameter table non-estimated parameter indices.""" + return [i for i, p in enumerate(self.parameters) if not p.estimate] + + # TODO remove in v2? + def get_optimization_to_simulation_parameter_mapping(self, **kwargs): + """ + See + :py:func:`petab.parameter_mapping.get_optimization_to_simulation_parameter_mapping`, + to which all keyword arguments are forwarded. + """ + return ( + parameter_mapping.get_optimization_to_simulation_parameter_mapping( + condition_df=self.condition_df, + measurement_df=self.measurement_df, + parameter_df=self.parameter_df, + observable_df=self.observable_df, + model=self.model, + **kwargs, + ) + ) + + def get_priors(self) -> dict[str, Distribution]: + """Get prior distributions. + + :returns: The prior distributions for the estimated parameters. 
+ """ + return {p.id: p.prior_dist for p in self.parameters if p.estimate} + + def sample_parameter_startpoints(self, n_starts: int = 100, **kwargs): + """Create 2D array with starting points for optimization""" + priors = self.get_priors() + return np.vstack([p.sample(n_starts) for p in priors.values()]).T + + def sample_parameter_startpoints_dict( + self, n_starts: int = 100 + ) -> list[dict[str, float]]: + """Create dictionaries with starting points for optimization + + :returns: + A list of dictionaries with parameter IDs mapping to sampled + parameter values. + """ + return [ + dict(zip(self.x_free_ids, parameter_values, strict=True)) + for parameter_values in self.sample_parameter_startpoints( + n_starts=n_starts + ) + ] + + @property + def n_estimated(self) -> int: + """The number of estimated parameters.""" + return len(self.x_free_indices) + + @property + def n_measurements(self) -> int: + """Number of measurements.""" + return sum(len(mt.measurements) for mt in self.measurement_tables) + + @property + def n_priors(self) -> int: + """Number of priors.""" + return sum(p.prior_distribution is not None for p in self.parameters) + + def validate( + self, validation_tasks: list[ValidationTask] = None + ) -> ValidationResultList: + """Validate the PEtab problem. + + Arguments: + validation_tasks: List of validation tasks to run. If ``None`` + or empty, :attr:`Problem.validation_tasks` are used. + Returns: + A list of validation results. 
+ """ + from ..v2.lint import ( + ValidationIssue, + ValidationIssueSeverity, + ValidationResultList, + ) + + validation_results = ValidationResultList() + if self.config and self.config.extensions: + extensions = ",".join(self.config.extensions.keys()) + validation_results.append( + ValidationIssue( + ValidationIssueSeverity.WARNING, + "Validation of PEtab extensions is not yet implemented, " + "but the given problem uses the following extensions: " + f"{extensions}", + ) + ) + + for task in validation_tasks or self.validation_tasks: + try: + cur_result = task.run(self) + except Exception as e: + cur_result = ValidationIssue( + ValidationIssueSeverity.CRITICAL, + f"Validation task {task} failed with exception: {e}\n" + f"{traceback.format_exc()}", + ) + + if cur_result: + validation_results.append(cur_result) + + if cur_result.level == ValidationIssueSeverity.CRITICAL: + break + + return validation_results + + def add_condition( + self, id_: str, name: str = None, **kwargs: Number | str | sp.Expr + ): + """Add a simulation condition to the problem. + + If there are more than one condition tables, the condition + is added to the last one. + + Arguments: + id_: The condition id + name: The condition name. If given, this will be added to the + last mapping table. If no mapping table exists, + a new mapping table will be created. + kwargs: Entities to be added to the condition table in the form + `target_id=target_value`. 
+ """ + if not kwargs: + raise ValueError("Cannot add condition without any changes") + + changes = [ + Change(target_id=target_id, target_value=target_value) + for target_id, target_value in kwargs.items() + ] + if not self.condition_tables: + self.condition_tables.append(ConditionTable(conditions=[])) + self.condition_tables[-1].conditions.append( + Condition(id=id_, changes=changes) + ) + if name is not None: + self.add_mapping(petab_id=id_, name=name) + + def add_observable( + self, + id_: str, + formula: str, + noise_formula: str | float | int = None, + noise_distribution: str = None, + observable_placeholders: list[str] = None, + noise_placeholders: list[str] = None, + name: str = None, + **kwargs, + ): + """Add an observable to the problem. + + If there are more than one observable tables, the observable + is added to the last one. + + Arguments: + id_: The observable id + formula: The observable formula + noise_formula: The noise formula + noise_distribution: The noise distribution + observable_placeholders: Placeholders for the observable formula + noise_placeholders: Placeholders for the noise formula + name: The observable name + kwargs: additional columns/values to add to the observable table + + """ + record = { + C.OBSERVABLE_ID: id_, + C.OBSERVABLE_FORMULA: formula, + } + if name is not None: + record[C.OBSERVABLE_NAME] = name + if noise_formula is not None: + record[C.NOISE_FORMULA] = noise_formula + if noise_distribution is not None: + record[C.NOISE_DISTRIBUTION] = noise_distribution + if observable_placeholders is not None: + record[C.OBSERVABLE_PLACEHOLDERS] = observable_placeholders + if noise_placeholders is not None: + record[C.NOISE_PLACEHOLDERS] = noise_placeholders + record.update(kwargs) + + if not self.observable_tables: + self.observable_tables.append(ObservableTable(observables=[])) + + self.observable_tables[-1] += Observable(**record) + + def add_parameter( + self, + id_: str, + estimate: bool | str = True, + nominal_value: Number | 
None = None, + lb: Number = None, + ub: Number = None, + prior_dist: str = None, + prior_pars: str | Sequence = None, + **kwargs, + ): + """Add a parameter to the problem. + + If there are more than one parameter tables, the parameter + is added to the last one. + + Arguments: + id_: The parameter id + estimate: Whether the parameter is estimated + nominal_value: The nominal value of the parameter + lb: The lower bound of the parameter + ub: The upper bound of the parameter + prior_dist: The type of the prior distribution + prior_pars: The parameters of the prior distribution + kwargs: additional columns/values to add to the parameter table + """ + record = { + C.PARAMETER_ID: id_, + } + if estimate is not None: + record[C.ESTIMATE] = estimate + if nominal_value is not None: + record[C.NOMINAL_VALUE] = nominal_value + if lb is not None: + record[C.LOWER_BOUND] = lb + if ub is not None: + record[C.UPPER_BOUND] = ub + if prior_dist is not None: + record[C.PRIOR_DISTRIBUTION] = prior_dist + if prior_pars is not None: + if isinstance(prior_pars, Sequence) and not isinstance( + prior_pars, str + ): + prior_pars = C.PARAMETER_SEPARATOR.join(map(str, prior_pars)) + record[C.PRIOR_PARAMETERS] = prior_pars + record.update(kwargs) + + if not self.parameter_tables: + self.parameter_tables.append(ParameterTable(parameters=[])) + + self.parameter_tables[-1] += Parameter(**record) + + def add_measurement( + self, + obs_id: str, + experiment_id: str, + time: float, + measurement: float, + observable_parameters: Sequence[str | float] | str | float = None, + noise_parameters: Sequence[str | float] | str | float = None, + ): + """Add a measurement to the problem. + + If there are more than one measurement tables, the measurement + is added to the last one. 
+ + Arguments: + obs_id: The observable ID + experiment_id: The experiment ID + time: The measurement time + measurement: The measurement value + observable_parameters: The observable parameters + noise_parameters: The noise parameters + """ + if observable_parameters is not None and not isinstance( + observable_parameters, Sequence + ): + observable_parameters = [observable_parameters] + if noise_parameters is not None and not isinstance( + noise_parameters, Sequence + ): + noise_parameters = [noise_parameters] + + if not self.measurement_tables: + self.measurement_tables.append(MeasurementTable(measurements=[])) + + self.measurement_tables[-1].measurements.append( + Measurement( + observable_id=obs_id, + experiment_id=experiment_id, + time=time, + measurement=measurement, + observable_parameters=observable_parameters, + noise_parameters=noise_parameters, + ) + ) + + def add_mapping( + self, petab_id: str, model_id: str = None, name: str = None + ): + """Add a mapping table entry to the problem. + + If there are more than one mapping tables, the mapping + is added to the last one. + + Arguments: + petab_id: The new PEtab-compatible ID mapping to `model_id` + model_id: The ID of some entity in the model + name: A name (any string) for the entity referenced by `petab_id`. + """ + if not self.mapping_tables: + self.mapping_tables.append(MappingTable(mappings=[])) + self.mapping_tables[-1].mappings.append( + Mapping(petab_id=petab_id, model_id=model_id, name=name) + ) + + def add_experiment(self, id_: str, *args): + """Add an experiment to the problem. + + If there are more than one experiment tables, the experiment + is added to the last one. + + :param id_: The experiment ID. + :param args: Timepoints and associated conditions: + ``time_1, condition_id_1, time_2, condition_id_2, ...``. + """ + if len(args) % 2 != 0: + raise ValueError( + "Arguments must be pairs of timepoints and condition IDs." 
+ ) + + periods = [ + ExperimentPeriod( + time=args[i], + condition_ids=[cond] + if isinstance((cond := args[i + 1]), str) + else cond, + ) + for i in range(0, len(args), 2) + ] + + if not self.experiment_tables: + self.experiment_tables.append(ExperimentTable(experiments=[])) + self.experiment_tables[-1].experiments.append( + Experiment(id=id_, periods=periods) + ) + + def __iadd__(self, other): + """Add Observable, Parameter, Measurement, Condition, or Experiment""" + from .core import ( + Condition, + Experiment, + Measurement, + Observable, + Parameter, + ) + + if isinstance(other, Observable): + if not self.observable_tables: + self.observable_tables.append(ObservableTable(observables=[])) + self.observable_tables[-1] += other + elif isinstance(other, Parameter): + if not self.parameter_tables: + self.parameter_tables.append(ParameterTable(parameters=[])) + self.parameter_tables[-1] += other + elif isinstance(other, Measurement): + if not self.measurement_tables: + self.measurement_tables.append( + MeasurementTable(measurements=[]) + ) + self.measurement_tables[-1] += other + elif isinstance(other, Condition): + if not self.condition_tables: + self.condition_tables.append(ConditionTable(conditions=[])) + self.condition_tables[-1] += other + elif isinstance(other, Experiment): + if not self.experiment_tables: + self.experiment_tables.append(ExperimentTable(experiments=[])) + self.experiment_tables[-1] += other + else: + raise ValueError( + f"Cannot add object of type {type(other)} to Problem." + ) + return self + + def model_dump(self, **kwargs) -> dict[str, Any]: + """Convert this Problem to a dictionary. + + This function is intended for debugging purposes and should not be + used for serialization. The output of this function may change + without notice. + + The output includes all PEtab tables, but not the model itself. + + See `pydantic.BaseModel.model_dump `__ + for details. 
+ + :example: + + >>> from pprint import pprint + >>> p = Problem() + >>> p += Parameter(id="par", lb=0, ub=1) + >>> pprint(p.model_dump()) + {'conditions': [], + 'config': {'condition_files': [], + 'experiment_files': [], + 'extensions': {}, + 'format_version': '2.0.0', + 'mapping_files': [], + 'measurement_files': [], + 'model_files': {}, + 'observable_files': [], + 'parameter_file': []}, + 'experiments': [], + 'mappings': [], + 'measurements': [], + 'observables': [], + 'parameters': [{'estimate': 'true', + 'id': 'par', + 'lb': 0.0, + 'nominal_value': None, + 'prior_distribution': '', + 'prior_parameters': '', + 'ub': 1.0}]} + """ + res = { + "config": (self.config or ProblemConfig()).model_dump( + **kwargs, by_alias=True + ), + } + for field, table_list in ( + ("conditions", self.condition_tables), + ("experiments", self.experiment_tables), + ("observables", self.observable_tables), + ("measurements", self.measurement_tables), + ("parameters", self.parameter_tables), + ("mappings", self.mapping_tables), + ): + res[field] = ( + [table.model_dump(**kwargs) for table in table_list] + if table_list + else [] + ) + return res + + +class ModelFile(BaseModel): + """A file in the PEtab problem configuration.""" + + location: AnyUrl | Path + language: str + + model_config = ConfigDict( + validate_assignment=True, + ) + + +class ExtensionConfig(BaseModel): + """The configuration of a PEtab extension.""" + + version: str + config: dict + + +class ProblemConfig(BaseModel): + """The PEtab problem configuration.""" + + #: The path to the PEtab problem configuration. + filepath: AnyUrl | Path | None = Field( + None, + description="The path to the PEtab problem configuration.", + exclude=True, + ) + #: The base path to resolve relative paths. + base_path: AnyUrl | Path | None = Field( + None, + description="The base path to resolve relative paths.", + exclude=True, + ) + #: The PEtab format version. 
+ format_version: str = "2.0.0" + #: The path to the parameter file, relative to ``base_path``. + # TODO https://github.com/PEtab-dev/PEtab/pull/641: + # rename to parameter_files in yaml for consistency with other files? + # always a list? + parameter_files: list[AnyUrl | Path] = Field( + default=[], alias=C.PARAMETER_FILES + ) + + model_files: dict[str, ModelFile] | None = {} + measurement_files: list[AnyUrl | Path] = [] + condition_files: list[AnyUrl | Path] = [] + experiment_files: list[AnyUrl | Path] = [] + observable_files: list[AnyUrl | Path] = [] + mapping_files: list[AnyUrl | Path] = [] + + #: Extensions used by the problem. + extensions: list[ExtensionConfig] | dict = {} + + model_config = ConfigDict( + validate_assignment=True, + ) + + # convert parameter_file to list + @field_validator( + "parameter_files", + mode="before", + ) + def _convert_parameter_file(cls, v): + """Convert parameter_file to a list.""" + if isinstance(v, str): + return [v] + if isinstance(v, list): + return v + raise ValueError( + "parameter_files must be a string or a list of strings." + ) + + def to_yaml(self, filename: str | Path): + """Write the configuration to a YAML file. + + :param filename: Destination file name. The parent directory will be + created if necessary. 
+ """ + from ..v1.yaml import write_yaml + + data = self.model_dump(by_alias=True) + # convert Paths to strings for YAML serialization + for key in ( + "measurement_files", + "condition_files", + "experiment_files", + "observable_files", + "mapping_files", + "parameter_files", + ): + data[key] = list(map(str, data[key])) + + for model_id in data.get("model_files", {}): + data["model_files"][model_id][C.MODEL_LOCATION] = str( + data["model_files"][model_id]["location"] + ) + + write_yaml(data, filename) + + @property + def format_version_tuple(self) -> tuple[int, int, int, str]: + """The format version as a tuple of major/minor/patch `int`s and a + suffix.""" + return parse_version(self.format_version) diff --git a/petab/v2/lint.py b/petab/v2/lint.py index 3a3350e7..f323ef06 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -15,8 +15,7 @@ import sympy as sp from ..v2.C import * -from .core import PriorDistribution -from .problem import Problem +from .core import PriorDistribution, Problem logger = logging.getLogger(__name__) diff --git a/petab/v2/problem.py b/petab/v2/problem.py deleted file mode 100644 index 93362f62..00000000 --- a/petab/v2/problem.py +++ /dev/null @@ -1,1233 +0,0 @@ -"""PEtab v2 problems.""" - -from __future__ import annotations - -import logging -import os -import tempfile -import traceback -from collections.abc import Sequence -from itertools import chain -from math import nan -from numbers import Number -from pathlib import Path -from typing import TYPE_CHECKING, Any - -import numpy as np -import pandas as pd -import sympy as sp -from pydantic import ( - AnyUrl, - BaseModel, - ConfigDict, - Field, - field_validator, -) - -from ..v1 import ( - parameter_mapping, - validate_yaml_syntax, - yaml, -) -from ..v1.distributions import Distribution -from ..v1.models.model import Model, model_factory -from ..v1.yaml import get_path_prefix -from ..v2.C import * # noqa: F403 -from ..versions import parse_version -from . 
import core - -if TYPE_CHECKING: - from ..v2.lint import ValidationResultList, ValidationTask - - -__all__ = ["Problem", "ProblemConfig"] - - -class Problem: - """ - PEtab parameter estimation problem - - A PEtab parameter estimation problem as defined by - - - model - - condition table - - experiment table - - measurement table - - parameter table - - observable table - - mapping table - - See also :doc:`petab:v2/documentation_data_format`. - """ - - def __init__( - self, - model: Model = None, - condition_tables: list[core.ConditionTable] = None, - experiment_tables: list[core.ExperimentTable] = None, - observable_tables: list[core.ObservableTable] = None, - measurement_tables: list[core.MeasurementTable] = None, - parameter_tables: list[core.ParameterTable] = None, - mapping_tables: list[core.MappingTable] = None, - config: ProblemConfig = None, - ): - from ..v2.lint import default_validation_tasks - - self.config = config - self.model: Model | None = model - self.validation_tasks: list[ValidationTask] = ( - default_validation_tasks.copy() - ) - - self.observable_tables = observable_tables or [ - core.ObservableTable(observables=[]) - ] - self.condition_tables = condition_tables or [ - core.ConditionTable(conditions=[]) - ] - self.experiment_tables = experiment_tables or [ - core.ExperimentTable(experiments=[]) - ] - self.measurement_tables = measurement_tables or [ - core.MeasurementTable(measurements=[]) - ] - self.mapping_tables = mapping_tables or [ - core.MappingTable(mappings=[]) - ] - self.parameter_tables = parameter_tables or [ - core.ParameterTable(parameters=[]) - ] - - def __str__(self): - model = f"with model ({self.model})" if self.model else "without model" - - ne = len(self.experiments) - experiments = f"{ne} experiments" - - nc = len(self.conditions) - conditions = f"{nc} conditions" - - no = len(self.observables) - observables = f"{no} observables" - - nm = len(self.measurements) - measurements = f"{nm} measurements" - - nest = 
sum(pt.n_estimated for pt in self.parameter_tables) - parameters = f"{nest} estimated parameters" - - return ( - f"PEtab Problem {model}, {conditions}, {experiments}, " - f"{observables}, {measurements}, {parameters}" - ) - - def __getitem__(self, key): - """Get PEtab entity by ID. - - This allows accessing PEtab entities such as conditions, experiments, - observables, and parameters by their ID. - - Accessing model entities is not currently not supported. - """ - for table_list in ( - self.condition_tables, - self.experiment_tables, - self.observable_tables, - self.measurement_tables, - self.parameter_tables, - self.mapping_tables, - ): - for table in table_list: - try: - return table[key] - except KeyError: - pass - - raise KeyError( - f"Entity with ID '{key}' not found in the PEtab problem" - ) - - @staticmethod - def from_yaml( - yaml_config: dict | Path | str, base_path: str | Path = None - ) -> Problem: - """ - Factory method to load model and tables as specified by YAML file. - - Arguments: - yaml_config: PEtab configuration as dictionary or YAML file name - base_path: Base directory or URL to resolve relative paths - """ - if isinstance(yaml_config, Path): - yaml_config = str(yaml_config) - - if isinstance(yaml_config, str): - yaml_file = yaml_config - if base_path is None: - base_path = get_path_prefix(yaml_file) - yaml_config = yaml.load_yaml(yaml_file) - else: - yaml_file = None - - validate_yaml_syntax(yaml_config) - - def get_path(filename): - if base_path is None: - return filename - return f"{base_path}/{filename}" - - if (format_version := parse_version(yaml_config[FORMAT_VERSION]))[ - 0 - ] != 2: - # If we got a path to a v1 yaml file, try to auto-upgrade - from tempfile import TemporaryDirectory - - from .petab1to2 import petab1to2 - - if format_version[0] == 1 and yaml_file: - logging.debug( - "Auto-upgrading problem from PEtab 1.0 to PEtab 2.0" - ) - with TemporaryDirectory() as tmpdirname: - try: - petab1to2(yaml_file, output_dir=tmpdirname) - 
except Exception as e: - raise ValueError( - "Failed to auto-upgrade PEtab 1.0 problem to " - "PEtab 2.0" - ) from e - return Problem.from_yaml( - Path(tmpdirname) / Path(yaml_file).name - ) - raise ValueError( - "Provided PEtab files are of unsupported version " - f"{yaml_config[FORMAT_VERSION]}." - ) - - if len(yaml_config[MODEL_FILES]) > 1: - raise ValueError( - "petab.v2.Problem.from_yaml() can only be used for " - "yaml files comprising a single model. " - "Consider using " - "petab.v2.CompositeProblem.from_yaml() instead." - ) - config = ProblemConfig( - **yaml_config, base_path=base_path, filepath=yaml_file - ) - parameter_tables = [ - core.ParameterTable.from_tsv(get_path(f)) - for f in config.parameter_files - ] - - if len(config.model_files or []) > 1: - # TODO https://github.com/PEtab-dev/libpetab-python/issues/6 - raise NotImplementedError( - "Support for multiple models is not yet implemented." - ) - model = None - if config.model_files: - model_id, model_info = next(iter(config.model_files.items())) - model = model_factory( - get_path(model_info.location), - model_info.language, - model_id=model_id, - ) - - measurement_tables = ( - [ - core.MeasurementTable.from_tsv(get_path(f)) - for f in config.measurement_files - ] - if config.measurement_files - else None - ) - - condition_tables = ( - [ - core.ConditionTable.from_tsv(get_path(f)) - for f in config.condition_files - ] - if config.condition_files - else None - ) - - experiment_tables = ( - [ - core.ExperimentTable.from_tsv(get_path(f)) - for f in config.experiment_files - ] - if config.experiment_files - else None - ) - - observable_tables = ( - [ - core.ObservableTable.from_tsv(get_path(f)) - for f in config.observable_files - ] - if config.observable_files - else None - ) - - mapping_tables = ( - [ - core.MappingTable.from_tsv(get_path(f)) - for f in config.mapping_files - ] - if config.mapping_files - else None - ) - - return Problem( - config=config, - model=model, - 
condition_tables=condition_tables, - experiment_tables=experiment_tables, - observable_tables=observable_tables, - measurement_tables=measurement_tables, - parameter_tables=parameter_tables, - mapping_tables=mapping_tables, - ) - - @staticmethod - def from_dfs( - model: Model = None, - condition_df: pd.DataFrame = None, - experiment_df: pd.DataFrame = None, - measurement_df: pd.DataFrame = None, - parameter_df: pd.DataFrame = None, - observable_df: pd.DataFrame = None, - mapping_df: pd.DataFrame = None, - config: ProblemConfig = None, - ): - """ - Construct a PEtab problem from dataframes. - - Parameters: - condition_df: PEtab condition table - experiment_df: PEtab experiment table - measurement_df: PEtab measurement table - parameter_df: PEtab parameter table - observable_df: PEtab observable table - mapping_df: PEtab mapping table - model: The underlying model - config: The PEtab problem configuration - """ - - observable_table = core.ObservableTable.from_df(observable_df) - condition_table = core.ConditionTable.from_df(condition_df) - experiment_table = core.ExperimentTable.from_df(experiment_df) - measurement_table = core.MeasurementTable.from_df(measurement_df) - mapping_table = core.MappingTable.from_df(mapping_df) - parameter_table = core.ParameterTable.from_df(parameter_df) - - return Problem( - model=model, - condition_tables=[condition_table], - experiment_tables=[experiment_table], - observable_tables=[observable_table], - measurement_tables=[measurement_table], - parameter_tables=[parameter_table], - mapping_tables=[mapping_table], - config=config, - ) - - @staticmethod - def from_combine(filename: Path | str) -> Problem: - """Read PEtab COMBINE archive (http://co.mbine.org/documents/archive). - - See also :py:func:`petab.v2.create_combine_archive`. - - Arguments: - filename: Path to the PEtab-COMBINE archive - - Returns: - A :py:class:`petab.v2.Problem` instance. 
- """ - # function-level import, because module-level import interfered with - # other SWIG interfaces - try: - import libcombine - except ImportError as e: - raise ImportError( - "To use PEtab's COMBINE functionality, libcombine " - "(python-libcombine) must be installed." - ) from e - - archive = libcombine.CombineArchive() - if archive.initializeFromArchive(str(filename)) is None: - raise ValueError(f"Invalid Combine Archive: {filename}") - - with tempfile.TemporaryDirectory() as tmpdirname: - archive.extractTo(tmpdirname) - problem = Problem.from_yaml( - os.path.join(tmpdirname, archive.getMasterFile().getLocation()) - ) - archive.cleanUp() - - return problem - - @staticmethod - def get_problem(problem: str | Path | Problem) -> Problem: - """Get a PEtab problem from a file or a problem object. - - Arguments: - problem: Path to a PEtab problem file or a PEtab problem object. - - Returns: - A PEtab problem object. - """ - if isinstance(problem, Problem): - return problem - - if isinstance(problem, str | Path): - return Problem.from_yaml(problem) - - raise TypeError( - "The argument `problem` must be a path to a PEtab problem file " - "or a PEtab problem object." 
- ) - - @property - def condition_df(self) -> pd.DataFrame | None: - """Combined condition tables as DataFrame.""" - conditions = self.conditions - return ( - core.ConditionTable(conditions=conditions).to_df() - if conditions - else None - ) - - @condition_df.setter - def condition_df(self, value: pd.DataFrame): - self.condition_tables = [core.ConditionTable.from_df(value)] - - @property - def experiment_df(self) -> pd.DataFrame | None: - """Experiment table as DataFrame.""" - return ( - core.ExperimentTable(experiments=experiments).to_df() - if (experiments := self.experiments) - else None - ) - - @experiment_df.setter - def experiment_df(self, value: pd.DataFrame): - self.experiment_tables = [core.ExperimentTable.from_df(value)] - - @property - def measurement_df(self) -> pd.DataFrame | None: - """Combined measurement tables as DataFrame.""" - measurements = self.measurements - return ( - core.MeasurementTable(measurements=measurements).to_df() - if measurements - else None - ) - - @measurement_df.setter - def measurement_df(self, value: pd.DataFrame): - self.measurement_tables = [core.MeasurementTable.from_df(value)] - - @property - def parameter_df(self) -> pd.DataFrame | None: - """Combined parameter tables as DataFrame.""" - parameters = self.parameters - return ( - core.ParameterTable(parameters=parameters).to_df() - if parameters - else None - ) - - @parameter_df.setter - def parameter_df(self, value: pd.DataFrame): - self.parameter_tables = [core.ParameterTable.from_df(value)] - - @property - def observable_df(self) -> pd.DataFrame | None: - """Combined observable tables as DataFrame.""" - observables = self.observables - return ( - core.ObservableTable(observables=observables).to_df() - if observables - else None - ) - - @observable_df.setter - def observable_df(self, value: pd.DataFrame): - self.observable_tables = [core.ObservableTable.from_df(value)] - - @property - def mapping_df(self) -> pd.DataFrame | None: - """Combined mapping tables as 
DataFrame.""" - mappings = self.mappings - return ( - core.MappingTable(mappings=mappings).to_df() if mappings else None - ) - - @mapping_df.setter - def mapping_df(self, value: pd.DataFrame): - self.mapping_tables = [core.MappingTable.from_df(value)] - - @property - def conditions(self) -> list[core.Condition]: - """List of conditions in the condition table(s).""" - return list( - chain.from_iterable(ct.conditions for ct in self.condition_tables) - ) - - @property - def experiments(self) -> list[core.Experiment]: - """List of experiments in the experiment table(s).""" - return list( - chain.from_iterable( - et.experiments for et in self.experiment_tables - ) - ) - - @property - def observables(self) -> list[core.Observable]: - """List of observables in the observable table(s).""" - return list( - chain.from_iterable( - ot.observables for ot in self.observable_tables - ) - ) - - @property - def measurements(self) -> list[core.Measurement]: - """List of measurements in the measurement table(s).""" - return list( - chain.from_iterable( - mt.measurements for mt in self.measurement_tables - ) - ) - - @property - def parameters(self) -> list[core.Parameter]: - """List of parameters in the parameter table(s).""" - return list( - chain.from_iterable(pt.parameters for pt in self.parameter_tables) - ) - - @property - def mappings(self) -> list[core.Mapping]: - """List of mappings in the mapping table(s).""" - return list( - chain.from_iterable(mt.mappings for mt in self.mapping_tables) - ) - - def get_optimization_parameters(self) -> list[str]: - """ - Get the list of optimization parameter IDs from parameter table. - - Returns: - A list of IDs of parameters selected for optimization - (i.e., those with estimate = True). - """ - return [p.id for p in self.parameters if p.estimate] - - def get_observable_ids(self) -> list[str]: - """ - Returns dictionary of observable ids. 
- """ - return [o.id for o in self.observables] - - def _apply_mask(self, v: list, free: bool = True, fixed: bool = True): - """Apply mask of only free or only fixed values. - - Parameters - ---------- - v: - The full vector the mask is to be applied to. - free: - Whether to return free parameters, i.e., parameters to estimate. - fixed: - Whether to return fixed parameters, i.e., parameters not to - estimate. - - Returns - ------- - The reduced vector with applied mask. - """ - if not free and not fixed: - return [] - if not free: - return [v[ix] for ix in self.x_fixed_indices] - if not fixed: - return [v[ix] for ix in self.x_free_indices] - return v - - def get_x_ids(self, free: bool = True, fixed: bool = True): - """Generic function to get parameter ids. - - Parameters - ---------- - free: - Whether to return free parameters, i.e. parameters to estimate. - fixed: - Whether to return fixed parameters, i.e. parameters not to - estimate. - - Returns - ------- - The parameter IDs. - """ - v = [p.id for p in self.parameters] - return self._apply_mask(v, free=free, fixed=fixed) - - @property - def x_ids(self) -> list[str]: - """Parameter table parameter IDs""" - return self.get_x_ids() - - @property - def x_free_ids(self) -> list[str]: - """Parameter table parameter IDs, for free parameters.""" - return self.get_x_ids(fixed=False) - - @property - def x_fixed_ids(self) -> list[str]: - """Parameter table parameter IDs, for fixed parameters.""" - return self.get_x_ids(free=False) - - def get_x_nominal(self, free: bool = True, fixed: bool = True) -> list: - """Generic function to get parameter nominal values. - - Parameters - ---------- - free: - Whether to return free parameters, i.e. parameters to estimate. - fixed: - Whether to return fixed parameters, i.e. parameters not to - estimate. - - Returns - ------- - The parameter nominal values. 
- """ - v = [ - p.nominal_value if p.nominal_value is not None else nan - for p in self.parameters - ] - - return self._apply_mask(v, free=free, fixed=fixed) - - @property - def x_nominal(self) -> list: - """Parameter table nominal values""" - return self.get_x_nominal() - - @property - def x_nominal_free(self) -> list: - """Parameter table nominal values, for free parameters.""" - return self.get_x_nominal(fixed=False) - - @property - def x_nominal_fixed(self) -> list: - """Parameter table nominal values, for fixed parameters.""" - return self.get_x_nominal(free=False) - - def get_lb(self, free: bool = True, fixed: bool = True): - """Generic function to get lower parameter bounds. - - Parameters - ---------- - free: - Whether to return free parameters, i.e. parameters to estimate. - fixed: - Whether to return fixed parameters, i.e. parameters not to - estimate. - - Returns - ------- - The lower parameter bounds. - """ - v = [p.lb if p.lb is not None else nan for p in self.parameters] - return self._apply_mask(v, free=free, fixed=fixed) - - @property - def lb(self) -> list: - """Parameter table lower bounds.""" - return self.get_lb() - - def get_ub(self, free: bool = True, fixed: bool = True): - """Generic function to get upper parameter bounds. - - Parameters - ---------- - free: - Whether to return free parameters, i.e. parameters to estimate. - fixed: - Whether to return fixed parameters, i.e. parameters not to - estimate. - - Returns - ------- - The upper parameter bounds. 
- """ - v = [p.ub if p.ub is not None else nan for p in self.parameters] - return self._apply_mask(v, free=free, fixed=fixed) - - @property - def ub(self) -> list: - """Parameter table upper bounds""" - return self.get_ub() - - @property - def x_free_indices(self) -> list[int]: - """Parameter table estimated parameter indices.""" - return [i for i, p in enumerate(self.parameters) if p.estimate] - - @property - def x_fixed_indices(self) -> list[int]: - """Parameter table non-estimated parameter indices.""" - return [i for i, p in enumerate(self.parameters) if not p.estimate] - - # TODO remove in v2? - def get_optimization_to_simulation_parameter_mapping(self, **kwargs): - """ - See - :py:func:`petab.parameter_mapping.get_optimization_to_simulation_parameter_mapping`, - to which all keyword arguments are forwarded. - """ - return ( - parameter_mapping.get_optimization_to_simulation_parameter_mapping( - condition_df=self.condition_df, - measurement_df=self.measurement_df, - parameter_df=self.parameter_df, - observable_df=self.observable_df, - model=self.model, - **kwargs, - ) - ) - - def get_priors(self) -> dict[str, Distribution]: - """Get prior distributions. - - :returns: The prior distributions for the estimated parameters. - """ - return {p.id: p.prior_dist for p in self.parameters if p.estimate} - - def sample_parameter_startpoints(self, n_starts: int = 100, **kwargs): - """Create 2D array with starting points for optimization""" - priors = self.get_priors() - return np.vstack([p.sample(n_starts) for p in priors.values()]).T - - def sample_parameter_startpoints_dict( - self, n_starts: int = 100 - ) -> list[dict[str, float]]: - """Create dictionaries with starting points for optimization - - :returns: - A list of dictionaries with parameter IDs mapping to sampled - parameter values. 
- """ - return [ - dict(zip(self.x_free_ids, parameter_values, strict=True)) - for parameter_values in self.sample_parameter_startpoints( - n_starts=n_starts - ) - ] - - @property - def n_estimated(self) -> int: - """The number of estimated parameters.""" - return len(self.x_free_indices) - - @property - def n_measurements(self) -> int: - """Number of measurements.""" - return sum(len(mt.measurements) for mt in self.measurement_tables) - - @property - def n_priors(self) -> int: - """Number of priors.""" - return sum(p.prior_distribution is not None for p in self.parameters) - - def validate( - self, validation_tasks: list[ValidationTask] = None - ) -> ValidationResultList: - """Validate the PEtab problem. - - Arguments: - validation_tasks: List of validation tasks to run. If ``None`` - or empty, :attr:`Problem.validation_tasks` are used. - Returns: - A list of validation results. - """ - from ..v2.lint import ( - ValidationIssue, - ValidationIssueSeverity, - ValidationResultList, - ) - - validation_results = ValidationResultList() - if self.config and self.config.extensions: - extensions = ",".join(self.config.extensions.keys()) - validation_results.append( - ValidationIssue( - ValidationIssueSeverity.WARNING, - "Validation of PEtab extensions is not yet implemented, " - "but the given problem uses the following extensions: " - f"{extensions}", - ) - ) - - for task in validation_tasks or self.validation_tasks: - try: - cur_result = task.run(self) - except Exception as e: - cur_result = ValidationIssue( - ValidationIssueSeverity.CRITICAL, - f"Validation task {task} failed with exception: {e}\n" - f"{traceback.format_exc()}", - ) - - if cur_result: - validation_results.append(cur_result) - - if cur_result.level == ValidationIssueSeverity.CRITICAL: - break - - return validation_results - - def add_condition( - self, id_: str, name: str = None, **kwargs: Number | str | sp.Expr - ): - """Add a simulation condition to the problem. 
- - If there are more than one condition tables, the condition - is added to the last one. - - Arguments: - id_: The condition id - name: The condition name. If given, this will be added to the - last mapping table. If no mapping table exists, - a new mapping table will be created. - kwargs: Entities to be added to the condition table in the form - `target_id=target_value`. - """ - if not kwargs: - raise ValueError("Cannot add condition without any changes") - - changes = [ - core.Change(target_id=target_id, target_value=target_value) - for target_id, target_value in kwargs.items() - ] - if not self.condition_tables: - self.condition_tables.append(core.ConditionTable(conditions=[])) - self.condition_tables[-1].conditions.append( - core.Condition(id=id_, changes=changes) - ) - if name is not None: - self.add_mapping(petab_id=id_, name=name) - - def add_observable( - self, - id_: str, - formula: str, - noise_formula: str | float | int = None, - noise_distribution: str = None, - observable_placeholders: list[str] = None, - noise_placeholders: list[str] = None, - name: str = None, - **kwargs, - ): - """Add an observable to the problem. - - If there are more than one observable tables, the observable - is added to the last one. 
- - Arguments: - id_: The observable id - formula: The observable formula - noise_formula: The noise formula - noise_distribution: The noise distribution - observable_placeholders: Placeholders for the observable formula - noise_placeholders: Placeholders for the noise formula - name: The observable name - kwargs: additional columns/values to add to the observable table - - """ - record = { - OBSERVABLE_ID: id_, - OBSERVABLE_FORMULA: formula, - } - if name is not None: - record[OBSERVABLE_NAME] = name - if noise_formula is not None: - record[NOISE_FORMULA] = noise_formula - if noise_distribution is not None: - record[NOISE_DISTRIBUTION] = noise_distribution - if observable_placeholders is not None: - record[OBSERVABLE_PLACEHOLDERS] = observable_placeholders - if noise_placeholders is not None: - record[NOISE_PLACEHOLDERS] = noise_placeholders - record.update(kwargs) - - if not self.observable_tables: - self.observable_tables.append(core.ObservableTable(observables=[])) - - self.observable_tables[-1] += core.Observable(**record) - - def add_parameter( - self, - id_: str, - estimate: bool | str = True, - nominal_value: Number | None = None, - lb: Number = None, - ub: Number = None, - prior_dist: str = None, - prior_pars: str | Sequence = None, - **kwargs, - ): - """Add a parameter to the problem. - - If there are more than one parameter tables, the parameter - is added to the last one. 
- - Arguments: - id_: The parameter id - estimate: Whether the parameter is estimated - nominal_value: The nominal value of the parameter - lb: The lower bound of the parameter - ub: The upper bound of the parameter - prior_dist: The type of the prior distribution - prior_pars: The parameters of the prior distribution - kwargs: additional columns/values to add to the parameter table - """ - record = { - PARAMETER_ID: id_, - } - if estimate is not None: - record[ESTIMATE] = estimate - if nominal_value is not None: - record[NOMINAL_VALUE] = nominal_value - if lb is not None: - record[LOWER_BOUND] = lb - if ub is not None: - record[UPPER_BOUND] = ub - if prior_dist is not None: - record[PRIOR_DISTRIBUTION] = prior_dist - if prior_pars is not None: - if isinstance(prior_pars, Sequence) and not isinstance( - prior_pars, str - ): - prior_pars = PARAMETER_SEPARATOR.join(map(str, prior_pars)) - record[PRIOR_PARAMETERS] = prior_pars - record.update(kwargs) - - if not self.parameter_tables: - self.parameter_tables.append(core.ParameterTable(parameters=[])) - - self.parameter_tables[-1] += core.Parameter(**record) - - def add_measurement( - self, - obs_id: str, - experiment_id: str, - time: float, - measurement: float, - observable_parameters: Sequence[str | float] | str | float = None, - noise_parameters: Sequence[str | float] | str | float = None, - ): - """Add a measurement to the problem. - - If there are more than one measurement tables, the measurement - is added to the last one. 
- - Arguments: - obs_id: The observable ID - experiment_id: The experiment ID - time: The measurement time - measurement: The measurement value - observable_parameters: The observable parameters - noise_parameters: The noise parameters - """ - if observable_parameters is not None and not isinstance( - observable_parameters, Sequence - ): - observable_parameters = [observable_parameters] - if noise_parameters is not None and not isinstance( - noise_parameters, Sequence - ): - noise_parameters = [noise_parameters] - - if not self.measurement_tables: - self.measurement_tables.append( - core.MeasurementTable(measurements=[]) - ) - - self.measurement_tables[-1].measurements.append( - core.Measurement( - observable_id=obs_id, - experiment_id=experiment_id, - time=time, - measurement=measurement, - observable_parameters=observable_parameters, - noise_parameters=noise_parameters, - ) - ) - - def add_mapping( - self, petab_id: str, model_id: str = None, name: str = None - ): - """Add a mapping table entry to the problem. - - If there are more than one mapping tables, the mapping - is added to the last one. - - Arguments: - petab_id: The new PEtab-compatible ID mapping to `model_id` - model_id: The ID of some entity in the model - name: A name (any string) for the entity referenced by `petab_id`. - """ - if not self.mapping_tables: - self.mapping_tables.append(core.MappingTable(mappings=[])) - self.mapping_tables[-1].mappings.append( - core.Mapping(petab_id=petab_id, model_id=model_id, name=name) - ) - - def add_experiment(self, id_: str, *args): - """Add an experiment to the problem. - - If there are more than one experiment tables, the experiment - is added to the last one. - - :param id_: The experiment ID. - :param args: Timepoints and associated conditions: - ``time_1, condition_id_1, time_2, condition_id_2, ...``. - """ - if len(args) % 2 != 0: - raise ValueError( - "Arguments must be pairs of timepoints and condition IDs." 
- ) - - periods = [ - core.ExperimentPeriod( - time=args[i], - condition_ids=[cond] - if isinstance((cond := args[i + 1]), str) - else cond, - ) - for i in range(0, len(args), 2) - ] - - if not self.experiment_tables: - self.experiment_tables.append(core.ExperimentTable(experiments=[])) - self.experiment_tables[-1].experiments.append( - core.Experiment(id=id_, periods=periods) - ) - - def __iadd__(self, other): - """Add Observable, Parameter, Measurement, Condition, or Experiment""" - from .core import ( - Condition, - Experiment, - Measurement, - Observable, - Parameter, - ) - - if isinstance(other, Observable): - if not self.observable_tables: - self.observable_tables.append( - core.ObservableTable(observables=[]) - ) - self.observable_tables[-1] += other - elif isinstance(other, Parameter): - if not self.parameter_tables: - self.parameter_tables.append( - core.ParameterTable(parameters=[]) - ) - self.parameter_tables[-1] += other - elif isinstance(other, Measurement): - if not self.measurement_tables: - self.measurement_tables.append( - core.MeasurementTable(measurements=[]) - ) - self.measurement_tables[-1] += other - elif isinstance(other, Condition): - if not self.condition_tables: - self.condition_tables.append( - core.ConditionTable(conditions=[]) - ) - self.condition_tables[-1] += other - elif isinstance(other, Experiment): - if not self.experiment_tables: - self.experiment_tables.append( - core.ExperimentTable(experiments=[]) - ) - self.experiment_tables[-1] += other - else: - raise ValueError( - f"Cannot add object of type {type(other)} to Problem." - ) - return self - - def model_dump(self, **kwargs) -> dict[str, Any]: - """Convert this Problem to a dictionary. - - This function is intended for debugging purposes and should not be - used for serialization. The output of this function may change - without notice. - - The output includes all PEtab tables, but not the model itself. - - See `pydantic.BaseModel.model_dump `__ - for details. 
- - :example: - - >>> from pprint import pprint - >>> p = Problem() - >>> p += core.Parameter(id="par", lb=0, ub=1) - >>> pprint(p.model_dump()) - {'conditions': [], - 'config': {'condition_files': [], - 'experiment_files': [], - 'extensions': {}, - 'format_version': '2.0.0', - 'mapping_files': [], - 'measurement_files': [], - 'model_files': {}, - 'observable_files': [], - 'parameter_file': []}, - 'experiments': [], - 'mappings': [], - 'measurements': [], - 'observables': [], - 'parameters': [{'estimate': 'true', - 'id': 'par', - 'lb': 0.0, - 'nominal_value': None, - 'prior_distribution': '', - 'prior_parameters': '', - 'ub': 1.0}]} - """ - res = { - "config": (self.config or ProblemConfig()).model_dump( - **kwargs, by_alias=True - ), - } - for field, table_list in ( - ("conditions", self.condition_tables), - ("experiments", self.experiment_tables), - ("observables", self.observable_tables), - ("measurements", self.measurement_tables), - ("parameters", self.parameter_tables), - ("mappings", self.mapping_tables), - ): - res[field] = ( - [table.model_dump(**kwargs) for table in table_list] - if table_list - else [] - ) - return res - - -class ModelFile(BaseModel): - """A file in the PEtab problem configuration.""" - - location: AnyUrl | Path - language: str - - model_config = ConfigDict( - validate_assignment=True, - ) - - -class ExtensionConfig(BaseModel): - """The configuration of a PEtab extension.""" - - version: str - config: dict - - -class ProblemConfig(BaseModel): - """The PEtab problem configuration.""" - - #: The path to the PEtab problem configuration. - filepath: AnyUrl | Path | None = Field( - None, - description="The path to the PEtab problem configuration.", - exclude=True, - ) - #: The base path to resolve relative paths. - base_path: AnyUrl | Path | None = Field( - None, - description="The base path to resolve relative paths.", - exclude=True, - ) - #: The PEtab format version. 
- format_version: str = "2.0.0" - #: The path to the parameter file, relative to ``base_path``. - # TODO https://github.com/PEtab-dev/PEtab/pull/641: - # rename to parameter_files in yaml for consistency with other files? - # always a list? - parameter_files: list[AnyUrl | Path] = Field( - default=[], alias=PARAMETER_FILES - ) - - model_files: dict[str, ModelFile] | None = {} - measurement_files: list[AnyUrl | Path] = [] - condition_files: list[AnyUrl | Path] = [] - experiment_files: list[AnyUrl | Path] = [] - observable_files: list[AnyUrl | Path] = [] - mapping_files: list[AnyUrl | Path] = [] - - #: Extensions used by the problem. - extensions: list[ExtensionConfig] | dict = {} - - model_config = ConfigDict( - validate_assignment=True, - ) - - # convert parameter_file to list - @field_validator( - "parameter_files", - mode="before", - ) - def _convert_parameter_file(cls, v): - """Convert parameter_file to a list.""" - if isinstance(v, str): - return [v] - if isinstance(v, list): - return v - raise ValueError( - "parameter_files must be a string or a list of strings." - ) - - def to_yaml(self, filename: str | Path): - """Write the configuration to a YAML file. - - :param filename: Destination file name. The parent directory will be - created if necessary. 
- """ - from ..v1.yaml import write_yaml - - data = self.model_dump(by_alias=True) - # convert Paths to strings for YAML serialization - for key in ( - "measurement_files", - "condition_files", - "experiment_files", - "observable_files", - "mapping_files", - "parameter_files", - ): - data[key] = list(map(str, data[key])) - - for model_id in data.get("model_files", {}): - data["model_files"][model_id][MODEL_LOCATION] = str( - data["model_files"][model_id]["location"] - ) - - write_yaml(data, filename) - - @property - def format_version_tuple(self) -> tuple[int, int, int, str]: - """The format version as a tuple of major/minor/patch `int`s and a - suffix.""" - return parse_version(self.format_version) diff --git a/tests/v2/test_core.py b/tests/v2/test_core.py index 2d55f219..5ed011e0 100644 --- a/tests/v2/test_core.py +++ b/tests/v2/test_core.py @@ -1,11 +1,32 @@ import tempfile from pathlib import Path +import numpy as np +import pandas as pd import pytest import sympy as sp -from pydantic import ValidationError +from pandas.testing import assert_frame_equal +from pydantic import AnyUrl, ValidationError from sympy.abc import x, y +import petab.v2 as petab +from petab.v2 import Problem +from petab.v2.C import ( + CONDITION_ID, + ESTIMATE, + LOWER_BOUND, + MODEL_ENTITY_ID, + NAME, + NOISE_FORMULA, + NOMINAL_VALUE, + OBSERVABLE_FORMULA, + OBSERVABLE_ID, + PARAMETER_ID, + PETAB_ENTITY_ID, + TARGET_ID, + TARGET_VALUE, + UPPER_BOUND, +) from petab.v2.core import * from petab.v2.petab1to2 import petab1to2 @@ -270,3 +291,203 @@ def test_condition_table(): ) ] ).free_symbols == {x, y} + + +def test_load_remote(): + """Test loading remote files""" + from jsonschema.exceptions import ValidationError + + yaml_url = ( + "https://raw.githubusercontent.com/PEtab-dev/petab_test_suite" + "/main/petabtests/cases/v2.0.0/sbml/0010/_0010.yaml" + ) + + try: + petab_problem = Problem.from_yaml(yaml_url) + + assert ( + petab_problem.measurement_df is not None + and not 
petab_problem.measurement_df.empty + ) + + assert petab_problem.validate() == [] + except ValidationError: + # FIXME: Until v2 is finalized, the format of the tests will often be + # out of sync with the schema. + # Ignore validation errors for now. + pass + + +def test_auto_upgrade(): + yaml_url = ( + "https://raw.githubusercontent.com/PEtab-dev/petab_test_suite" + "/main/petabtests/cases/v1.0.0/sbml/0001/_0001.yaml" + ) + problem = Problem.from_yaml(yaml_url) + # TODO check something specifically different in a v2 problem + assert isinstance(problem, Problem) + + +def test_problem_from_yaml_multiple_files(): + """Test loading PEtab version 2 yaml with multiple condition / measurement + / observable files + """ + yaml_config = """ + format_version: 2.0.0 + parameter_files: [] + condition_files: [conditions1.tsv, conditions2.tsv] + measurement_files: [measurements1.tsv, measurements2.tsv] + observable_files: [observables1.tsv, observables2.tsv] + model_files: {} + experiment_files: [experiments1.tsv, experiments2.tsv] + """ + with tempfile.TemporaryDirectory() as tmpdir: + yaml_path = Path(tmpdir, "problem.yaml") + with open(yaml_path, "w") as f: + f.write(yaml_config) + + for i in (1, 2): + problem = Problem() + problem.add_condition(f"condition{i}", parameter1=i) + petab.write_condition_df( + problem.condition_df, Path(tmpdir, f"conditions{i}.tsv") + ) + + problem.add_experiment(f"experiment{i}", 0, f"condition{i}") + petab.write_experiment_df( + problem.experiment_df, Path(tmpdir, f"experiments{i}.tsv") + ) + + problem.add_measurement(f"observable{i}", f"experiment{i}", 1, 1) + petab.write_measurement_df( + problem.measurement_df, Path(tmpdir, f"measurements{i}.tsv") + ) + + problem.add_observable(f"observable{i}", 1, 1) + petab.write_observable_df( + problem.observable_df, Path(tmpdir, f"observables{i}.tsv") + ) + + petab_problem1 = petab.Problem.from_yaml(yaml_path) + + # test that we can load the problem from a dict with a custom base path + yaml_config = 
petab.load_yaml(yaml_path) + petab_problem2 = petab.Problem.from_yaml(yaml_config, base_path=tmpdir) + + for petab_problem in (petab_problem1, petab_problem2): + assert petab_problem.measurement_df.shape[0] == 2 + assert petab_problem.observable_df.shape[0] == 2 + assert petab_problem.condition_df.shape[0] == 2 + assert petab_problem.experiment_df.shape[0] == 2 + + +def test_modify_problem(): + """Test modifying a problem via the API.""" + problem = Problem() + problem.add_condition("condition1", parameter1=1) + problem.add_condition("condition2", parameter2=2) + + exp_condition_df = pd.DataFrame( + data={ + CONDITION_ID: ["condition1", "condition2"], + TARGET_ID: ["parameter1", "parameter2"], + TARGET_VALUE: [1.0, 2.0], + } + ) + assert_frame_equal( + problem.condition_df, exp_condition_df, check_dtype=False + ) + + problem.add_observable("observable1", "1") + problem.add_observable("observable2", "2", noise_formula=2.2) + + exp_observable_df = pd.DataFrame( + data={ + OBSERVABLE_ID: ["observable1", "observable2"], + OBSERVABLE_FORMULA: [1, 2], + NOISE_FORMULA: [np.nan, 2.2], + } + ).set_index([OBSERVABLE_ID]) + assert_frame_equal( + problem.observable_df[[OBSERVABLE_FORMULA, NOISE_FORMULA]].map( + lambda x: float(x) if x != "" else None + ), + exp_observable_df, + check_dtype=False, + ) + + problem.add_parameter("parameter1", True, 0, lb=1, ub=2) + problem.add_parameter("parameter2", False, 2) + + exp_parameter_df = pd.DataFrame( + data={ + PARAMETER_ID: ["parameter1", "parameter2"], + ESTIMATE: ["true", "false"], + NOMINAL_VALUE: [0.0, 2.0], + LOWER_BOUND: [1.0, np.nan], + UPPER_BOUND: [2.0, np.nan], + } + ).set_index([PARAMETER_ID]) + assert_frame_equal( + problem.parameter_df[ + [ESTIMATE, NOMINAL_VALUE, LOWER_BOUND, UPPER_BOUND] + ], + exp_parameter_df, + check_dtype=False, + ) + + problem.add_mapping("new_petab_id", "some_model_entity_id") + + exp_mapping_df = pd.DataFrame( + data={ + PETAB_ENTITY_ID: ["new_petab_id"], + MODEL_ENTITY_ID: 
["some_model_entity_id"], + NAME: [None], + } + ).set_index([PETAB_ENTITY_ID]) + assert_frame_equal(problem.mapping_df, exp_mapping_df, check_dtype=False) + + +def test_sample_startpoint_shape(): + """Test startpoint sampling.""" + problem = Problem() + problem += Parameter(id="p1", estimate=True, lb=1, ub=2) + problem += Parameter( + id="p2", + estimate=True, + lb=2, + ub=3, + prior_distribution="normal", + prior_parameters=[2.5, 0.5], + ) + problem += Parameter(id="p3", estimate=False, nominal_value=1) + + n_starts = 10 + sp = problem.sample_parameter_startpoints(n_starts=n_starts) + assert sp.shape == (n_starts, 2) + + +def test_problem_config_paths(): + """Test handling of URLS and local paths in ProblemConfig.""" + + pc = petab.ProblemConfig( + parameter_files=["https://example.com/params.tsv"], + condition_files=["conditions.tsv"], + measurement_files=["measurements.tsv"], + observable_files=["observables.tsv"], + experiment_files=["experiments.tsv"], + ) + assert isinstance(pc.parameter_files[0], AnyUrl) + assert isinstance(pc.condition_files[0], Path) + assert isinstance(pc.measurement_files[0], Path) + assert isinstance(pc.observable_files[0], Path) + assert isinstance(pc.experiment_files[0], Path) + + # Auto-convert to Path on assignment + pc.parameter_files = ["foo.tsv"] + assert isinstance(pc.parameter_files[0], Path) + + # We can't easily intercept mutations to the list: + # pc.parameter_files[0] = "foo.tsv" + # assert isinstance(pc.parameter_files[0], Path) + # see also https://github.com/pydantic/pydantic/issues/8575 diff --git a/tests/v2/test_problem.py b/tests/v2/test_problem.py deleted file mode 100644 index 580c691a..00000000 --- a/tests/v2/test_problem.py +++ /dev/null @@ -1,227 +0,0 @@ -import tempfile -from pathlib import Path - -import numpy as np -import pandas as pd -from pandas.testing import assert_frame_equal -from pydantic import AnyUrl - -import petab.v2 as petab -from petab.v2 import Problem -from petab.v2.C import ( - CONDITION_ID, - 
ESTIMATE, - LOWER_BOUND, - MODEL_ENTITY_ID, - NAME, - NOISE_FORMULA, - NOMINAL_VALUE, - OBSERVABLE_FORMULA, - OBSERVABLE_ID, - PARAMETER_ID, - PETAB_ENTITY_ID, - TARGET_ID, - TARGET_VALUE, - UPPER_BOUND, -) -from petab.v2.core import * - - -def test_load_remote(): - """Test loading remote files""" - from jsonschema.exceptions import ValidationError - - yaml_url = ( - "https://raw.githubusercontent.com/PEtab-dev/petab_test_suite" - "/main/petabtests/cases/v2.0.0/sbml/0010/_0010.yaml" - ) - - try: - petab_problem = Problem.from_yaml(yaml_url) - - assert ( - petab_problem.measurement_df is not None - and not petab_problem.measurement_df.empty - ) - - assert petab_problem.validate() == [] - except ValidationError: - # FIXME: Until v2 is finalized, the format of the tests will often be - # out of sync with the schema. - # Ignore validation errors for now. - pass - - -def test_auto_upgrade(): - yaml_url = ( - "https://raw.githubusercontent.com/PEtab-dev/petab_test_suite" - "/main/petabtests/cases/v1.0.0/sbml/0001/_0001.yaml" - ) - problem = Problem.from_yaml(yaml_url) - # TODO check something specifically different in a v2 problem - assert isinstance(problem, Problem) - - -def test_problem_from_yaml_multiple_files(): - """Test loading PEtab version 2 yaml with multiple condition / measurement - / observable files - """ - yaml_config = """ - format_version: 2.0.0 - parameter_files: [] - condition_files: [conditions1.tsv, conditions2.tsv] - measurement_files: [measurements1.tsv, measurements2.tsv] - observable_files: [observables1.tsv, observables2.tsv] - model_files: {} - experiment_files: [experiments1.tsv, experiments2.tsv] - """ - with tempfile.TemporaryDirectory() as tmpdir: - yaml_path = Path(tmpdir, "problem.yaml") - with open(yaml_path, "w") as f: - f.write(yaml_config) - - for i in (1, 2): - problem = Problem() - problem.add_condition(f"condition{i}", parameter1=i) - petab.write_condition_df( - problem.condition_df, Path(tmpdir, f"conditions{i}.tsv") - ) - - 
problem.add_experiment(f"experiment{i}", 0, f"condition{i}") - petab.write_experiment_df( - problem.experiment_df, Path(tmpdir, f"experiments{i}.tsv") - ) - - problem.add_measurement(f"observable{i}", f"experiment{i}", 1, 1) - petab.write_measurement_df( - problem.measurement_df, Path(tmpdir, f"measurements{i}.tsv") - ) - - problem.add_observable(f"observable{i}", 1, 1) - petab.write_observable_df( - problem.observable_df, Path(tmpdir, f"observables{i}.tsv") - ) - - petab_problem1 = petab.Problem.from_yaml(yaml_path) - - # test that we can load the problem from a dict with a custom base path - yaml_config = petab.load_yaml(yaml_path) - petab_problem2 = petab.Problem.from_yaml(yaml_config, base_path=tmpdir) - - for petab_problem in (petab_problem1, petab_problem2): - assert petab_problem.measurement_df.shape[0] == 2 - assert petab_problem.observable_df.shape[0] == 2 - assert petab_problem.condition_df.shape[0] == 2 - assert petab_problem.experiment_df.shape[0] == 2 - - -def test_modify_problem(): - """Test modifying a problem via the API.""" - problem = Problem() - problem.add_condition("condition1", parameter1=1) - problem.add_condition("condition2", parameter2=2) - - exp_condition_df = pd.DataFrame( - data={ - CONDITION_ID: ["condition1", "condition2"], - TARGET_ID: ["parameter1", "parameter2"], - TARGET_VALUE: [1.0, 2.0], - } - ) - assert_frame_equal( - problem.condition_df, exp_condition_df, check_dtype=False - ) - - problem.add_observable("observable1", "1") - problem.add_observable("observable2", "2", noise_formula=2.2) - - exp_observable_df = pd.DataFrame( - data={ - OBSERVABLE_ID: ["observable1", "observable2"], - OBSERVABLE_FORMULA: [1, 2], - NOISE_FORMULA: [np.nan, 2.2], - } - ).set_index([OBSERVABLE_ID]) - assert_frame_equal( - problem.observable_df[[OBSERVABLE_FORMULA, NOISE_FORMULA]].map( - lambda x: float(x) if x != "" else None - ), - exp_observable_df, - check_dtype=False, - ) - - problem.add_parameter("parameter1", True, 0, lb=1, ub=2) - 
problem.add_parameter("parameter2", False, 2) - - exp_parameter_df = pd.DataFrame( - data={ - PARAMETER_ID: ["parameter1", "parameter2"], - ESTIMATE: ["true", "false"], - NOMINAL_VALUE: [0.0, 2.0], - LOWER_BOUND: [1.0, np.nan], - UPPER_BOUND: [2.0, np.nan], - } - ).set_index([PARAMETER_ID]) - assert_frame_equal( - problem.parameter_df[ - [ESTIMATE, NOMINAL_VALUE, LOWER_BOUND, UPPER_BOUND] - ], - exp_parameter_df, - check_dtype=False, - ) - - problem.add_mapping("new_petab_id", "some_model_entity_id") - - exp_mapping_df = pd.DataFrame( - data={ - PETAB_ENTITY_ID: ["new_petab_id"], - MODEL_ENTITY_ID: ["some_model_entity_id"], - NAME: [None], - } - ).set_index([PETAB_ENTITY_ID]) - assert_frame_equal(problem.mapping_df, exp_mapping_df, check_dtype=False) - - -def test_sample_startpoint_shape(): - """Test startpoint sampling.""" - problem = Problem() - problem += Parameter(id="p1", estimate=True, lb=1, ub=2) - problem += Parameter( - id="p2", - estimate=True, - lb=2, - ub=3, - prior_distribution="normal", - prior_parameters=[2.5, 0.5], - ) - problem += Parameter(id="p3", estimate=False, nominal_value=1) - - n_starts = 10 - sp = problem.sample_parameter_startpoints(n_starts=n_starts) - assert sp.shape == (n_starts, 2) - - -def test_problem_config_paths(): - """Test handling of URLS and local paths in ProblemConfig.""" - - pc = petab.ProblemConfig( - parameter_files=["https://example.com/params.tsv"], - condition_files=["conditions.tsv"], - measurement_files=["measurements.tsv"], - observable_files=["observables.tsv"], - experiment_files=["experiments.tsv"], - ) - assert isinstance(pc.parameter_files[0], AnyUrl) - assert isinstance(pc.condition_files[0], Path) - assert isinstance(pc.measurement_files[0], Path) - assert isinstance(pc.observable_files[0], Path) - assert isinstance(pc.experiment_files[0], Path) - - # Auto-convert to Path on assignment - pc.parameter_files = ["foo.tsv"] - assert isinstance(pc.parameter_files[0], Path) - - # We can't easily intercept mutations 
to the list: - # pc.parameter_files[0] = "foo.tsv" - # assert isinstance(pc.parameter_files[0], Path) - # see also https://github.com/pydantic/pydantic/issues/8575 From 79166e0e5309152ae201e867d81a1466c3c2acc9 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Wed, 23 Jul 2025 19:16:32 +0200 Subject: [PATCH 074/141] v1v2-converter: handle log10-normal (#413) PEtab v2 does not support log10-type distributions. When upconverting PEtab v1 problems, replace log10-normal by log-normal and emit a warning. --- petab/v2/petab1to2.py | 11 +++++++++++ tests/v2/test_conversion.py | 3 +++ 2 files changed, 14 insertions(+) diff --git a/petab/v2/petab1to2.py b/petab/v2/petab1to2.py index 4b040df7..5e5c5ae7 100644 --- a/petab/v2/petab1to2.py +++ b/petab/v2/petab1to2.py @@ -4,6 +4,7 @@ import re import shutil +import warnings from contextlib import suppress from pathlib import Path from tempfile import TemporaryDirectory @@ -386,6 +387,16 @@ def update_noise_dist(row): else: new_dist = f"{trans}-{dist}" + if new_dist == "log10-normal": + warnings.warn( + f"Noise distribution `{new_dist}' for " + f"observable `{row[v1.C.OBSERVABLE_ID]}'" + f" is not supported in PEtab v2. 
" + "Using `log-normal` instead.", + stacklevel=2, + ) + new_dist = v2.C.LOG_NORMAL + if new_dist not in v2.C.NOISE_DISTRIBUTIONS: raise NotImplementedError( f"Noise distribution `{new_dist}' for " diff --git a/tests/v2/test_conversion.py b/tests/v2/test_conversion.py index 6bcbb22c..eb8f9d45 100644 --- a/tests/v2/test_conversion.py +++ b/tests/v2/test_conversion.py @@ -30,6 +30,9 @@ def test_petab1to2_remote(): ) +@pytest.mark.filterwarnings( + "ignore:.*Using `log-normal` instead.*:UserWarning" +) @parametrize_or_skip def test_benchmark_collection(problem_id): """Test that we can upgrade all benchmark collection models.""" From d14023047624c4931dc2dbc0d63e51adcabe5f00 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Wed, 23 Jul 2025 19:28:44 +0200 Subject: [PATCH 075/141] Add `v2.Problem.get_{changes_for_period,measurements_for_experiment}` (#411) Easier access to changes associated with a given period, and measurements associated with a given experiment. --- petab/v2/core.py | 26 +++++++++++++++ tests/v2/test_core.py | 78 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 104 insertions(+) diff --git a/petab/v2/core.py b/petab/v2/core.py index 4b8a64aa..43ed7732 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -2207,6 +2207,32 @@ def model_dump(self, **kwargs) -> dict[str, Any]: ) return res + def get_changes_for_period(self, period: ExperimentPeriod) -> list[Change]: + """Get the changes for a given experiment period. + + :param period: The experiment period to get the changes for. + :return: A list of changes for the given period. + """ + return list( + chain.from_iterable( + self[condition].changes for condition in period.condition_ids + ) + ) + + def get_measurements_for_experiment( + self, experiment: Experiment + ) -> list[Measurement]: + """Get the measurements for a given experiment. + + :param experiment: The experiment to get the measurements for. + :return: A list of measurements for the given experiment. 
+ """ + return [ + measurement + for measurement in self.measurements + if measurement.experiment_id == experiment.id + ] + class ModelFile(BaseModel): """A file in the PEtab problem configuration.""" diff --git a/tests/v2/test_core.py b/tests/v2/test_core.py index 5ed011e0..643f9172 100644 --- a/tests/v2/test_core.py +++ b/tests/v2/test_core.py @@ -491,3 +491,81 @@ def test_problem_config_paths(): # pc.parameter_files[0] = "foo.tsv" # assert isinstance(pc.parameter_files[0], Path) # see also https://github.com/pydantic/pydantic/issues/8575 + + +def test_get_changes_for_period(): + """Test getting changes for a specific period.""" + problem = Problem() + ch1 = Change(target_id="target1", target_value=1.0) + ch2 = Change(target_id="target2", target_value=2.0) + ch3 = Change(target_id="target3", target_value=3.0) + cond1 = Condition(id="condition1_1", changes=[ch1]) + cond2 = Condition(id="condition1_2", changes=[ch2]) + cond3 = Condition(id="condition2", changes=[ch3]) + problem += cond1 + problem += cond2 + problem += cond3 + + p1 = ExperimentPeriod( + id="p1", time=0, condition_ids=["condition1_1", "condition1_2"] + ) + p2 = ExperimentPeriod(id="p2", time=1, condition_ids=["condition2"]) + problem += Experiment( + id="exp1", + periods=[p1, p2], + ) + assert problem.get_changes_for_period(p1) == [ch1, ch2] + assert problem.get_changes_for_period(p2) == [ch3] + + +def test_get_measurements_for_experiment(): + """Test getting measurements for an experiment.""" + problem = Problem() + problem += Condition( + id="condition1", + changes=[Change(target_id="target1", target_value=1.0)], + ) + problem += Condition( + id="condition2", + changes=[Change(target_id="target2", target_value=2.0)], + ) + + e1 = Experiment( + id="exp1", + periods=[ + ExperimentPeriod(id="p1", time=0, condition_ids=["condition1"]), + ], + ) + e2 = Experiment( + id="exp2", + periods=[ + ExperimentPeriod(id="p2", time=1, condition_ids=["condition2"]), + ], + ) + problem += e1 + problem += e2 + + m1 = 
Measurement( + observable_id="observable1", + experiment_id="exp1", + time=0, + measurement=10.0, + ) + m2 = Measurement( + observable_id="observable2", + experiment_id="exp1", + time=1, + measurement=20.0, + ) + m3 = Measurement( + observable_id="observable3", + experiment_id="exp2", + time=1, + measurement=30.0, + ) + problem += m1 + problem += m2 + problem += m3 + + assert problem.get_measurements_for_experiment(e1) == [m1, m2] + assert problem.get_measurements_for_experiment(e2) == [m3] From 8977adb5fc3757847a2872200c4e5499a01b9951 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Tue, 29 Jul 2025 06:57:20 +0200 Subject: [PATCH 076/141] Remove parameter mapping from v2.Problem (#419) Leftover from v1. Never worked. Not planned to implement. --- petab/v2/core.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/petab/v2/core.py b/petab/v2/core.py index 43ed7732..8170bd53 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -32,7 +32,6 @@ from typing_extensions import Self from ..v1 import ( - parameter_mapping, validate_yaml_syntax, yaml, ) @@ -1775,24 +1774,6 @@ def x_fixed_indices(self) -> list[int]: """Parameter table non-estimated parameter indices.""" return [i for i, p in enumerate(self.parameters) if not p.estimate] - # TODO remove in v2? - def get_optimization_to_simulation_parameter_mapping(self, **kwargs): - """ - See - :py:func:`petab.parameter_mapping.get_optimization_to_simulation_parameter_mapping`, - to which all keyword arguments are forwarded. - """ - return ( - parameter_mapping.get_optimization_to_simulation_parameter_mapping( - condition_df=self.condition_df, - measurement_df=self.measurement_df, - parameter_df=self.parameter_df, - observable_df=self.observable_df, - model=self.model, - **kwargs, - ) - ) - def get_priors(self) -> dict[str, Distribution]: """Get prior distributions. 
From 5e267691ee1d76a244e4a1eb4db935965b14c67b Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Tue, 29 Jul 2025 07:21:32 +0200 Subject: [PATCH 077/141] Refactor `v2.*Tables` (#417) DRY Introduce `BaseTable` to implement common functionality of `{Observable,Parameter,...}Table`. --- doc/modules.rst | 1 - petab/v2/converters.py | 2 +- petab/v2/core.py | 443 +++++++++++++++++------------------------ tests/v2/test_core.py | 4 +- 4 files changed, 181 insertions(+), 269 deletions(-) diff --git a/doc/modules.rst b/doc/modules.rst index 6dacba5a..1eb0220c 100644 --- a/doc/modules.rst +++ b/doc/modules.rst @@ -37,5 +37,4 @@ API Reference petab.v2.experiments petab.v2.lint petab.v2.models - petab.v2.problem petab.v2.petab1to2 diff --git a/petab/v2/converters.py b/petab/v2/converters.py index f6d185b5..ae4f5888 100644 --- a/petab/v2/converters.py +++ b/petab/v2/converters.py @@ -401,7 +401,7 @@ def _add_indicators_to_conditions(self) -> None: # removed. Only keep the conditions setting our indicators. problem.condition_tables = [ ConditionTable( - conditions=[ + [ condition for condition in problem.conditions if condition.id.startswith("_petab") diff --git a/petab/v2/core.py b/petab/v2/core.py index 8170bd53..ff1efb28 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -6,13 +6,14 @@ import os import tempfile import traceback +from abc import abstractmethod from collections.abc import Sequence from enum import Enum from itertools import chain from math import nan from numbers import Number from pathlib import Path -from typing import TYPE_CHECKING, Annotated, Any +from typing import TYPE_CHECKING, Annotated, Any, Generic, TypeVar, get_args import numpy as np import pandas as pd @@ -112,7 +113,7 @@ def _valid_petab_id(v: str) -> str: class ParameterScale(str, Enum): """Parameter scales. - Parameter scales as used in the PEtab parameters table. + Parameter scales as used in the PEtab parameter table. 
""" LIN = C.LIN @@ -123,7 +124,7 @@ class ParameterScale(str, Enum): class NoiseDistribution(str, Enum): """Noise distribution types. - Noise distributions as used in the PEtab observables table. + Noise distributions as used in the PEtab observable table. """ #: Normal distribution @@ -141,7 +142,7 @@ class NoiseDistribution(str, Enum): class PriorDistribution(str, Enum): """Prior types. - Prior types as used in the PEtab parameters table. + Prior types as used in the PEtab parameter table. """ #: Cauchy distribution. @@ -196,6 +197,89 @@ class PriorDistribution(str, Enum): ) +T = TypeVar("T", bound=BaseModel) + + +class BaseTable(BaseModel, Generic[T]): + """Base class for PEtab tables.""" + + elements: list[T] + + def __init__(self, elements: list[T] = None) -> None: + """Initialize the BaseTable with a list of elements.""" + if elements is None: + elements = [] + super().__init__(elements=elements) + + def __getitem__(self, id_: str) -> T: + """Get an element by ID. + + :param id_: The ID of the element to retrieve. + :return: The element with the given ID. + :raises KeyError: If no element with the given ID exists. + :raises NotImplementedError: + If the element type does not have an ID attribute. 
+ """ + if "id" not in self._element_class().model_fields: + raise NotImplementedError( + f"__getitem__ is not implemented for {self.__class__.__name__}" + ) + + for element in self.elements: + if element.id == id_: + return element + + raise KeyError(f"{T.__name__} ID {id_} not found") + + @classmethod + @abstractmethod + def from_df(cls, df: pd.DataFrame) -> BaseTable[T]: + """Create a table from a DataFrame.""" + pass + + @abstractmethod + def to_df(self) -> pd.DataFrame: + """Convert the table to a DataFrame.""" + pass + + @classmethod + def from_tsv(cls, file_path: str | Path) -> BaseTable[T]: + """Create table from a TSV file.""" + df = pd.read_csv(file_path, sep="\t") + return cls.from_df(df) + + def to_tsv(self, file_path: str | Path) -> None: + """Write the table to a TSV file.""" + df = self.to_df() + df.to_csv( + file_path, sep="\t", index=not isinstance(df.index, pd.RangeIndex) + ) + + @classmethod + def _element_class(cls) -> type[T]: + """Get the class of the elements in the table.""" + return get_args(cls.model_fields["elements"].annotation)[0] + + def __add__(self, other: T) -> BaseTable[T]: + """Add an item to the table.""" + if not isinstance(other, self._element_class()): + raise TypeError( + f"Can only add {self._element_class().__name__} " + f"to {self.__class__.__name__}" + ) + return self.__class__(elements=self.elements + [other]) + + def __iadd__(self, other: T) -> BaseTable[T]: + """Add an item to the table in place.""" + if not isinstance(other, self._element_class()): + raise TypeError( + f"Can only add {self._element_class().__name__} " + f"to {self.__class__.__name__}" + ) + self.elements.append(other) + return self + + class Observable(BaseModel): """Observable definition.""" @@ -273,24 +357,19 @@ def _sympify_id_list(cls, v): return [sympify_petab(_valid_petab_id(pid)) for pid in v if pid] -class ObservableTable(BaseModel): +class ObservableTable(BaseTable[Observable]): """PEtab observable table.""" - #: List of observables. 
- observables: list[Observable] - - def __getitem__(self, observable_id: str) -> Observable: - """Get an observable by ID.""" - for observable in self.observables: - if observable.id == observable_id: - return observable - raise KeyError(f"Observable ID {observable_id} not found") + @property + def observables(self) -> list[Observable]: + """List of observables.""" + return self.elements @classmethod def from_df(cls, df: pd.DataFrame) -> ObservableTable: """Create an ObservableTable from a DataFrame.""" if df is None: - return cls(observables=[]) + return cls() df = get_observable_df(df) observables = [ @@ -298,11 +377,11 @@ def from_df(cls, df: pd.DataFrame) -> ObservableTable: for _, row in df.reset_index().iterrows() ] - return cls(observables=observables) + return cls(observables) def to_df(self) -> pd.DataFrame: """Convert the ObservableTable to a DataFrame.""" - records = self.model_dump(by_alias=True)["observables"] + records = self.model_dump(by_alias=True)["elements"] for record in records: obs = record[C.OBSERVABLE_FORMULA] noise = record[C.NOISE_FORMULA] @@ -316,30 +395,6 @@ def to_df(self) -> pd.DataFrame: ) return pd.DataFrame(records).set_index([C.OBSERVABLE_ID]) - @classmethod - def from_tsv(cls, file_path: str | Path) -> ObservableTable: - """Create an ObservableTable from a TSV file.""" - df = pd.read_csv(file_path, sep="\t") - return cls.from_df(df) - - def to_tsv(self, file_path: str | Path) -> None: - """Write the ObservableTable to a TSV file.""" - df = self.to_df() - df.to_csv(file_path, sep="\t", index=True) - - def __add__(self, other: Observable) -> ObservableTable: - """Add an observable to the table.""" - if not isinstance(other, Observable): - raise TypeError("Can only add Observable to ObservableTable") - return ObservableTable(observables=self.observables + [other]) - - def __iadd__(self, other: Observable) -> ObservableTable: - """Add an observable to the table in place.""" - if not isinstance(other, Observable): - raise 
TypeError("Can only add Observable to ObservableTable") - self.observables.append(other) - return self - class Change(BaseModel): """A change to the model or model state. @@ -386,7 +441,7 @@ class Condition(BaseModel): A set of simultaneously occurring changes to the model or model state, corresponding to a perturbation of the underlying system. This corresponds - to all rows of the PEtab conditions table with the same condition ID. + to all rows of the PEtab condition table with the same condition ID. >>> Condition( ... id="condition1", @@ -427,31 +482,26 @@ def __iadd__(self, other: Change) -> Condition: return self -class ConditionTable(BaseModel): - """PEtab conditions table.""" +class ConditionTable(BaseTable[Condition]): + """PEtab condition table.""" - #: List of conditions. - conditions: list[Condition] = [] - - def __getitem__(self, condition_id: str) -> Condition: - """Get a condition by ID.""" - for condition in self.conditions: - if condition.id == condition_id: - return condition - raise KeyError(f"Condition ID {condition_id} not found") + @property + def conditions(self) -> list[Condition]: + """List of conditions.""" + return self.elements @classmethod def from_df(cls, df: pd.DataFrame) -> ConditionTable: """Create a ConditionTable from a DataFrame.""" if df is None or df.empty: - return cls(conditions=[]) + return cls() conditions = [] for condition_id, sub_df in df.groupby(C.CONDITION_ID): changes = [Change(**row) for row in sub_df.to_dict("records")] conditions.append(Condition(id=condition_id, changes=changes)) - return cls(conditions=conditions) + return cls(conditions) def to_df(self) -> pd.DataFrame: """Convert the ConditionTable to a DataFrame.""" @@ -472,30 +522,6 @@ def to_df(self) -> pd.DataFrame: else pd.DataFrame(columns=C.CONDITION_DF_REQUIRED_COLS) ) - @classmethod - def from_tsv(cls, file_path: str | Path) -> ConditionTable: - """Create a ConditionTable from a TSV file.""" - df = pd.read_csv(file_path, sep="\t") - return 
cls.from_df(df) - - def to_tsv(self, file_path: str | Path) -> None: - """Write the ConditionTable to a TSV file.""" - df = self.to_df() - df.to_csv(file_path, sep="\t", index=False) - - def __add__(self, other: Condition) -> ConditionTable: - """Add a condition to the table.""" - if not isinstance(other, Condition): - raise TypeError("Can only add Condition to ConditionTable") - return ConditionTable(conditions=self.conditions + [other]) - - def __iadd__(self, other: Condition) -> ConditionTable: - """Add a condition to the table in place.""" - if not isinstance(other, Condition): - raise TypeError("Can only add Condition to ConditionTable") - self.conditions.append(other) - return self - @property def free_symbols(self) -> set[sp.Symbol]: """Get all free symbols in the condition table. @@ -518,7 +544,7 @@ class ExperimentPeriod(BaseModel): """A period of a timecourse or experiment defined by a start time and a list of condition IDs. - This corresponds to a row of the PEtab experiments table. + This corresponds to a row of the PEtab experiment table. """ #: The start time of the period in time units as defined in the model. @@ -559,7 +585,7 @@ class Experiment(BaseModel): """An experiment or a timecourse defined by an ID and a set of different periods. - Corresponds to a group of rows of the PEtab experiments table with the same + Corresponds to a group of rows of the PEtab experiment table with the same experiment ID. """ @@ -601,17 +627,19 @@ def sort_periods(self) -> None: self.periods.sort(key=lambda period: period.time) -class ExperimentTable(BaseModel): - """PEtab experiments table.""" +class ExperimentTable(BaseTable[Experiment]): + """PEtab experiment table.""" - #: List of experiments. 
- experiments: list[Experiment] + @property + def experiments(self) -> list[Experiment]: + """List of experiments.""" + return self.elements @classmethod def from_df(cls, df: pd.DataFrame) -> ExperimentTable: """Create an ExperimentTable from a DataFrame.""" if df is None: - return cls(experiments=[]) + return cls() experiments = [] for experiment_id, cur_exp_df in df.groupby(C.EXPERIMENT_ID): @@ -631,7 +659,7 @@ def from_df(cls, df: pd.DataFrame) -> ExperimentTable: ) experiments.append(Experiment(id=experiment_id, periods=periods)) - return cls(experiments=experiments) + return cls(experiments) def to_df(self) -> pd.DataFrame: """Convert the ExperimentTable to a DataFrame.""" @@ -651,37 +679,6 @@ def to_df(self) -> pd.DataFrame: else pd.DataFrame(columns=C.EXPERIMENT_DF_REQUIRED_COLS) ) - @classmethod - def from_tsv(cls, file_path: str | Path) -> ExperimentTable: - """Create an ExperimentTable from a TSV file.""" - df = pd.read_csv(file_path, sep="\t") - return cls.from_df(df) - - def to_tsv(self, file_path: str | Path) -> None: - """Write the ExperimentTable to a TSV file.""" - df = self.to_df() - df.to_csv(file_path, sep="\t", index=False) - - def __add__(self, other: Experiment) -> ExperimentTable: - """Add an experiment to the table.""" - if not isinstance(other, Experiment): - raise TypeError("Can only add Experiment to ExperimentTable") - return ExperimentTable(experiments=self.experiments + [other]) - - def __iadd__(self, other: Experiment) -> ExperimentTable: - """Add an experiment to the table in place.""" - if not isinstance(other, Experiment): - raise TypeError("Can only add Experiment to ExperimentTable") - self.experiments.append(other) - return self - - def __getitem__(self, item): - """Get an experiment by ID.""" - for experiment in self.experiments: - if experiment.id == item: - return experiment - raise KeyError(f"Experiment ID {item} not found") - class Measurement(BaseModel): """A measurement. 
@@ -761,11 +758,13 @@ def _sympify_list(cls, v): return [sympify_petab(x) for x in v] -class MeasurementTable(BaseModel): +class MeasurementTable(BaseTable[Measurement]): """PEtab measurement table.""" - #: List of measurements. - measurements: list[Measurement] + @property + def measurements(self) -> list[Measurement]: + """List of measurements.""" + return self.elements @classmethod def from_df( @@ -774,7 +773,7 @@ def from_df( ) -> MeasurementTable: """Create a MeasurementTable from a DataFrame.""" if df is None: - return cls(measurements=[]) + return cls() measurements = [ Measurement( @@ -783,11 +782,11 @@ def from_df( for _, row in df.reset_index().iterrows() ] - return cls(measurements=measurements) + return cls(measurements) def to_df(self) -> pd.DataFrame: """Convert the MeasurementTable to a DataFrame.""" - records = self.model_dump(by_alias=True)["measurements"] + records = self.model_dump(by_alias=True)["elements"] for record in records: record[C.OBSERVABLE_PARAMETERS] = C.PARAMETER_SEPARATOR.join( map(str, record[C.OBSERVABLE_PARAMETERS]) @@ -798,30 +797,6 @@ def to_df(self) -> pd.DataFrame: return pd.DataFrame(records) - @classmethod - def from_tsv(cls, file_path: str | Path) -> MeasurementTable: - """Create a MeasurementTable from a TSV file.""" - df = pd.read_csv(file_path, sep="\t") - return cls.from_df(df) - - def to_tsv(self, file_path: str | Path) -> None: - """Write the MeasurementTable to a TSV file.""" - df = self.to_df() - df.to_csv(file_path, sep="\t", index=False) - - def __add__(self, other: Measurement) -> MeasurementTable: - """Add a measurement to the table.""" - if not isinstance(other, Measurement): - raise TypeError("Can only add Measurement to MeasurementTable") - return MeasurementTable(measurements=self.measurements + [other]) - - def __iadd__(self, other: Measurement) -> MeasurementTable: - """Add a measurement to the table in place.""" - if not isinstance(other, Measurement): - raise TypeError("Can only add Measurement to 
MeasurementTable") - self.measurements.append(other) - return self - class Mapping(BaseModel): """Mapping PEtab entities to model entities.""" @@ -845,57 +820,35 @@ class Mapping(BaseModel): ) -class MappingTable(BaseModel): +class MappingTable(BaseTable[Mapping]): """PEtab mapping table.""" - #: List of mappings. - mappings: list[Mapping] + @property + def mappings(self) -> list[Mapping]: + """List of mappings.""" + return self.elements @classmethod def from_df(cls, df: pd.DataFrame) -> MappingTable: """Create a MappingTable from a DataFrame.""" if df is None: - return cls(mappings=[]) + return cls() mappings = [ Mapping(**row.to_dict()) for _, row in df.reset_index().iterrows() ] - return cls(mappings=mappings) + return cls(mappings) def to_df(self) -> pd.DataFrame: """Convert the MappingTable to a DataFrame.""" res = ( - pd.DataFrame(self.model_dump(by_alias=True)["mappings"]) + pd.DataFrame(self.model_dump(by_alias=True)["elements"]) if self.mappings else pd.DataFrame(columns=C.MAPPING_DF_REQUIRED_COLS) ) return res.set_index([C.PETAB_ENTITY_ID]) - @classmethod - def from_tsv(cls, file_path: str | Path) -> MappingTable: - """Create a MappingTable from a TSV file.""" - df = pd.read_csv(file_path, sep="\t") - return cls.from_df(df) - - def to_tsv(self, file_path: str | Path) -> None: - """Write the MappingTable to a TSV file.""" - df = self.to_df() - df.to_csv(file_path, sep="\t", index=False) - - def __add__(self, other: Mapping) -> MappingTable: - """Add a mapping to the table.""" - if not isinstance(other, Mapping): - raise TypeError("Can only add Mapping to MappingTable") - return MappingTable(mappings=self.mappings + [other]) - - def __iadd__(self, other: Mapping) -> MappingTable: - """Add a mapping to the table in place.""" - if not isinstance(other, Mapping): - raise TypeError("Can only add Mapping to MappingTable") - self.mappings.append(other) - return self - def __getitem__(self, petab_id: str) -> Mapping: """Get a mapping by PEtab ID.""" for mapping in 
self.mappings: @@ -1075,71 +1028,39 @@ def prior_dist(self) -> Distribution: return cls(*self.prior_parameters, log=log, trunc=[self.lb, self.ub]) -class ParameterTable(BaseModel): +class ParameterTable(BaseTable[Parameter]): """PEtab parameter table.""" - #: List of parameters. - parameters: list[Parameter] + @property + def parameters(self) -> list[Parameter]: + """List of parameters.""" + return self.elements @classmethod def from_df(cls, df: pd.DataFrame) -> ParameterTable: """Create a ParameterTable from a DataFrame.""" if df is None: - return cls(parameters=[]) + return cls() parameters = [ Parameter(**row.to_dict()) for _, row in df.reset_index().iterrows() ] - return cls(parameters=parameters) + return cls(parameters) def to_df(self) -> pd.DataFrame: """Convert the ParameterTable to a DataFrame.""" return pd.DataFrame( - self.model_dump(by_alias=True)["parameters"] + self.model_dump(by_alias=True)["elements"] ).set_index([C.PARAMETER_ID]) - @classmethod - def from_tsv(cls, file_path: str | Path) -> ParameterTable: - """Create a ParameterTable from a TSV file.""" - df = pd.read_csv(file_path, sep="\t") - return cls.from_df(df) - - def to_tsv(self, file_path: str | Path) -> None: - """Write the ParameterTable to a TSV file.""" - df = self.to_df() - df.to_csv(file_path, sep="\t", index=False) - - def __add__(self, other: Parameter) -> ParameterTable: - """Add a parameter to the table.""" - if not isinstance(other, Parameter): - raise TypeError("Can only add Parameter to ParameterTable") - return ParameterTable(parameters=self.parameters + [other]) - - def __iadd__(self, other: Parameter) -> ParameterTable: - """Add a parameter to the table in place.""" - if not isinstance(other, Parameter): - raise TypeError("Can only add Parameter to ParameterTable") - self.parameters.append(other) - return self - - def __getitem__(self, item) -> Parameter: - """Get a parameter by ID.""" - for parameter in self.parameters: - if parameter.id == item: - return parameter - raise 
KeyError(f"Parameter ID {item} not found") - @property def n_estimated(self) -> int: """Number of estimated parameters.""" return sum(p.estimate for p in self.parameters) -"""PEtab v2 problems.""" - - class Problem: """ PEtab parameter estimation problem @@ -1176,22 +1097,12 @@ def __init__( default_validation_tasks.copy() ) - self.observable_tables = observable_tables or [ - ObservableTable(observables=[]) - ] - self.condition_tables = condition_tables or [ - ConditionTable(conditions=[]) - ] - self.experiment_tables = experiment_tables or [ - ExperimentTable(experiments=[]) - ] - self.measurement_tables = measurement_tables or [ - MeasurementTable(measurements=[]) - ] - self.mapping_tables = mapping_tables or [MappingTable(mappings=[])] - self.parameter_tables = parameter_tables or [ - ParameterTable(parameters=[]) - ] + self.observable_tables = observable_tables or [ObservableTable()] + self.condition_tables = condition_tables or [ConditionTable()] + self.experiment_tables = experiment_tables or [ExperimentTable()] + self.measurement_tables = measurement_tables or [MeasurementTable()] + self.mapping_tables = mapping_tables or [MappingTable()] + self.parameter_tables = parameter_tables or [ParameterTable()] def __str__(self): model = f"with model ({self.model})" if self.model else "without model" @@ -1235,7 +1146,7 @@ def __getitem__(self, key): for table in table_list: try: return table[key] - except KeyError: + except (KeyError, NotImplementedError): pass raise KeyError( @@ -1483,10 +1394,9 @@ def get_problem(problem: str | Path | Problem) -> Problem: @property def condition_df(self) -> pd.DataFrame | None: """Combined condition tables as DataFrame.""" - conditions = self.conditions return ( - ConditionTable(conditions=conditions).to_df() - if conditions + ConditionTable(conditions).to_df() + if (conditions := self.conditions) else None ) @@ -1498,7 +1408,7 @@ def condition_df(self, value: pd.DataFrame): def experiment_df(self) -> pd.DataFrame | None: 
"""Experiment table as DataFrame.""" return ( - ExperimentTable(experiments=experiments).to_df() + ExperimentTable(experiments).to_df() if (experiments := self.experiments) else None ) @@ -1510,10 +1420,9 @@ def experiment_df(self, value: pd.DataFrame): @property def measurement_df(self) -> pd.DataFrame | None: """Combined measurement tables as DataFrame.""" - measurements = self.measurements return ( - MeasurementTable(measurements=measurements).to_df() - if measurements + MeasurementTable(measurements).to_df() + if (measurements := self.measurements) else None ) @@ -1524,10 +1433,9 @@ def measurement_df(self, value: pd.DataFrame): @property def parameter_df(self) -> pd.DataFrame | None: """Combined parameter tables as DataFrame.""" - parameters = self.parameters return ( - ParameterTable(parameters=parameters).to_df() - if parameters + ParameterTable(parameters).to_df() + if (parameters := self.parameters) else None ) @@ -1538,10 +1446,9 @@ def parameter_df(self, value: pd.DataFrame): @property def observable_df(self) -> pd.DataFrame | None: """Combined observable tables as DataFrame.""" - observables = self.observables return ( - ObservableTable(observables=observables).to_df() - if observables + ObservableTable(observables).to_df() + if (observables := self.observables) else None ) @@ -1552,8 +1459,11 @@ def observable_df(self, value: pd.DataFrame): @property def mapping_df(self) -> pd.DataFrame | None: """Combined mapping tables as DataFrame.""" - mappings = self.mappings - return MappingTable(mappings=mappings).to_df() if mappings else None + return ( + MappingTable(mappings).to_df() + if (mappings := self.mappings) + else None + ) @mapping_df.setter def mapping_df(self, value: pd.DataFrame): @@ -1888,7 +1798,7 @@ def add_condition( for target_id, target_value in kwargs.items() ] if not self.condition_tables: - self.condition_tables.append(ConditionTable(conditions=[])) + self.condition_tables.append(ConditionTable()) 
self.condition_tables[-1].conditions.append( Condition(id=id_, changes=changes) ) @@ -1939,7 +1849,7 @@ def add_observable( record.update(kwargs) if not self.observable_tables: - self.observable_tables.append(ObservableTable(observables=[])) + self.observable_tables.append(ObservableTable()) self.observable_tables[-1] += Observable(**record) @@ -1991,7 +1901,7 @@ def add_parameter( record.update(kwargs) if not self.parameter_tables: - self.parameter_tables.append(ParameterTable(parameters=[])) + self.parameter_tables.append(ParameterTable()) self.parameter_tables[-1] += Parameter(**record) @@ -2027,7 +1937,7 @@ def add_measurement( noise_parameters = [noise_parameters] if not self.measurement_tables: - self.measurement_tables.append(MeasurementTable(measurements=[])) + self.measurement_tables.append(MeasurementTable()) self.measurement_tables[-1].measurements.append( Measurement( @@ -2054,7 +1964,7 @@ def add_mapping( name: A name (any string) for the entity referenced by `petab_id`. """ if not self.mapping_tables: - self.mapping_tables.append(MappingTable(mappings=[])) + self.mapping_tables.append(MappingTable()) self.mapping_tables[-1].mappings.append( Mapping(petab_id=petab_id, model_id=model_id, name=name) ) @@ -2085,7 +1995,7 @@ def add_experiment(self, id_: str, *args): ] if not self.experiment_tables: - self.experiment_tables.append(ExperimentTable(experiments=[])) + self.experiment_tables.append(ExperimentTable()) self.experiment_tables[-1].experiments.append( Experiment(id=id_, periods=periods) ) @@ -2102,25 +2012,23 @@ def __iadd__(self, other): if isinstance(other, Observable): if not self.observable_tables: - self.observable_tables.append(ObservableTable(observables=[])) + self.observable_tables.append(ObservableTable()) self.observable_tables[-1] += other elif isinstance(other, Parameter): if not self.parameter_tables: - self.parameter_tables.append(ParameterTable(parameters=[])) + self.parameter_tables.append(ParameterTable()) 
self.parameter_tables[-1] += other elif isinstance(other, Measurement): if not self.measurement_tables: - self.measurement_tables.append( - MeasurementTable(measurements=[]) - ) + self.measurement_tables.append(MeasurementTable()) self.measurement_tables[-1] += other elif isinstance(other, Condition): if not self.condition_tables: - self.condition_tables.append(ConditionTable(conditions=[])) + self.condition_tables.append(ConditionTable()) self.condition_tables[-1] += other elif isinstance(other, Experiment): if not self.experiment_tables: - self.experiment_tables.append(ExperimentTable(experiments=[])) + self.experiment_tables.append(ExperimentTable()) self.experiment_tables[-1] += other else: raise ValueError( @@ -2155,7 +2063,7 @@ def model_dump(self, **kwargs) -> dict[str, Any]: 'measurement_files': [], 'model_files': {}, 'observable_files': [], - 'parameter_file': []}, + 'parameter_files': []}, 'experiments': [], 'mappings': [], 'measurements': [], @@ -2182,7 +2090,12 @@ def model_dump(self, **kwargs) -> dict[str, Any]: ("mappings", self.mapping_tables), ): res[field] = ( - [table.model_dump(**kwargs) for table in table_list] + list( + chain.from_iterable( + table.model_dump(**kwargs)["elements"] + for table in table_list + ) + ) if table_list else [] ) diff --git a/tests/v2/test_core.py b/tests/v2/test_core.py index 643f9172..da75dccd 100644 --- a/tests/v2/test_core.py +++ b/tests/v2/test_core.py @@ -273,7 +273,7 @@ def test_condition_table(): assert ( ConditionTable( - conditions=[ + [ Condition( id="condition1", changes=[Change(target_id="k1", target_value="true")], @@ -284,7 +284,7 @@ def test_condition_table(): ) assert ConditionTable( - conditions=[ + [ Condition( id="condition1", changes=[Change(target_id="k1", target_value=x / y)], From f6361676ed7524ae3cdc555a4f43efc50a1a2960 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Tue, 29 Jul 2025 07:29:52 +0200 Subject: [PATCH 078/141] Fix PetabStrPrinter for powers (#421) Power expressions with 
non-atomic bases or exponents were not printed correctly. Fixed here. --- petab/v1/math/printer.py | 8 +++++++- tests/v1/math/test_math.py | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/petab/v1/math/printer.py b/petab/v1/math/printer.py index 347c1328..a421989c 100644 --- a/petab/v1/math/printer.py +++ b/petab/v1/math/printer.py @@ -37,7 +37,13 @@ def _print_BooleanFalse(self, expr): def _print_Pow(self, expr: sp.Pow): """Custom printing for the power operator""" base, exp = expr.as_base_exp() - return f"{self._print(base)} ^ {self._print(exp)}" + str_base = self._print(base) + str_exp = self._print(exp) + if not base.is_Atom: + str_base = f"({str_base})" + if not exp.is_Atom: + str_exp = f"({str_exp})" + return f"{str_base} ^ {str_exp}" def _print_Infinity(self, expr): """Custom printing for infinity""" diff --git a/tests/v1/math/test_math.py b/tests/v1/math/test_math.py index 940c5340..60bb04b5 100644 --- a/tests/v1/math/test_math.py +++ b/tests/v1/math/test_math.py @@ -38,9 +38,11 @@ def test_assumptions(): def test_printer(): + a, b, c, d = sp.symbols("a b c d", real=True) assert petab_math_str(None) == "" assert petab_math_str(BooleanTrue()) == "true" assert petab_math_str(BooleanFalse()) == "false" + assert petab_math_str((a + b) ** (c + d)) == "(a + b) ^ (c + d)" def read_cases(): From 3156df2f893239e78ff450466b998f849f8653b3 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Tue, 29 Jul 2025 07:30:41 +0200 Subject: [PATCH 079/141] v2: Basic support for multiple models (#418) Related to #392. * Let v2.Problem have a list of models * Support constructing v2.Problem from files with multiple models * Move some validators to Annotated * Add some TODOs. 
--- petab/v2/C.py | 4 ++ petab/v2/converters.py | 5 ++ petab/v2/core.py | 140 ++++++++++++++++++++++++++--------------- petab/v2/lint.py | 4 ++ tests/v2/test_core.py | 14 ++++- 5 files changed, 115 insertions(+), 52 deletions(-) diff --git a/petab/v2/C.py b/petab/v2/C.py index 9630ceff..5bb73980 100644 --- a/petab/v2/C.py +++ b/petab/v2/C.py @@ -38,6 +38,9 @@ #: Replicate ID column in the measurement table REPLICATE_ID = "replicateId" +#: The model ID column in the measurement table +MODEL_ID = "modelId" + #: Mandatory columns of measurement table MEASUREMENT_DF_REQUIRED_COLS = [ OBSERVABLE_ID, @@ -52,6 +55,7 @@ NOISE_PARAMETERS, DATASET_ID, REPLICATE_ID, + MODEL_ID, ] #: Measurement table columns diff --git a/petab/v2/converters.py b/petab/v2/converters.py index ae4f5888..cdb8434c 100644 --- a/petab/v2/converters.py +++ b/petab/v2/converters.py @@ -71,6 +71,11 @@ def __init__(self, problem: Problem, default_priority: float = None): To ensure that the PEtab condition-start-events are executed before any other events, all events should have a priority set. """ + if len(problem.models) > 1: + # https://github.com/PEtab-dev/libpetab-python/issues/392 + raise NotImplementedError( + "Only single-model PEtab problems are supported." + ) if not isinstance(problem.model, SbmlModel): raise ValueError("Only SBML models are supported.") diff --git a/petab/v2/core.py b/petab/v2/core.py index ff1efb28..349778aa 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -110,6 +110,15 @@ def _valid_petab_id(v: str) -> str: return v +def _valid_petab_id_or_none(v: str) -> str: + """Field validator for optional PEtab IDs.""" + if not v: + return None + if not is_valid_identifier(v): + raise ValueError(f"Invalid ID: {v}") + return v + + class ParameterScale(str, Enum): """Parameter scales. @@ -687,10 +696,18 @@ class Measurement(BaseModel): experiment. """ + #: The model ID. 
+ model_id: Annotated[ + str | None, BeforeValidator(_valid_petab_id_or_none) + ] = Field(alias=C.MODEL_ID, default=None) #: The observable ID. - observable_id: str = Field(alias=C.OBSERVABLE_ID) + observable_id: Annotated[str, BeforeValidator(_valid_petab_id)] = Field( + alias=C.OBSERVABLE_ID + ) #: The experiment ID. - experiment_id: str | None = Field(alias=C.EXPERIMENT_ID, default=None) + experiment_id: Annotated[ + str | None, BeforeValidator(_valid_petab_id_or_none) + ] = Field(alias=C.EXPERIMENT_ID, default=None) #: The time point of the measurement in time units as defined in the model. time: Annotated[float, AfterValidator(_is_finite_or_pos_inf)] = Field( alias=C.TIME @@ -728,17 +745,6 @@ def convert_nan_to_none(cls, v, info: ValidationInfo): return cls.model_fields[info.field_name].default return v - @field_validator("observable_id", "experiment_id") - @classmethod - def _validate_id(cls, v, info: ValidationInfo): - if not v: - if info.field_name == "experiment_id": - return None - raise ValueError("ID must not be empty.") - if not is_valid_identifier(v): - raise ValueError(f"Invalid ID: {v}") - return v - @field_validator( "observable_parameters", "noise_parameters", mode="before" ) @@ -775,6 +781,9 @@ def from_df( if df is None: return cls() + if C.MODEL_ID in df.columns: + df[C.MODEL_ID] = df[C.MODEL_ID].apply(_convert_nan_to_none) + measurements = [ Measurement( **row.to_dict(), @@ -868,7 +877,9 @@ class Parameter(BaseModel): """Parameter definition.""" #: Parameter ID. - id: str = Field(alias=C.PARAMETER_ID) + id: Annotated[str, BeforeValidator(_valid_petab_id)] = Field( + alias=C.PARAMETER_ID + ) #: Lower bound. 
lb: Annotated[float | None, BeforeValidator(_convert_nan_to_none)] = Field( alias=C.LOWER_BOUND, default=None @@ -901,15 +912,6 @@ class Parameter(BaseModel): validate_assignment=True, ) - @field_validator("id") - @classmethod - def _validate_id(cls, v): - if not v: - raise ValueError("ID must not be empty.") - if not is_valid_identifier(v): - raise ValueError(f"Invalid ID: {v}") - return v - @field_validator("prior_parameters", mode="before") @classmethod def _validate_prior_parameters( @@ -1067,20 +1069,20 @@ class Problem: A PEtab parameter estimation problem as defined by - - model - - condition table - - experiment table - - measurement table - - parameter table - - observable table - - mapping table + - models + - condition tables + - experiment tables + - measurement tables + - parameter tables + - observable tables + - mapping tables See also :doc:`petab:v2/documentation_data_format`. """ def __init__( self, - model: Model = None, + models: list[Model] = None, condition_tables: list[ConditionTable] = None, experiment_tables: list[ExperimentTable] = None, observable_tables: list[ObservableTable] = None, @@ -1092,7 +1094,7 @@ def __init__( from ..v2.lint import default_validation_tasks self.config = config - self.model: Model | None = model + self.models: list[Model] = models or [] self.validation_tasks: list[ValidationTask] = ( default_validation_tasks.copy() ) @@ -1210,13 +1212,6 @@ def get_path(filename): f"{yaml_config[C.FORMAT_VERSION]}." ) - if len(yaml_config[C.MODEL_FILES]) > 1: - raise ValueError( - "petab.v2.Problem.from_yaml() can only be used for " - "yaml files comprising a single model. " - "Consider using " - "petab.v2.CompositeProblem.from_yaml() instead." 
- ) config = ProblemConfig( **yaml_config, base_path=base_path, filepath=yaml_file ) @@ -1225,19 +1220,14 @@ def get_path(filename): for f in config.parameter_files ] - if len(config.model_files or []) > 1: - # TODO https://github.com/PEtab-dev/libpetab-python/issues/6 - raise NotImplementedError( - "Support for multiple models is not yet implemented." - ) - model = None - if config.model_files: - model_id, model_info = next(iter(config.model_files.items())) - model = model_factory( + models = [ + model_factory( get_path(model_info.location), model_info.language, model_id=model_id, ) + for model_id, model_info in (config.model_files or {}).items() + ] measurement_tables = ( [ @@ -1283,7 +1273,7 @@ def get_path(filename): return Problem( config=config, - model=model, + models=models, condition_tables=condition_tables, experiment_tables=experiment_tables, observable_tables=observable_tables, @@ -1316,6 +1306,7 @@ def from_dfs( model: The underlying model config: The PEtab problem configuration """ + # TODO: do we really need this? observable_table = ObservableTable.from_df(observable_df) condition_table = ConditionTable.from_df(condition_df) @@ -1325,7 +1316,7 @@ def from_dfs( parameter_table = ParameterTable.from_df(parameter_df) return Problem( - model=model, + models=[model], condition_tables=[condition_table], experiment_tables=[experiment_table], observable_tables=[observable_table], @@ -1391,6 +1382,39 @@ def get_problem(problem: str | Path | Problem) -> Problem: "or a PEtab problem object." ) + @property + def model(self) -> Model | None: + """The model of the problem. + + This is a convenience property for `Problem`s with only one single + model. + + :return: + The model of the problem, or None if no model is defined. + :raises: + ValueError: If the problem has more than one model defined. + """ + if len(self.models) == 1: + return self.models[0] + + if len(self.models) == 0: + return None + + raise ValueError( + "Problem contains more than one model. 
" + "Use `Problem.models` to access all models." + ) + + @model.setter + def model(self, value: Model): + """Set the model of the problem. + + This is a convenience setter for `Problem`s with only one single + model. This will replace any existing models in the problem with the + provided model. + """ + self.models = [value] + @property def condition_df(self) -> pd.DataFrame | None: """Combined condition tables as DataFrame.""" @@ -1745,6 +1769,7 @@ def validate( ) validation_results = ValidationResultList() + if self.config and self.config.extensions: extensions = ",".join(self.config.extensions.keys()) validation_results.append( @@ -1756,6 +1781,19 @@ def validate( ) ) + if len(self.models) > 1: + # TODO https://github.com/PEtab-dev/libpetab-python/issues/392 + # We might just want to split the problem into multiple + # problems, one for each model, and then validate each + # problem separately. + validation_results.append( + ValidationIssue( + ValidationIssueSeverity.WARNING, + "Problem contains multiple models. " + "Validation is not yet fully supported.", + ) + ) + for task in validation_tasks or self.validation_tasks: try: cur_result = task.run(self) @@ -2043,7 +2081,7 @@ def model_dump(self, **kwargs) -> dict[str, Any]: used for serialization. The output of this function may change without notice. - The output includes all PEtab tables, but not the model itself. + The output includes all PEtab tables, but not the models. See `pydantic.BaseModel.model_dump `__ for details. 
diff --git a/petab/v2/lint.py b/petab/v2/lint.py index f323ef06..f7a8daec 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -769,6 +769,10 @@ def run(self, problem: Problem) -> ValidationIssue | None: return None +# TODO: check that Measurements model IDs match the available ones +# https://github.com/PEtab-dev/libpetab-python/issues/392 + + def get_valid_parameters_for_parameter_table( problem: Problem, ) -> set[str]: diff --git a/tests/v2/test_core.py b/tests/v2/test_core.py index da75dccd..0363d162 100644 --- a/tests/v2/test_core.py +++ b/tests/v2/test_core.py @@ -28,6 +28,7 @@ UPPER_BOUND, ) from petab.v2.core import * +from petab.v2.models.sbml_model import SbmlModel from petab.v2.petab1to2 import petab1to2 example_dir_fujita = Path(__file__).parents[2] / "doc/example/example_Fujita" @@ -335,10 +336,16 @@ def test_problem_from_yaml_multiple_files(): yaml_config = """ format_version: 2.0.0 parameter_files: [] + model_files: + model1: + location: model1.xml + language: sbml + model2: + location: model2.xml + language: sbml condition_files: [conditions1.tsv, conditions2.tsv] measurement_files: [measurements1.tsv, measurements2.tsv] observable_files: [observables1.tsv, observables2.tsv] - model_files: {} experiment_files: [experiments1.tsv, experiments2.tsv] """ with tempfile.TemporaryDirectory() as tmpdir: @@ -347,6 +354,10 @@ def test_problem_from_yaml_multiple_files(): f.write(yaml_config) for i in (1, 2): + SbmlModel.from_antimony("a = 1;").to_file( + Path(tmpdir, f"model{i}.xml") + ) + problem = Problem() problem.add_condition(f"condition{i}", parameter1=i) petab.write_condition_df( @@ -375,6 +386,7 @@ def test_problem_from_yaml_multiple_files(): petab_problem2 = petab.Problem.from_yaml(yaml_config, base_path=tmpdir) for petab_problem in (petab_problem1, petab_problem2): + assert len(petab_problem.models) == 2 assert petab_problem.measurement_df.shape[0] == 2 assert petab_problem.observable_df.shape[0] == 2 assert petab_problem.condition_df.shape[0] == 
2 From aeced6d5e56e31159345169f205d0fe132cb4ada Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Tue, 29 Jul 2025 07:38:31 +0200 Subject: [PATCH 080/141] Add `Experiment.sorted_periods` (#420) Add `Experiment.sorted_periods` and extend tests. --- petab/v2/core.py | 5 +++++ tests/v2/test_core.py | 26 ++++++++++++++++++++++---- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/petab/v2/core.py b/petab/v2/core.py index 349778aa..6b09c679 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -631,6 +631,11 @@ def has_preequilibration(self) -> bool: """Check if the experiment has preequilibration enabled.""" return any(period.is_preequilibration for period in self.periods) + @property + def sorted_periods(self) -> list[ExperimentPeriod]: + """Get the periods of the experiment sorted by time.""" + return sorted(self.periods, key=lambda period: period.time) + def sort_periods(self) -> None: """Sort the periods of the experiment by time.""" self.periods.sort(key=lambda period: period.time) diff --git a/tests/v2/test_core.py b/tests/v2/test_core.py index 0363d162..c646d52a 100644 --- a/tests/v2/test_core.py +++ b/tests/v2/test_core.py @@ -10,7 +10,7 @@ from sympy.abc import x, y import petab.v2 as petab -from petab.v2 import Problem +from petab.v2 import C, Problem from petab.v2.C import ( CONDITION_ID, ESTIMATE, @@ -256,18 +256,36 @@ def test_parameter(): def test_experiment(): Experiment(id="experiment1") - Experiment( - id="experiment1", periods=[ExperimentPeriod(time=1, condition_id="c1")] - ) + # extra fields allowed assert Experiment(id="experiment1", non_petab=1).non_petab == 1 + # ID required with pytest.raises(ValidationError, match="Field required"): Experiment() + # valid ID required with pytest.raises(ValidationError, match="Invalid ID"): Experiment(id="experiment 1") + periods = [ + ExperimentPeriod(time=C.TIME_PREEQUILIBRATION, condition_ids=["c1"]), + ExperimentPeriod(time=-1, condition_id="c1"), + ExperimentPeriod(time=1, condition_id="c1"), 
+ ] + e = Experiment(id="experiment1", periods=list(reversed(periods))) + + assert e.has_preequilibration is True + + assert e.sorted_periods == periods + assert e.periods != periods + + e.sort_periods() + assert e.periods == periods + + e.periods.pop(0) + assert e.has_preequilibration is False + def test_condition_table(): assert ConditionTable().free_symbols == set() From 1fcb377a6c13f71c004454988c8610040a362a4c Mon Sep 17 00:00:00 2001 From: Polina Lakrisenko Date: Fri, 8 Aug 2025 17:11:48 +0200 Subject: [PATCH 081/141] Goodness of fit plot, fix axes labels (#422) * gof plot, change to singular in axes labels * Update petab/v1/visualize/plot_residuals.py Co-authored-by: Maren Philipps <55318391+m-philipps@users.noreply.github.com> --------- Co-authored-by: Maren Philipps <55318391+m-philipps@users.noreply.github.com> --- petab/v1/visualize/plot_residuals.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/petab/v1/visualize/plot_residuals.py b/petab/v1/visualize/plot_residuals.py index 14a62f8a..230c8605 100644 --- a/petab/v1/visualize/plot_residuals.py +++ b/petab/v1/visualize/plot_residuals.py @@ -212,6 +212,6 @@ def plot_goodness_of_fit( ) ax.set_title("Goodness of fit") - ax.set_xlabel("simulated values") - ax.set_ylabel("measurements") + ax.set_xlabel("Simulated value") + ax.set_ylabel("Measurement") return ax From 101710fe505ac655ab92e172722019b04bd3f8d2 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 13 Aug 2025 15:36:11 +0200 Subject: [PATCH 082/141] build(deps): bump actions/checkout from 4 to 5 (#423) Bumps [actions/checkout](https://github.com/actions/checkout) from 4 to 5. 
- [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v4...v5) --- updated-dependencies: - dependency-name: actions/checkout dependency-version: '5' dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/ci_tests.yml | 2 +- .github/workflows/deploy.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci_tests.yml b/.github/workflows/ci_tests.yml index a719b3ef..0892ceec 100644 --- a/.github/workflows/ci_tests.yml +++ b/.github/workflows/ci_tests.yml @@ -17,7 +17,7 @@ jobs: steps: - name: Check out repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Prepare python ${{ matrix.python-version }} uses: actions/setup-python@v5 diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 46d280d4..cbe9d647 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -15,7 +15,7 @@ jobs: steps: - name: Check out repository - uses: actions/checkout@v4 + uses: actions/checkout@v5 - name: Set up python uses: actions/setup-python@v5 with: From 876c781c046e78a8909ae2301989ee12a0273232 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Tue, 19 Aug 2025 10:36:04 +0200 Subject: [PATCH 083/141] v2: Store path info in *Table objects (#416) Store path info in `*Table` and `Model` objects to make it easier to read, modify, write complete PEtab problem. Add `Problem.to_files()`. Closes #412. 
--------- Co-authored-by: Maren Philipps <55318391+m-philipps@users.noreply.github.com> --- petab/_utils.py | 35 +++++++ petab/v1/models/model.py | 28 ++++-- petab/v1/models/pysb_model.py | 31 +++++- petab/v1/models/sbml_model.py | 19 +++- petab/v2/core.py | 174 +++++++++++++++++++++++++--------- tests/v2/test_core.py | 72 ++++++++++++-- 6 files changed, 288 insertions(+), 71 deletions(-) create mode 100644 petab/_utils.py diff --git a/petab/_utils.py b/petab/_utils.py new file mode 100644 index 00000000..808cebe7 --- /dev/null +++ b/petab/_utils.py @@ -0,0 +1,35 @@ +"""Private, version-independent utility functions for PEtab.""" + +from pathlib import Path + +from pydantic import AnyUrl, TypeAdapter + +PathOrUrlAdapter = TypeAdapter(AnyUrl | Path) + + +def _generate_path( + file_path: str | Path | AnyUrl, + base_path: Path | str | AnyUrl | None = None, +) -> str: + """ + Generate a local path or URL from a file path and an optional base path. + + :return: A string representing the relative or absolute path or URL. + Absolute if `file_path` or `base_path` is an absolute path or URL, + relative otherwise. 
+ """ + if base_path is None: + return str(file_path) + + file_path = PathOrUrlAdapter.validate_python(file_path) + if isinstance(file_path, AnyUrl): + # if URL, this is absolute + return str(file_path) + + base_path = PathOrUrlAdapter.validate_python(base_path) + if isinstance(base_path, Path): + # if file_path is absolute, base_path will be ignored + return str(base_path / file_path) + + # combine URL parts + return f"{base_path}/{file_path}" diff --git a/petab/v1/models/model.py b/petab/v1/models/model.py index e25ca0b2..96613757 100644 --- a/petab/v1/models/model.py +++ b/petab/v1/models/model.py @@ -21,17 +21,22 @@ def __repr__(self): @staticmethod @abc.abstractmethod - def from_file(filepath_or_buffer: Any, model_id: str) -> Model: + def from_file( + filepath_or_buffer: Any, model_id: str, base_path: str | Path = None + ) -> Model: """Load the model from the given path/URL - :param filepath_or_buffer: URL or path of the model + :param filepath_or_buffer: + Absolute or relative path/URL to the model file. + If relative, it is interpreted relative to `base_path`, if given. + :param base_path: Base path for relative paths in the model file. :param model_id: Model ID :returns: A ``Model`` instance holding the given model """ ... @abc.abstractmethod - def to_file(self, filename: [str, Path]): + def to_file(self, filename: str | Path | None = None): """Save the model to the given file :param filename: Destination filename @@ -131,11 +136,16 @@ def is_state_variable(self, id_: str) -> bool: def model_factory( - filepath_or_buffer: Any, model_language: str, model_id: str = None + filepath_or_buffer: Any, + model_language: str, + model_id: str = None, + base_path: str | Path = None, ) -> Model: """Create a PEtab model instance from the given model - :param filepath_or_buffer: Path/URL of the model + :param filepath_or_buffer: Path/URL of the model. + Absolute or relative to `base_path` if given. + :param base_path: Base path for relative paths in the model file. 
:param model_language: PEtab model language ID for the given model :param model_id: PEtab model ID for the given model :returns: A :py:class:`Model` instance representing the given model @@ -145,12 +155,16 @@ def model_factory( if model_language == MODEL_TYPE_SBML: from .sbml_model import SbmlModel - return SbmlModel.from_file(filepath_or_buffer, model_id=model_id) + return SbmlModel.from_file( + filepath_or_buffer, model_id=model_id, base_path=base_path + ) if model_language == MODEL_TYPE_PYSB: from .pysb_model import PySBModel - return PySBModel.from_file(filepath_or_buffer, model_id=model_id) + return PySBModel.from_file( + filepath_or_buffer, model_id=model_id, base_path=base_path + ) if model_language in known_model_types: raise NotImplementedError( diff --git a/petab/v1/models/pysb_model.py b/petab/v1/models/pysb_model.py index 0b69d797..1a615e0f 100644 --- a/petab/v1/models/pysb_model.py +++ b/petab/v1/models/pysb_model.py @@ -1,5 +1,7 @@ """Functions for handling PySB models""" +from __future__ import annotations + import itertools import re import sys @@ -9,6 +11,7 @@ import pysb +from ..._utils import _generate_path from .. import is_valid_identifier from . 
import MODEL_TYPE_PYSB from .model import Model @@ -54,9 +57,18 @@ class PySBModel(Model): type_id = MODEL_TYPE_PYSB - def __init__(self, model: pysb.Model, model_id: str = None): + def __init__( + self, + model: pysb.Model, + model_id: str = None, + rel_path: Path | str | None = None, + base_path: str | Path | None = None, + ): super().__init__() + self.rel_path = rel_path + self.base_path = base_path + self.model = model self._model_id = model_id or self.model.name @@ -68,16 +80,25 @@ def __init__(self, model: pysb.Model, model_id: str = None): ) @staticmethod - def from_file(filepath_or_buffer, model_id: str = None): + def from_file( + filepath_or_buffer, model_id: str = None, base_path: str | Path = None + ) -> PySBModel: return PySBModel( - model=_pysb_model_from_path(filepath_or_buffer), model_id=model_id + model=_pysb_model_from_path( + _generate_path(filepath_or_buffer, base_path) + ), + model_id=model_id, + rel_path=filepath_or_buffer, + base_path=base_path, ) - def to_file(self, filename: [str, Path]): + def to_file(self, filename: str | Path | None = None) -> None: from pysb.export import export model_source = export(self.model, "pysb_flat") - with open(filename, "w") as f: + with open( + filename or _generate_path(self.rel_path, self.base_path), "w" + ) as f: f.write(model_source) @property diff --git a/petab/v1/models/sbml_model.py b/petab/v1/models/sbml_model.py index c6957ca6..e6fffcca 100644 --- a/petab/v1/models/sbml_model.py +++ b/petab/v1/models/sbml_model.py @@ -10,6 +10,7 @@ import sympy as sp from sympy.abc import _clash +from ..._utils import _generate_path from ..sbml import ( get_sbml_model, is_sbml_consistent, @@ -33,6 +34,8 @@ def __init__( sbml_reader: libsbml.SBMLReader = None, sbml_document: libsbml.SBMLDocument = None, model_id: str = None, + rel_path: Path | str | None = None, + base_path: str | Path | None = None, ): """Constructor. @@ -42,6 +45,9 @@ def __init__( :param model_id: Model ID. 
Defaults to the SBML model ID.""" super().__init__() + self.rel_path = rel_path + self.base_path = base_path + if sbml_model is None and sbml_document is None: raise ValueError( "Either sbml_model or sbml_document must be given." @@ -87,15 +93,19 @@ def __setstate__(self, state): self.__dict__.update(state) @staticmethod - def from_file(filepath_or_buffer, model_id: str = None) -> SbmlModel: + def from_file( + filepath_or_buffer, model_id: str = None, base_path: str | Path = None + ) -> SbmlModel: sbml_reader, sbml_document, sbml_model = get_sbml_model( - filepath_or_buffer + _generate_path(filepath_or_buffer, base_path=base_path) ) return SbmlModel( sbml_model=sbml_model, sbml_reader=sbml_reader, sbml_document=sbml_document, model_id=model_id, + rel_path=filepath_or_buffer, + base_path=base_path, ) @staticmethod @@ -159,9 +169,10 @@ def model_id(self): def model_id(self, model_id): self._model_id = model_id - def to_file(self, filename: [str, Path]): + def to_file(self, filename: str | Path | None = None) -> None: write_sbml( - self.sbml_document or self.sbml_model.getSBMLDocument(), filename + self.sbml_document or self.sbml_model.getSBMLDocument(), + filename or _generate_path(self.rel_path, self.base_path), ) def get_parameter_value(self, id_: str) -> float: diff --git a/petab/v2/core.py b/petab/v2/core.py index 6b09c679..8207c573 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -2,6 +2,7 @@ from __future__ import annotations +import copy import logging import os import tempfile @@ -32,6 +33,7 @@ ) from typing_extensions import Self +from .._utils import _generate_path from ..v1 import ( validate_yaml_syntax, yaml, @@ -212,13 +214,21 @@ class PriorDistribution(str, Enum): class BaseTable(BaseModel, Generic[T]): """Base class for PEtab tables.""" + #: The table elements elements: list[T] - - def __init__(self, elements: list[T] = None) -> None: + #: The path to the table file, if applicable. 
+ #: Relative to the base path, if the base path is set and rel_path is not + #: an absolute path. + rel_path: AnyUrl | Path | None = Field(exclude=True, default=None) + #: The base path for the table file, if applicable. + #: This is usually the directory of the PEtab YAML file. + base_path: AnyUrl | Path | None = Field(exclude=True, default=None) + + def __init__(self, elements: list[T] = None, **kwargs) -> None: """Initialize the BaseTable with a list of elements.""" if elements is None: elements = [] - super().__init__(elements=elements) + super().__init__(elements=elements, **kwargs) def __getitem__(self, id_: str) -> T: """Get an element by ID. @@ -252,16 +262,20 @@ def to_df(self) -> pd.DataFrame: pass @classmethod - def from_tsv(cls, file_path: str | Path) -> BaseTable[T]: + def from_tsv( + cls, file_path: str | Path, base_path: str | Path | None = None + ) -> BaseTable[T]: """Create table from a TSV file.""" - df = pd.read_csv(file_path, sep="\t") - return cls.from_df(df) + df = pd.read_csv(_generate_path(file_path, base_path), sep="\t") + return cls.from_df(df, rel_path=file_path, base_path=base_path) - def to_tsv(self, file_path: str | Path) -> None: + def to_tsv(self, file_path: str | Path = None) -> None: """Write the table to a TSV file.""" df = self.to_df() df.to_csv( - file_path, sep="\t", index=not isinstance(df.index, pd.RangeIndex) + file_path or _generate_path(self.rel_path, self.base_path), + sep="\t", + index=not isinstance(df.index, pd.RangeIndex), ) @classmethod @@ -375,18 +389,17 @@ def observables(self) -> list[Observable]: return self.elements @classmethod - def from_df(cls, df: pd.DataFrame) -> ObservableTable: + def from_df(cls, df: pd.DataFrame, **kwargs) -> ObservableTable: """Create an ObservableTable from a DataFrame.""" if df is None: - return cls() + return cls(**kwargs) df = get_observable_df(df) observables = [ Observable(**row.to_dict()) for _, row in df.reset_index().iterrows() ] - - return cls(observables) + return 
cls(observables, **kwargs) def to_df(self) -> pd.DataFrame: """Convert the ObservableTable to a DataFrame.""" @@ -500,17 +513,17 @@ def conditions(self) -> list[Condition]: return self.elements @classmethod - def from_df(cls, df: pd.DataFrame) -> ConditionTable: + def from_df(cls, df: pd.DataFrame, **kwargs) -> ConditionTable: """Create a ConditionTable from a DataFrame.""" if df is None or df.empty: - return cls() + return cls(**kwargs) conditions = [] for condition_id, sub_df in df.groupby(C.CONDITION_ID): changes = [Change(**row) for row in sub_df.to_dict("records")] conditions.append(Condition(id=condition_id, changes=changes)) - return cls(conditions) + return cls(conditions, **kwargs) def to_df(self) -> pd.DataFrame: """Convert the ConditionTable to a DataFrame.""" @@ -650,10 +663,10 @@ def experiments(self) -> list[Experiment]: return self.elements @classmethod - def from_df(cls, df: pd.DataFrame) -> ExperimentTable: + def from_df(cls, df: pd.DataFrame, **kwargs) -> ExperimentTable: """Create an ExperimentTable from a DataFrame.""" if df is None: - return cls() + return cls(**kwargs) experiments = [] for experiment_id, cur_exp_df in df.groupby(C.EXPERIMENT_ID): @@ -668,12 +681,13 @@ def from_df(cls, df: pd.DataFrame) -> ExperimentTable: ] periods.append( ExperimentPeriod( - time=timepoint, condition_ids=condition_ids + time=timepoint, + condition_ids=condition_ids, ) ) experiments.append(Experiment(id=experiment_id, periods=periods)) - return cls(experiments) + return cls(experiments, **kwargs) def to_df(self) -> pd.DataFrame: """Convert the ExperimentTable to a DataFrame.""" @@ -778,13 +792,10 @@ def measurements(self) -> list[Measurement]: return self.elements @classmethod - def from_df( - cls, - df: pd.DataFrame, - ) -> MeasurementTable: + def from_df(cls, df: pd.DataFrame, **kwargs) -> MeasurementTable: """Create a MeasurementTable from a DataFrame.""" if df is None: - return cls() + return cls(**kwargs) if C.MODEL_ID in df.columns: df[C.MODEL_ID] = 
df[C.MODEL_ID].apply(_convert_nan_to_none) @@ -796,7 +807,7 @@ def from_df( for _, row in df.reset_index().iterrows() ] - return cls(measurements) + return cls(measurements, **kwargs) def to_df(self) -> pd.DataFrame: """Convert the MeasurementTable to a DataFrame.""" @@ -843,16 +854,15 @@ def mappings(self) -> list[Mapping]: return self.elements @classmethod - def from_df(cls, df: pd.DataFrame) -> MappingTable: + def from_df(cls, df: pd.DataFrame, **kwargs) -> MappingTable: """Create a MappingTable from a DataFrame.""" if df is None: - return cls() + return cls(**kwargs) mappings = [ Mapping(**row.to_dict()) for _, row in df.reset_index().iterrows() ] - - return cls(mappings) + return cls(mappings, **kwargs) def to_df(self) -> pd.DataFrame: """Convert the MappingTable to a DataFrame.""" @@ -1044,17 +1054,17 @@ def parameters(self) -> list[Parameter]: return self.elements @classmethod - def from_df(cls, df: pd.DataFrame) -> ParameterTable: + def from_df(cls, df: pd.DataFrame, **kwargs) -> ParameterTable: """Create a ParameterTable from a DataFrame.""" if df is None: - return cls() + return cls(**kwargs) parameters = [ Parameter(**row.to_dict()) for _, row in df.reset_index().iterrows() ] - return cls(parameters) + return cls(parameters, **kwargs) def to_df(self) -> pd.DataFrame: """Convert the ParameterTable to a DataFrame.""" @@ -1184,11 +1194,6 @@ def from_yaml( validate_yaml_syntax(yaml_config) - def get_path(filename): - if base_path is None: - return filename - return f"{base_path}/{filename}" - if (format_version := parse_version(yaml_config[C.FORMAT_VERSION]))[ 0 ] != 2: @@ -1220,15 +1225,17 @@ def get_path(filename): config = ProblemConfig( **yaml_config, base_path=base_path, filepath=yaml_file ) + parameter_tables = [ - ParameterTable.from_tsv(get_path(f)) + ParameterTable.from_tsv(f, base_path=base_path) for f in config.parameter_files ] models = [ model_factory( - get_path(model_info.location), - model_info.language, + model_info.location, + 
base_path=base_path, + model_language=model_info.language, model_id=model_id, ) for model_id, model_info in (config.model_files or {}).items() @@ -1236,7 +1243,7 @@ def get_path(filename): measurement_tables = ( [ - MeasurementTable.from_tsv(get_path(f)) + MeasurementTable.from_tsv(f, base_path) for f in config.measurement_files ] if config.measurement_files @@ -1245,7 +1252,7 @@ def get_path(filename): condition_tables = ( [ - ConditionTable.from_tsv(get_path(f)) + ConditionTable.from_tsv(f, base_path) for f in config.condition_files ] if config.condition_files @@ -1254,7 +1261,7 @@ def get_path(filename): experiment_tables = ( [ - ExperimentTable.from_tsv(get_path(f)) + ExperimentTable.from_tsv(f, base_path) for f in config.experiment_files ] if config.experiment_files @@ -1263,7 +1270,7 @@ def get_path(filename): observable_tables = ( [ - ObservableTable.from_tsv(get_path(f)) + ObservableTable.from_tsv(f, base_path) for f in config.observable_files ] if config.observable_files @@ -1271,7 +1278,7 @@ def get_path(filename): ) mapping_tables = ( - [MappingTable.from_tsv(get_path(f)) for f in config.mapping_files] + [MappingTable.from_tsv(f, base_path) for f in config.mapping_files] if config.mapping_files else None ) @@ -1387,6 +1394,83 @@ def get_problem(problem: str | Path | Problem) -> Problem: "or a PEtab problem object." ) + def to_files(self, base_path: str | Path | None) -> None: + """Write the PEtab problem to files. + + Writes the model, condition, experiment, measurement, parameter, + observable, and mapping tables to their respective files as specified + by the `rel_path` and `base_path` of their respective objects. + + This expects that all objects have their `rel_path` and `base_path` + set correctly, which is usually done by :meth:`Problem.from_yaml`. + + :param base_path: + The base path the yaml file and tables will be written to. + If ``None``, the `base_path` of the individual tables and + :obj:`Problem.config.base_path` will be used. 
+ """ + config = copy.deepcopy(self.config) or ProblemConfig( + format_version="2.0.0" + ) + + for model in self.models: + model.to_file( + _generate_path(model.rel_path, base_path or model.base_path) + ) + + config.model_files = { + model.model_id: ModelFile( + location=model.rel_path, language=model.type_id + ) + for model in self.models + } + + config.condition_files = [ + table.rel_path for table in self.condition_tables if table.rel_path + ] + config.experiment_files = [ + table.rel_path + for table in self.experiment_tables + if table.rel_path + ] + config.observable_files = [ + table.rel_path + for table in self.observable_tables + if table.rel_path + ] + config.measurement_files = [ + table.rel_path + for table in self.measurement_tables + if table.rel_path + ] + config.parameter_files = [ + table.rel_path for table in self.parameter_tables if table.rel_path + ] + config.mapping_files = [ + table.rel_path for table in self.mapping_tables if table.rel_path + ] + + for table in chain( + self.condition_tables, + self.experiment_tables, + self.observable_tables, + self.measurement_tables, + self.parameter_tables, + self.mapping_tables, + ): + if table.rel_path: + table.to_tsv( + _generate_path( + table.rel_path, base_path or table.base_path + ) + ) + + config.to_yaml( + _generate_path( + Path(str(config.filepath)).name, base_path or config.base_path + ) + ) + @property def model(self) -> Model | None: """The model of the problem. 
diff --git a/tests/v2/test_core.py b/tests/v2/test_core.py index c646d52a..7a93ecf1 100644 --- a/tests/v2/test_core.py +++ b/tests/v2/test_core.py @@ -10,7 +10,7 @@ from sympy.abc import x, y import petab.v2 as petab -from petab.v2 import C, Problem +from petab.v2 import C from petab.v2.C import ( CONDITION_ID, ESTIMATE, @@ -40,7 +40,8 @@ def test_observable_table_round_trip(): with tempfile.TemporaryDirectory() as tmp_dir: tmp_file = Path(tmp_dir) / "observables.tsv" - observables.to_tsv(tmp_file) + observables.rel_path = tmp_file + observables.to_tsv() observables2 = ObservableTable.from_tsv(tmp_file) assert observables == observables2 @@ -51,7 +52,8 @@ def test_condition_table_round_trip(): file = Path(tmp_dir, "Fujita_experimentalCondition.tsv") conditions = ConditionTable.from_tsv(file) tmp_file = Path(tmp_dir) / "conditions.tsv" - conditions.to_tsv(tmp_file) + conditions.rel_path = tmp_file + conditions.to_tsv() conditions2 = ConditionTable.from_tsv(tmp_file) assert conditions == conditions2 @@ -353,7 +355,6 @@ def test_problem_from_yaml_multiple_files(): """ yaml_config = """ format_version: 2.0.0 - parameter_files: [] model_files: model1: location: model1.xml @@ -361,6 +362,7 @@ def test_problem_from_yaml_multiple_files(): model2: location: model2.xml language: sbml + parameter_files: [parameters1.tsv, parameters2.tsv] condition_files: [conditions1.tsv, conditions2.tsv] measurement_files: [measurements1.tsv, measurements2.tsv] observable_files: [observables1.tsv, observables2.tsv] @@ -396,6 +398,10 @@ def test_problem_from_yaml_multiple_files(): petab.write_observable_df( problem.observable_df, Path(tmpdir, f"observables{i}.tsv") ) + problem.add_parameter(f"parameter{i}", False, nominal_value=i) + petab.write_parameter_df( + problem.parameter_df, Path(tmpdir, f"parameters{i}.tsv") + ) petab_problem1 = petab.Problem.from_yaml(yaml_path) @@ -403,12 +409,25 @@ def test_problem_from_yaml_multiple_files(): yaml_config = petab.load_yaml(yaml_path) petab_problem2 
= petab.Problem.from_yaml(yaml_config, base_path=tmpdir) - for petab_problem in (petab_problem1, petab_problem2): - assert len(petab_problem.models) == 2 - assert petab_problem.measurement_df.shape[0] == 2 - assert petab_problem.observable_df.shape[0] == 2 - assert petab_problem.condition_df.shape[0] == 2 - assert petab_problem.experiment_df.shape[0] == 2 + # test that we can save the problem to a new directory + with tempfile.TemporaryDirectory() as tmpdir2: + petab_problem1.to_files(tmpdir2) + # check the same files are created + assert { + file.relative_to(tmpdir) for file in Path(tmpdir).iterdir() + } == { + file.relative_to(tmpdir2) for file in Path(tmpdir2).iterdir() + } + petab_problem3 = petab.Problem.from_yaml( + Path(tmpdir2, "problem.yaml") + ) + + for petab_problem in (petab_problem1, petab_problem2, petab_problem3): + assert len(petab_problem.models) == 2 + assert petab_problem.measurement_df.shape[0] == 2 + assert petab_problem.observable_df.shape[0] == 2 + assert petab_problem.condition_df.shape[0] == 2 + assert petab_problem.experiment_df.shape[0] == 2 def test_modify_problem(): @@ -599,3 +618,36 @@ def test_get_measurements_for_experiment(): assert problem.get_measurements_for_experiment(e1) == [m1, m2] assert problem.get_measurements_for_experiment(e2) == [m3] + + +def test_generate_path(): + import platform + + from petab._utils import _generate_path as gp + + assert gp("foo") == "foo" + assert gp(Path("foo")) == "foo" + assert gp("https://example.com/foo") == "https://example.com/foo" + assert gp(AnyUrl("https://example.com/foo")) == "https://example.com/foo" + + assert gp("foo", "bar") == str(Path("bar", "foo")) + assert gp(Path("foo"), "bar") == str(Path("bar", "foo")) + assert gp(Path("foo"), Path("bar")) == str(Path("bar", "foo")) + assert ( + gp("bar", AnyUrl("https://example.com/foo")) + == "https://example.com/foo/bar" + ) + assert ( + gp("bar", "https://example.com/foo") == "https://example.com/foo/bar" + ) + assert ( + 
gp("https://example.com/foo", "https://example.com/bar") + == "https://example.com/foo" + ) + + if platform.system() == "Windows": + assert gp(Path("foo"), "c:/bar") == "c:/bar/foo" + assert gp("c:/foo", "c:/bar") == "c:/foo" + else: + assert gp(Path("foo"), "/bar") == "/bar/foo" + assert gp("/foo", "bar") == "/foo" From 294a5dcdd2e706d368763edcd5841e9cab823947 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 25 Aug 2025 20:12:41 +0200 Subject: [PATCH 084/141] Fix missing sbmlmath dependency --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 703bae6e..3484e6cf 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -65,7 +65,8 @@ doc = [ # https://github.com/spatialaudio/nbsphinx/issues/687#issuecomment-1339271312 "ipython>=7.21.0, !=8.7.0", "pysb", - "antimony>=2.14.0" + "antimony>=2.14.0", + "sbmlmath>=0.4.0", ] vis = [ "matplotlib>=3.6.0", From 697e87376120cc3210c2a8f4f7cb227b1ba5abf2 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 25 Aug 2025 20:13:01 +0200 Subject: [PATCH 085/141] doc: Intersphinx pydantic --- doc/conf.py | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/conf.py b/doc/conf.py index 99838616..975cad03 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -52,6 +52,7 @@ "numpy": ("https://numpy.org/devdocs/", None), "sympy": ("https://docs.sympy.org/latest/", None), "python": ("https://docs.python.org/3", None), + "pydantic": ("https://docs.pydantic.dev/latest/", None), } # Add any paths that contain templates here, relative to this directory. From ee0bce4bbe0b9deafe782e2b40078a052d3f2aa2 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 25 Aug 2025 20:14:14 +0200 Subject: [PATCH 086/141] doc: remove private v1getattr (#433) This shouldn't show up in the docs. 
--- petab/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/petab/__init__.py b/petab/__init__.py index 031ca811..c6a40871 100644 --- a/petab/__init__.py +++ b/petab/__init__.py @@ -36,7 +36,7 @@ def __getattr__(name): return getattr(importlib.import_module("petab.v1"), name) -def v1getattr(name, module): +def _v1getattr(name, module): if name not in ("__path__", "__all__"): warn( f"Accessing `petab.{name}` is deprecated and will be removed in " @@ -67,7 +67,7 @@ def v1getattr(name, module): real_module = importlib.import_module( f"petab.v1.{'.'.join(v1_object_parts)}" ) - real_module.__getattr__ = partial(v1getattr, module=real_module) + real_module.__getattr__ = partial(_v1getattr, module=real_module) sys.modules[module_name] = real_module except ModuleNotFoundError: pass From 668fdf18965ff89a57ab79482c8e4ff3aad4c691 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Tue, 26 Aug 2025 13:19:28 +0200 Subject: [PATCH 087/141] `write_sbml`: mkdir (#429) Create output directory in `write_sbml` if necessary. --- petab/v1/sbml.py | 1 + 1 file changed, 1 insertion(+) diff --git a/petab/v1/sbml.py b/petab/v1/sbml.py index 9e5549d2..b939e45e 100644 --- a/petab/v1/sbml.py +++ b/petab/v1/sbml.py @@ -169,6 +169,7 @@ def write_sbml(sbml_doc: libsbml.SBMLDocument, filename: Path | str) -> None: sbml_doc: SBML document containing the SBML model filename: Destination file name """ + Path(filename).parent.mkdir(parents=True, exist_ok=True) sbml_writer = libsbml.SBMLWriter() ret = sbml_writer.writeSBMLToFile(sbml_doc, str(filename)) if not ret: From a78fa0a1c36079dbb9a882eace5b17543d80c55b Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Tue, 26 Aug 2025 13:23:06 +0200 Subject: [PATCH 088/141] Optional `experimentId` in `Problem.add_measurement` (#431) Measurements don't require an experiment ID. Therefore, make `experimentId` optional in `Problem.add_measurement`. Also, require keyword arguments there. 
--- petab/v2/core.py | 3 ++- tests/v2/test_core.py | 7 ++++++- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/petab/v2/core.py b/petab/v2/core.py index 8207c573..86f847cc 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -2035,9 +2035,10 @@ def add_parameter( def add_measurement( self, obs_id: str, - experiment_id: str, + *, time: float, measurement: float, + experiment_id: str | None = None, observable_parameters: Sequence[str | float] | str | float = None, noise_parameters: Sequence[str | float] | str | float = None, ): diff --git a/tests/v2/test_core.py b/tests/v2/test_core.py index 7a93ecf1..cab1671b 100644 --- a/tests/v2/test_core.py +++ b/tests/v2/test_core.py @@ -389,7 +389,12 @@ def test_problem_from_yaml_multiple_files(): problem.experiment_df, Path(tmpdir, f"experiments{i}.tsv") ) - problem.add_measurement(f"observable{i}", f"experiment{i}", 1, 1) + problem.add_measurement( + f"observable{i}", + experiment_id=f"experiment{i}", + time=1, + measurement=1, + ) petab.write_measurement_df( problem.measurement_df, Path(tmpdir, f"measurements{i}.tsv") ) From 8795dbfe4c5cf42d9f9eae2e88f4b536810cf45d Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Tue, 26 Aug 2025 14:44:38 +0200 Subject: [PATCH 089/141] Validate modelId in measurements (#426) Check that we have a valid model ID. Related to https://github.com/PEtab-dev/libpetab-python/issues/392. 
--- petab/v1/models/sbml_model.py | 6 ++++-- petab/v2/lint.py | 40 +++++++++++++++++++++++++++++++++-- tests/v2/test_lint.py | 27 +++++++++++++++++++++++ 3 files changed, 69 insertions(+), 4 deletions(-) diff --git a/petab/v1/models/sbml_model.py b/petab/v1/models/sbml_model.py index e6fffcca..2d31c0b9 100644 --- a/petab/v1/models/sbml_model.py +++ b/petab/v1/models/sbml_model.py @@ -130,16 +130,18 @@ def from_string(sbml_string, model_id: str = None) -> SbmlModel: ) @staticmethod - def from_antimony(ant_model: str | Path) -> SbmlModel: + def from_antimony(ant_model: str | Path, **kwargs) -> SbmlModel: """Create SBML model from an Antimony model. Requires the `antimony` package (https://github.com/sys-bio/antimony). :param ant_model: Antimony model as string or path to file. Strings are interpreted as Antimony model strings. + :param kwargs: Additional keyword arguments passed to + :meth:`SbmlModel.from_string`. """ sbml_str = antimony2sbml(ant_model) - return SbmlModel.from_string(sbml_str) + return SbmlModel.from_string(sbml_str, **kwargs) def to_antimony(self) -> str: """Convert the SBML model to an Antimony string.""" diff --git a/petab/v2/lint.py b/petab/v2/lint.py index f7a8daec..0780b340 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -27,6 +27,10 @@ "ValidationTask", "CheckModel", "CheckProblemConfig", + "CheckMeasuredObservablesDefined", + "CheckOverridesMatchPlaceholders", + "CheckMeasuredExperimentsDefined", + "CheckMeasurementModelId", "CheckPosLogMeasurements", "CheckValidConditionTargets", "CheckUniquePrimaryKeys", @@ -769,8 +773,39 @@ def run(self, problem: Problem) -> ValidationIssue | None: return None -# TODO: check that Measurements model IDs match the available ones -# https://github.com/PEtab-dev/libpetab-python/issues/392 +class CheckMeasurementModelId(ValidationTask): + """Validate model IDs of measurements.""" + + def run(self, problem: Problem) -> ValidationIssue | None: + messages = [] + available_models = {m.model_id for m in 
problem.models} + + for measurement in problem.measurements: + if not measurement.model_id: + if len(available_models) < 2: + # If there is only one model, it is not required to specify + # the model ID in the measurement table. + continue + + messages.append( + f"Measurement `{measurement}' does not have a model ID, " + "but there are multiple models available. " + "Please specify the model ID in the measurement table." + ) + continue + + if measurement.model_id not in available_models: + messages.append( + f"Measurement `{measurement}' has model ID " + f"`{measurement.model_id}' which does not match " + "any of the available models: " + f"{available_models}." + ) + + if messages: + return ValidationError("\n".join(messages)) + + return None def get_valid_parameters_for_parameter_table( @@ -1011,6 +1046,7 @@ def get_placeholders( CheckProblemConfig(), CheckModel(), CheckUniquePrimaryKeys(), + CheckMeasurementModelId(), CheckMeasuredObservablesDefined(), CheckPosLogMeasurements(), CheckOverridesMatchPlaceholders(), diff --git a/tests/v2/test_lint.py b/tests/v2/test_lint.py index 74aaaa29..12973d86 100644 --- a/tests/v2/test_lint.py +++ b/tests/v2/test_lint.py @@ -37,3 +37,30 @@ def test_check_incompatible_targets(): problem["e1"].periods[0].condition_ids.append("c2") assert (error := check.run(problem)) is not None assert "overlapping targets {'p1'}" in error.message + + +def test_invalid_model_id_in_measurements(): + """Test that measurements with an invalid model ID are caught.""" + problem = Problem() + problem.models.append(SbmlModel.from_antimony("p1 = 1", model_id="model1")) + problem.add_observable("obs1", "A") + problem.add_measurement("obs1", experiment_id="e1", time=0, measurement=1) + + check = CheckMeasurementModelId() + + # Single model -> model ID is optional + assert (error := check.run(problem)) is None, error + + # Two models -> model ID must be set + problem.models.append(SbmlModel.from_antimony("p2 = 2", model_id="model2")) + assert (error := 
check.run(problem)) is not None + assert "multiple models" in error.message + + # Set model ID to a non-existing model ID + problem.measurements[0].model_id = "invalid_model_id" + assert (error := check.run(problem)) is not None + assert "does not match" in error.message + + # Use a valid model ID + problem.measurements[0].model_id = "model1" + assert (error := check.run(problem)) is None, error From 5c964a1c384cbd888afab21b9b7f12f7ec4ec082 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Sat, 30 Aug 2025 10:47:49 +0200 Subject: [PATCH 090/141] Add `Problem.assert_valid` (#430) One-liner to check that a given `Problem` is valid. --- petab/v2/core.py | 21 +++++++++++++++++++++ tests/v2/test_core.py | 10 ++++++++++ 2 files changed, 31 insertions(+) diff --git a/petab/v2/core.py b/petab/v2/core.py index 86f847cc..e886be8b 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -1901,6 +1901,27 @@ def validate( return validation_results + def assert_valid(self, **kwargs) -> None: + """Assert that the PEtab problem is valid. + + :param kwargs: Additional arguments passed to :meth:`Problem.validate`. + + :raises AssertionError: If the PEtab problem is not valid. 
+ """ + from ..v2.lint import ValidationIssueSeverity + + validation_results = self.validate(**kwargs) + errors = [ + r + for r in validation_results + if r.level >= ValidationIssueSeverity.ERROR + ] + if errors: + raise AssertionError( + "PEtab problem is not valid:\n" + + "\n".join(e.message for e in errors) + ) + def add_condition( self, id_: str, name: str = None, **kwargs: Number | str | sp.Expr ): diff --git a/tests/v2/test_core.py b/tests/v2/test_core.py index cab1671b..fc774e76 100644 --- a/tests/v2/test_core.py +++ b/tests/v2/test_core.py @@ -58,6 +58,16 @@ def test_condition_table_round_trip(): assert conditions == conditions2 +def test_assert_valid(): + problem = petab1to2(example_dir_fujita / "Fujita.yaml") + problem.assert_valid() + problem.observable_tables[0] = ObservableTable() + with pytest.raises( + AssertionError, match="not defined in the observable table" + ): + problem.assert_valid() + + def test_experiment_add_periods(): """Test operators for Experiment""" exp = Experiment(id="exp1") From 3cf8dace1c6a2cb428c813409af80070b1b408d7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Sat, 13 Sep 2025 19:35:09 +0200 Subject: [PATCH 091/141] build(deps): bump actions/setup-python from 5 to 6 (#436) Bumps [actions/setup-python](https://github.com/actions/setup-python) from 5 to 6. - [Release notes](https://github.com/actions/setup-python/releases) - [Commits](https://github.com/actions/setup-python/compare/v5...v6) --- updated-dependencies: - dependency-name: actions/setup-python dependency-version: '6' dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/ci_tests.yml | 2 +- .github/workflows/deploy.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci_tests.yml b/.github/workflows/ci_tests.yml index 0892ceec..ba6b3c6a 100644 --- a/.github/workflows/ci_tests.yml +++ b/.github/workflows/ci_tests.yml @@ -20,7 +20,7 @@ jobs: uses: actions/checkout@v5 - name: Prepare python ${{ matrix.python-version }} - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: ${{ matrix.python-version }} diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index cbe9d647..c77a94a7 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -17,7 +17,7 @@ jobs: - name: Check out repository uses: actions/checkout@v5 - name: Set up python - uses: actions/setup-python@v5 + uses: actions/setup-python@v6 with: python-version: 3.11 From 89e9fec4bb3642daf62edb269942983217a2d2a8 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Sat, 13 Sep 2025 20:04:14 +0200 Subject: [PATCH 092/141] v2 linter: check measurement.experimentId (#435) Check for undefined experiments referenced in the measurement table. Closes #434. 
--- petab/v2/lint.py | 24 ++++++++++++++++++++++++ tests/v2/test_lint.py | 18 ++++++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/petab/v2/lint.py b/petab/v2/lint.py index 0780b340..6bfcfe50 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -42,6 +42,7 @@ "CheckObservablesDoNotShadowModelEntities", "CheckUnusedConditions", "CheckPriorDistribution", + "CheckUndefinedExperiments", "lint_problem", "default_validation_tasks", ] @@ -691,6 +692,28 @@ def run(self, problem: Problem) -> ValidationIssue | None: return None +class CheckUndefinedExperiments(ValidationTask): + """A task to check for experiments that are used in the measurement + table but not defined in the experiment table.""" + + def run(self, problem: Problem) -> ValidationIssue | None: + used_experiments = { + m.experiment_id + for m in problem.measurements + if m.experiment_id is not None + } + available_experiments = {e.id for e in problem.experiments} + + if undefined_experiments := used_experiments - available_experiments: + return ValidationWarning( + f"Experiments {undefined_experiments} are used in the " + "measurements table but are not defined in the experiments " + "table." 
+ ) + + return None + + class CheckUnusedConditions(ValidationTask): """A task to check for conditions that are not used in the experiment table.""" @@ -1053,6 +1076,7 @@ def get_placeholders( CheckValidConditionTargets(), CheckExperimentTable(), CheckExperimentConditionsExist(), + CheckUndefinedExperiments(), CheckObservablesDoNotShadowModelEntities(), CheckAllParametersPresentInParameterTable(), CheckValidParameterInConditionOrParameterTable(), diff --git a/tests/v2/test_lint.py b/tests/v2/test_lint.py index 12973d86..1013670f 100644 --- a/tests/v2/test_lint.py +++ b/tests/v2/test_lint.py @@ -64,3 +64,21 @@ def test_invalid_model_id_in_measurements(): # Use a valid model ID problem.measurements[0].model_id = "model1" assert (error := check.run(problem)) is None, error + + +def test_undefined_experiment_id_in_measurements(): + """Test that measurements with an undefined experiment ID are caught.""" + problem = Problem() + problem.add_experiment("e1", 0, "c1") + problem.add_observable("obs1", "A") + problem.add_measurement("obs1", experiment_id="e1", time=0, measurement=1) + + check = CheckUndefinedExperiments() + + # Valid experiment ID + assert (error := check.run(problem)) is None, error + + # Invalid experiment ID + problem.measurements[0].experiment_id = "invalid_experiment_id" + assert (error := check.run(problem)) is not None + assert "not defined" in error.message From de974baad4d646daef86222af3a668d58133a758 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Fri, 19 Sep 2025 22:33:27 +0200 Subject: [PATCH 093/141] Fix petablint for v2 problems (#432) Skip `validate_yaml_semantics` for PEtab v2. Those errors will be caught elsewhere. Closes #428. 
--- petab/petablint.py | 6 ++++-- tests/v2/test_core.py | 41 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+), 2 deletions(-) diff --git a/petab/petablint.py b/petab/petablint.py index b3c2ef87..afc481e2 100755 --- a/petab/petablint.py +++ b/petab/petablint.py @@ -12,8 +12,8 @@ from jsonschema.exceptions import ValidationError as SchemaValidationError import petab.v1 as petab +from petab.v1 import validate_yaml_semantics, validate_yaml_syntax from petab.v1.C import FORMAT_VERSION -from petab.v1.yaml import validate from petab.versions import get_major_version logger = logging.getLogger(__name__) @@ -159,7 +159,7 @@ def main(): if args.yaml_file_name: try: - validate(args.yaml_file_name) + validate_yaml_syntax(args.yaml_file_name) except SchemaValidationError as e: path = "" if e.absolute_path: @@ -181,6 +181,8 @@ def main(): match get_major_version(args.yaml_file_name): case 1: + validate_yaml_semantics(args.yaml_file_name) + if petab.is_composite_problem(args.yaml_file_name): # TODO: further checking: # https://github.com/ICB-DCM/PEtab/issues/191 diff --git a/tests/v2/test_core.py b/tests/v2/test_core.py index fc774e76..0a9e2429 100644 --- a/tests/v2/test_core.py +++ b/tests/v2/test_core.py @@ -1,3 +1,4 @@ +import subprocess import tempfile from pathlib import Path @@ -666,3 +667,43 @@ def test_generate_path(): else: assert gp(Path("foo"), "/bar") == "/bar/foo" assert gp("/foo", "bar") == "/foo" + + +def test_petablint_v2(tmpdir): + """Test that petablint runs on a valid v2 problem without errors.""" + problem = Problem() + problem.model = SbmlModel.from_antimony(""" + model conversion + species A, B; + A = 10; + B = 0; + k1 = 1; + k2 = 0.5; + R1: A -> B; k1 * A; + R2: B -> A; k2 * B; + end + """) + problem.add_observable("obs_A", "A", noise_formula="sd_A") + problem.add_parameter( + "k1", estimate=True, lb=1e-5, ub=1e5, nominal_value=1 + ) + problem.add_parameter( + "k2", estimate=True, lb=1e-5, ub=1e5, nominal_value=0.5 + ) + 
problem.add_parameter( + "sd_A", estimate=True, lb=0.01, ub=10, nominal_value=1 + ) + problem.add_measurement( + "obs_A", time=10, measurement=2.5, experiment_id="" + ) + assert problem.validate() == [] + + problem.config = ProblemConfig(filepath="problem.yaml") + problem.models[0].rel_path = "model.xml" + problem.parameter_tables[0].rel_path = "parameters.tsv" + problem.observable_tables[0].rel_path = "observables.tsv" + problem.measurement_tables[0].rel_path = "measurements.tsv" + problem.to_files(Path(tmpdir)) + + result = subprocess.run(["petablint", str(Path(tmpdir, "problem.yaml"))]) # noqa: S603,S607 + assert result.returncode == 0 From 900f8b615e891ff5a26a5ecd7e58e4aece9e846b Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Fri, 19 Sep 2025 23:02:31 +0200 Subject: [PATCH 094/141] Update v2 condition table `targetValue` validation (#427) After https://github.com/PEtab-dev/PEtab/pull/645, any condition that is used as a first condition cannot refer to any symbols other than the parameters listed in the parameter table, or `time`. Closes https://github.com/PEtab-dev/libpetab-python/issues/424. --- petab/v2/C.py | 3 ++ petab/v2/core.py | 19 +++++++++++-- petab/v2/lint.py | 64 +++++++++++++++++++++++++++++++++++++++---- tests/v2/test_lint.py | 25 +++++++++++++++++ 4 files changed, 104 insertions(+), 7 deletions(-) diff --git a/petab/v2/C.py b/petab/v2/C.py index 5bb73980..e680450e 100644 --- a/petab/v2/C.py +++ b/petab/v2/C.py @@ -285,6 +285,9 @@ #: separator for multiple parameter values (bounds, observableParameters, ...) PARAMETER_SEPARATOR = ";" +#: The time symbol for use in any PEtab-specific mathematical expressions +TIME_SYMBOL = "time" + __all__ = [ x diff --git a/petab/v2/core.py b/petab/v2/core.py index e886be8b..0f55ea8e 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -2125,8 +2125,23 @@ def add_experiment(self, id_: str, *args): is added to the last one. :param id_: The experiment ID. 
- :param args: Timepoints and associated conditions: - ``time_1, condition_id_1, time_2, condition_id_2, ...``. + :param args: Timepoints and associated conditions + (single condition ID as string or multiple condition IDs as lists + of strings). + + :example: + >>> p = Problem() + >>> p.add_experiment( + ... "experiment1", + ... 1, + ... "condition1", + ... 2, + ... ["condition2a", "condition2b"], + ... ) + >>> p.experiments[0] # doctest: +NORMALIZE_WHITESPACE + Experiment(id='experiment1', periods=[\ +ExperimentPeriod(time=1.0, condition_ids=['condition1']), \ +ExperimentPeriod(time=2.0, condition_ids=['condition2a', 'condition2b'])]) """ if len(args) % 2 != 0: raise ValueError( diff --git a/petab/v2/lint.py b/petab/v2/lint.py index 6bfcfe50..4d864b57 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -43,6 +43,7 @@ "CheckUnusedConditions", "CheckPriorDistribution", "CheckUndefinedExperiments", + "CheckInitialChangeSymbols", "lint_problem", "default_validation_tasks", ] @@ -736,6 +737,62 @@ def run(self, problem: Problem) -> ValidationIssue | None: return None +class CheckInitialChangeSymbols(ValidationTask): + """ + Check that changes of any first period of any experiment only refers to + allowed symbols. + + The only allowed symbols are those that are present in the parameter table. 
+ """ + + def run(self, problem: Problem) -> ValidationIssue | None: + if not problem.experiments: + return None + + if not problem.conditions: + return None + + allowed_symbols = {p.id for p in problem.parameters} + allowed_symbols.add(TIME_SYMBOL) + # IDs of conditions that have already been checked + valid_conditions = set() + id_to_condition = {c.id: c for c in problem.conditions} + + messages = [] + for experiment in problem.experiments: + if not experiment.periods: + continue + + first_period = experiment.sorted_periods[0] + for condition_id in first_period.condition_ids: + if condition_id in valid_conditions: + continue + + # we assume that all referenced condition IDs are valid + condition = id_to_condition[condition_id] + + used_symbols = { + str(sym) + for change in condition.changes + for sym in change.target_value.free_symbols + } + invalid_symbols = used_symbols - allowed_symbols + if invalid_symbols: + messages.append( + f"Condition {condition.id} is applied at the start of " + f"experiment {experiment.id}, and thus, its " + f"target value expressions must only contain " + f"symbols from the parameter table, or `time`. " + "However, it contains additional symbols: " + f"{invalid_symbols}. 
" + ) + + if messages: + return ValidationError("\n".join(messages)) + + return None + + class CheckPriorDistribution(ValidationTask): """A task to validate the prior distribution of a PEtab problem.""" @@ -1082,10 +1139,7 @@ def get_placeholders( CheckValidParameterInConditionOrParameterTable(), CheckUnusedExperiments(), CheckUnusedConditions(), - # TODO: atomize checks, update to long condition table, re-enable - # TODO validate mapping table - CheckValidParameterInConditionOrParameterTable(), - CheckAllParametersPresentInParameterTable(), - CheckValidConditionTargets(), CheckPriorDistribution(), + CheckInitialChangeSymbols(), + # TODO validate mapping table ] diff --git a/tests/v2/test_lint.py b/tests/v2/test_lint.py index 1013670f..7eb6dc91 100644 --- a/tests/v2/test_lint.py +++ b/tests/v2/test_lint.py @@ -82,3 +82,28 @@ def test_undefined_experiment_id_in_measurements(): problem.measurements[0].experiment_id = "invalid_experiment_id" assert (error := check.run(problem)) is not None assert "not defined" in error.message + + +def test_validate_initial_change_symbols(): + """Test validation of symbols in target value expressions for changes + applied at the start of an experiment.""" + problem = Problem() + problem.model = SbmlModel.from_antimony("p1 = 1; p2 = 2") + problem.add_experiment("e1", 0, "c1", 1, "c2") + problem.add_condition("c1", p1="p2 + time") + problem.add_condition("c2", p1="p2", p2="p1") + problem.add_parameter("p1", nominal_value=1, estimate=False) + problem.add_parameter("p2", nominal_value=2, estimate=False) + + check = CheckInitialChangeSymbols() + assert check.run(problem) is None + + # removing `p1` from the parameter table is okay, as `c2` is never + # used at the start of an experiment + problem.parameter_tables[0].parameters.remove(problem["p1"]) + assert check.run(problem) is None + + # removing `p2` is not okay, as it is used at the start of an experiment + problem.parameter_tables[0].parameters.remove(problem["p2"]) + assert (error := 
check.run(problem)) is not None + assert "contains additional symbols: {'p2'}" in error.message From 64011e723b52a0b786f3eb3789ef08b9ec3c74f2 Mon Sep 17 00:00:00 2001 From: Polina Lakrisenko Date: Sat, 20 Sep 2025 11:33:59 +0200 Subject: [PATCH 095/141] fix p-value in the goodness of fit plot (#437) * fix pvalue in goodness of fit plot Co-authored-by: Daniel Weindl --------- Co-authored-by: Daniel Weindl --- petab/v1/visualize/plot_residuals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/petab/v1/visualize/plot_residuals.py b/petab/v1/visualize/plot_residuals.py index 230c8605..18f0f9e9 100644 --- a/petab/v1/visualize/plot_residuals.py +++ b/petab/v1/visualize/plot_residuals.py @@ -206,7 +206,7 @@ def plot_goodness_of_fit( f"$R^2$: {r_value**2:.2f}\n" f"slope: {slope:.2f}\n" f"intercept: {intercept:.2f}\n" - f"pvalue: {std_err:.2e}\n" + f"p-value: {p_value:.2e}\n" f"mean squared error: {mse:.2e}\n", transform=ax.transAxes, ) From 4eb95e17dd942ee8df786c2611a9c5a4fdee7756 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Sat, 20 Sep 2025 11:41:57 +0200 Subject: [PATCH 096/141] Fix plot_goodness_of_fit scatter (#440) Measurements and simulation were swapped in the scatter plot. The regression line was correct. 
--- petab/v1/visualize/plot_residuals.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/petab/v1/visualize/plot_residuals.py b/petab/v1/visualize/plot_residuals.py index 18f0f9e9..46e83fb9 100644 --- a/petab/v1/visualize/plot_residuals.py +++ b/petab/v1/visualize/plot_residuals.py @@ -184,8 +184,8 @@ def plot_goodness_of_fit( fig.set_layout_engine("tight") ax.scatter( - petab_problem.measurement_df["measurement"], simulations_df["simulation"], + petab_problem.measurement_df["measurement"], c=color, ) From f79bba5e32576b463dc0cc097de6a5a463ad722f Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 22 Sep 2025 14:06:21 +0200 Subject: [PATCH 097/141] Update changelog; bump version (#415) --- CHANGELOG.md | 76 ++++++++++++++++++++++++++++++++++++++++++++++++ petab/version.py | 2 +- 2 files changed, 77 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4d12d40d..8712d235 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,81 @@ # PEtab changelog +## 0.6 series + +### 0.6.0 + +**Fixes** + +* Implement proper truncation for prior distributions + (parameter bounds now truncate the prior distribution instead of putting + extra probability mass on the bounds) + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/335) +* Fixed `get_required_parameters_for_parameter_table` + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/340) +* Fixed `Prior.from_par_dict` for missing `priorParameters` columns + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/341) +* Fixed petablint v2 warning + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/342) +* Fixed linter failing on missing `noiseFormula` + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/367) +* Fixed v2 import Deprecation warning + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/346) +* petab.calculate: compare all common columns + (by @dweindl in 
https://github.com/PEtab-dev/libpetab-python/pull/347) +* Fixed version checks + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/350) +* Create output directories in `write_*_df` + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/352) +* Handle `observableTransformation` in `petab.v1.simulate.sample_noise` + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/383) +* Fixed residual calculation in `v1.calculate` + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/395) +* Allow empty string "" in columns to be overridden with default values in priors + (by @PaulJonasJost in https://github.com/PEtab-dev/libpetab-python/pull/384) +* Fixed `goodness_of_fit` plot and add color parameter + (by @plakrisenko in https://github.com/PEtab-dev/libpetab-python/pull/402 + & https://github.com/PEtab-dev/libpetab-python/pull/437, + by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/440) +* Plot without vis spec without `ids_per_plot` + (by @PaulJonasJost in https://github.com/PEtab-dev/libpetab-python/pull/386) + +**Deprecations** + +* Using any PEtab-v2-related functionality currently in `petab.v1` is + deprecated and will be removed in a subsequent release. + Use `petab.v2` instead. + + This affects, for example, PEtab-v2-specific constants `petab.v1.C` + (use `petab.v2.C` instead), `petab.v1.mapping`, + or anything mapping-table-related in `petab.v1.Problem`. + +**Features** + +* Substantially extended and updated `petab.v2` for working with PEtab v2 problems. + + PEtab v2 is still in [draft](https://petab.readthedocs.io/en/latest/v2/documentation_data_format.html) stage -- feedback is welcome! 
+ + * PEtab v2 support for `petablint` + * The library uses pydantic-based objects for most PEtab entities instead of + plain DataFrames + * Functionality for converting PEtab v1 problems to (the current state of) PEtab v2 + + At least until PEtab v2 is finalized, the `petab.v2` API may change rapidly, and should not be considered stable. + +* SbmlModel enhancements + (by @fbergmann in https://github.com/PEtab-dev/libpetab-python/pull/333) +* Added `SbmlModel.from_antimony` (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/331) +* Added `SbmlModel.{to_antimony,to_sbml_str}` (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/371) +* Enable passing the base path to `Problem.from_yaml` (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/327) +* Functions for adding conditions/observables/parameter to `Problem` (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/328) +* Added `evaluate: bool` argument to math parser (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/365) +* Added petab-compatible sympy string-printer (`PetabStrPrinter`/`petab_math_str`) (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/364) +* Prettified linter output (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/401) +* Store problem configuration in `Problem` (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/326) +* Store path info in *Table objects (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/416) + +**Full Changelog**: https://github.com/PEtab-dev/libpetab-python/compare/v0.5.0...v0.6.0 + ## 0.5 series ### 0.5.0 diff --git a/petab/version.py b/petab/version.py index ca57250b..1b607c15 100644 --- a/petab/version.py +++ b/petab/version.py @@ -1,3 +1,3 @@ """PEtab library version""" -__version__ = "0.5.0" +__version__ = "0.6.0" From 004be14ea25c0b813a02a73cfa86aa5188c9e332 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 2 Oct 2025 07:31:23 +0200 
Subject: [PATCH 098/141] Add `id` field to `v2.ProblemConfig` (#442) * Update schema * v2.Problem.id * v2.ProblemConfig.id Related to PEtab-dev/PEtab#646. Closes #441. --- petab/schemas/petab_schema.v2.0.0.yaml | 8 +++++++ petab/v2/core.py | 19 +++++++++++++++ tests/v2/test_core.py | 32 ++++++++++++++++++++++++++ 3 files changed, 59 insertions(+) diff --git a/petab/schemas/petab_schema.v2.0.0.yaml b/petab/schemas/petab_schema.v2.0.0.yaml index 7f1b7443..1a285070 100644 --- a/petab/schemas/petab_schema.v2.0.0.yaml +++ b/petab/schemas/petab_schema.v2.0.0.yaml @@ -23,6 +23,14 @@ properties: - type: integer description: Version of the PEtab format + id: + type: string + description: | + Identifier of the PEtab problem. + + This is optional and has no effect on the PEtab problem itself. + pattern: "^[a-zA-Z_]\\w*$" + parameter_files: type: array description: | diff --git a/petab/v2/core.py b/petab/v2/core.py index 0f55ea8e..b331c713 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -1630,6 +1630,18 @@ def mappings(self) -> list[Mapping]: chain.from_iterable(mt.mappings for mt in self.mapping_tables) ) + @property + def id(self) -> str | None: + """The ID of the PEtab problem if set, ``None`` otherwise.""" + return self.config.id + + @id.setter + def id(self, value: str): + """Set the ID of the PEtab problem.""" + if self.config is None: + self.config = ProblemConfig(format_version="2.0.0") + self.config.id = value + def get_optimization_parameters(self) -> list[str]: """ Get the list of optimization parameter IDs from parameter table. @@ -2327,6 +2339,10 @@ class ProblemConfig(BaseModel): ) #: The PEtab format version. format_version: str = "2.0.0" + + #: The problem ID. + id: str | None = None + #: The path to the parameter file, relative to ``base_path``. # TODO https://github.com/PEtab-dev/PEtab/pull/641: # rename to parameter_files in yaml for consistency with other files? 
@@ -2388,6 +2404,9 @@ def to_yaml(self, filename: str | Path): data["model_files"][model_id][C.MODEL_LOCATION] = str( data["model_files"][model_id]["location"] ) + if data["id"] is None: + # The schema requires a valid id or no id field at all. + del data["id"] write_yaml(data, filename) diff --git a/tests/v2/test_core.py b/tests/v2/test_core.py index 0a9e2429..e38f31f1 100644 --- a/tests/v2/test_core.py +++ b/tests/v2/test_core.py @@ -707,3 +707,35 @@ def test_petablint_v2(tmpdir): result = subprocess.run(["petablint", str(Path(tmpdir, "problem.yaml"))]) # noqa: S603,S607 assert result.returncode == 0 + + +def test_problem_id(tmpdir): + """Test that the problem ID works as expected.""" + from jsonschema import ValidationError + + def make_yaml(id_line: str) -> str: + return f""" + format_version: 2.0.0 + {id_line} + model_files: {{}} + parameter_files: [] + observable_files: [] + condition_files: [] + measurement_files: [] + """ + + filepath = Path(tmpdir, "problem.yaml") + with open(filepath, "w") as f: + f.write(make_yaml("id: my_problem_id")) + problem = Problem.from_yaml(filepath) + assert problem.id == "my_problem_id" + + with open(filepath, "w") as f: + f.write(make_yaml("id: ")) + with pytest.raises(ValidationError): + Problem.from_yaml(filepath) + + with open(filepath, "w") as f: + f.write(make_yaml("")) + problem = Problem.from_yaml(filepath) + assert problem.id is None From cd8f195050a722c30b15e3f4733ab7d1332a68d3 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 2 Oct 2025 16:25:55 +0200 Subject: [PATCH 099/141] Misc petab.v2 fixes/cleanup (#445) * Remove `Problem.from_dfs` which isn't really useful * Resolve some TODOs * Fix doctests * Fix `get_output_parameters` to not include observable IDs and make more efficient --- petab/v2/core.py | 82 +++++++++++++----------------------------------- petab/v2/lint.py | 72 ++++++++++++++++++++++-------------------- 2 files changed, 59 insertions(+), 95 deletions(-) diff --git a/petab/v2/core.py 
b/petab/v2/core.py index b331c713..6d117f92 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -992,16 +992,13 @@ def _validate(self) -> Self: "Estimated parameter must have lower and upper bounds set" ) - # TODO: also if not estimated? - if ( - self.estimate - and self.lb is not None - and self.ub is not None - and self.lb >= self.ub - ): - raise ValueError("Lower bound must be less than upper bound.") + if self.lb is not None and self.ub is not None and self.lb > self.ub: + raise ValueError( + "Lower bound must be less than or equal to upper bound." + ) - # TODO priorType, priorParameters + # NOTE: priorType and priorParameters are currently checked in + # `CheckPriorDistribution` return self @@ -1294,50 +1291,6 @@ def from_yaml( mapping_tables=mapping_tables, ) - @staticmethod - def from_dfs( - model: Model = None, - condition_df: pd.DataFrame = None, - experiment_df: pd.DataFrame = None, - measurement_df: pd.DataFrame = None, - parameter_df: pd.DataFrame = None, - observable_df: pd.DataFrame = None, - mapping_df: pd.DataFrame = None, - config: ProblemConfig = None, - ): - """ - Construct a PEtab problem from dataframes. - - Parameters: - condition_df: PEtab condition table - experiment_df: PEtab experiment table - measurement_df: PEtab measurement table - parameter_df: PEtab parameter table - observable_df: PEtab observable table - mapping_df: PEtab mapping table - model: The underlying model - config: The PEtab problem configuration - """ - # TODO: do we really need this? 
- - observable_table = ObservableTable.from_df(observable_df) - condition_table = ConditionTable.from_df(condition_df) - experiment_table = ExperimentTable.from_df(experiment_df) - measurement_table = MeasurementTable.from_df(measurement_df) - mapping_table = MappingTable.from_df(mapping_df) - parameter_table = ParameterTable.from_df(parameter_df) - - return Problem( - models=[model], - condition_tables=[condition_table], - experiment_tables=[experiment_table], - observable_tables=[observable_table], - measurement_tables=[measurement_table], - parameter_tables=[parameter_table], - mapping_tables=[mapping_table], - config=config, - ) - @staticmethod def from_combine(filename: Path | str) -> Problem: """Read PEtab COMBINE archive (http://co.mbine.org/documents/archive). @@ -2235,6 +2188,7 @@ def model_dump(self, **kwargs) -> dict[str, Any]: 'experiment_files': [], 'extensions': {}, 'format_version': '2.0.0', + 'id': None, 'mapping_files': [], 'measurement_files': [], 'model_files': {}, @@ -2343,19 +2297,25 @@ class ProblemConfig(BaseModel): #: The problem ID. id: str | None = None - #: The path to the parameter file, relative to ``base_path``. - # TODO https://github.com/PEtab-dev/PEtab/pull/641: - # rename to parameter_files in yaml for consistency with other files? - # always a list? - parameter_files: list[AnyUrl | Path] = Field( - default=[], alias=C.PARAMETER_FILES - ) - + #: The paths to the parameter tables. + # Absolute or relative to `base_path`. + parameter_files: list[AnyUrl | Path] = [] + #: The model IDs and files used by the problem (`id->ModelFile`). model_files: dict[str, ModelFile] | None = {} + #: The paths to the measurement tables. + # Absolute or relative to `base_path`. measurement_files: list[AnyUrl | Path] = [] + #: The paths to the condition tables. + # Absolute or relative to `base_path`. condition_files: list[AnyUrl | Path] = [] + #: The paths to the experiment tables. + # Absolute or relative to `base_path`. 
experiment_files: list[AnyUrl | Path] = [] + #: The paths to the observable tables. + # Absolute or relative to `base_path`. observable_files: list[AnyUrl | Path] = [] + #: The paths to the mapping tables. + # Absolute or relative to `base_path`. mapping_files: list[AnyUrl | Path] = [] #: Extensions used by the problem. diff --git a/petab/v2/lint.py b/petab/v2/lint.py index 4d864b57..20f5dfc1 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -992,11 +992,6 @@ def append_overrides(overrides): append_overrides(m.observable_parameters) append_overrides(m.noise_parameters) - # TODO remove `observable_ids` when - # `get_output_parameters` is updated for PEtab v2/v1.1, where - # observable IDs are allowed in observable formulae - observable_ids = {o.id for o in problem.observables} - # Add output parameters except for placeholders for formula_type, placeholder_sources in ( ( @@ -1021,9 +1016,7 @@ def append_overrides(overrides): **placeholder_sources, ) parameter_ids.update( - p - for p in output_parameters - if p not in placeholders and p not in observable_ids + p for p in output_parameters if p not in placeholders ) # Add condition table parametric overrides unless already defined in the @@ -1048,8 +1041,8 @@ def get_output_parameters( ) -> list[str]: """Get output parameters - Returns IDs of parameters used in observable and noise formulas that are - not defined in the model. + Returns IDs of symbols used in observable and noise formulas that are + not observables and that are not defined in the model. Arguments: problem: The PEtab problem @@ -1057,35 +1050,46 @@ def get_output_parameters( noise: Include parameters from noiseFormulas Returns: - List of output parameter IDs + List of output parameter IDs, including any placeholder parameters. 
""" - formulas = [] + # collect free symbols from observable and noise formulas, + # skipping observable IDs + candidates = set() if observables: - formulas.extend(o.formula for o in problem.observables) + candidates |= { + str_sym + for o in problem.observables + if o.formula is not None + for sym in o.formula.free_symbols + if (str_sym := str(sym)) != o.id + } if noise: - formulas.extend(o.noise_formula for o in problem.observables) - output_parameters = OrderedDict() + candidates |= { + str_sym + for o in problem.observables + if o.noise_formula is not None + for sym in o.noise_formula.free_symbols + if (str_sym := str(sym)) != o.id + } - for formula in formulas: - free_syms = sorted( - formula.free_symbols, - key=lambda symbol: symbol.name, - ) - for free_sym in free_syms: - sym = str(free_sym) - if problem.model.symbol_allowed_in_observable_formula(sym): - continue + output_parameters = OrderedDict() - # does it map to a model entity? - for mapping in problem.mappings: - if mapping.petab_id == sym and mapping.model_id is not None: - if problem.model.symbol_allowed_in_observable_formula( - mapping.model_id - ): - break - else: - # no mapping to a model entity, so it is an output parameter - output_parameters[sym] = None + # filter out symbols that are defined in the model or mapped to + # such symbols + for candidate in sorted(candidates): + if problem.model.symbol_allowed_in_observable_formula(candidate): + continue + + # does it map to a model entity? 
+ for mapping in problem.mappings: + if mapping.petab_id == candidate and mapping.model_id is not None: + if problem.model.symbol_allowed_in_observable_formula( + mapping.model_id + ): + break + else: + # no mapping to a model entity, so it is an output parameter + output_parameters[candidate] = None return list(output_parameters.keys()) From 86e02901a65220efcaf59706faaa3a57ef7e906e Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 2 Oct 2025 16:34:38 +0200 Subject: [PATCH 100/141] doc: petab1to2 (#444) Closes #388. --- petab/v2/petab1to2.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/petab/v2/petab1to2.py b/petab/v2/petab1to2.py index 5e5c5ae7..8e1c7e85 100644 --- a/petab/v2/petab1to2.py +++ b/petab/v2/petab1to2.py @@ -30,6 +30,17 @@ def petab1to2( Convert a PEtab problem from PEtab 1.0 to PEtab 2.0 format. + .. note:: + + Some aspects of PEtab v1 were not well-defined. For example, model + initialization order (e.g., applying initial assignments before or + after condition table overrides) and the impact of compartment size + changes were not specified. In such cases, we made assumptions that are + consistent with the clarified PEtab v2 specifications, + the PEtab test suite, or common practice. + Therefore, it is recommended to carefully review the generated PEtab v2 + problem to ensure it aligns with the expected behavior. + :param yaml_config: The PEtab problem as dictionary or YAML file name. :param output_dir: From 6f0891e2fed10ec68afbcf7312cb9c6804245227 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Sat, 4 Oct 2025 18:38:48 +0200 Subject: [PATCH 101/141] Update `ExperimentsToEventsConverter` to changed initialization semantics (#443) Due to https://github.com/PEtab-dev/PEtab/pull/645 ... * ... the first period of a PEtab experiment has to be applied before initial assignments are evaluated. 
That means, the changes have to be implemented as initial assignments instead of event assignments at the initial timepoint (or any pre-existing initial assignments would have to be included in the event assignment). * ... for any subsequent periods, the event assignments need to be modified. Compartment-size changes in PEtab no longer follow the SBML event assignment semantics. That means, we need event assignments for all concentration-based species inside such a compartment to preserve concentrations instead of amounts. Closes #452. --------- Co-authored-by: Dilan Pathirana <59329744+dilpath@users.noreply.github.com> --- petab/v2/converters.py | 281 +++++++++++++++++++++++++++++++----- pyproject.toml | 1 + tests/v2/test_converters.py | 165 ++++++++++++++++++++- 3 files changed, 404 insertions(+), 43 deletions(-) diff --git a/petab/v2/converters.py b/petab/v2/converters.py index cdb8434c..1c9acfd8 100644 --- a/petab/v2/converters.py +++ b/petab/v2/converters.py @@ -6,6 +6,7 @@ from copy import deepcopy import libsbml +import sympy as sp from sbmlmath import sbml_math_to_sympy, set_math from .core import ( @@ -19,24 +20,26 @@ from .models._sbml_utils import add_sbml_parameter, check from .models.sbml_model import SbmlModel -__all__ = ["ExperimentsToEventsConverter"] +__all__ = ["ExperimentsToSbmlConverter"] -class ExperimentsToEventsConverter: - """Convert PEtab experiments to SBML events. +class ExperimentsToSbmlConverter: + """Convert PEtab experiments to SBML. For an SBML-model-based PEtab problem, this class converts the PEtab - experiments to events as far as possible. + experiments to initial assignments and events as far as possible. If the model already contains events, PEtab events are added with a higher priority than the existing events to guarantee that PEtab condition changes are applied before any pre-existing assignments. + This requires that all event priorities in the original model are numeric + constants. 
The PEtab problem must not contain any identifiers starting with ``_petab``. - All periods and condition changes that are represented by events - will be removed from the condition table. + All periods and condition changes that are represented by initial + assignments or events will be removed from the condition table. Each experiment will have at most one period with a start time of ``-inf`` and one period with a finite start time. The associated changes with these periods are only the pre-equilibration indicator @@ -92,9 +95,8 @@ def __init__(self, problem: Problem, default_priority: float = None): self._default_priority = default_priority self._preprocess() - def _get_experiment_indicator_condition_id( - self, experiment_id: str - ) -> str: + @staticmethod + def _get_experiment_indicator_condition_id(experiment_id: str) -> str: """Get the condition ID for the experiment indicator parameter.""" return f"_petab_experiment_condition_{experiment_id}" @@ -198,7 +200,9 @@ def convert(self) -> Problem: return self._new_problem def _convert_experiment(self, experiment: Experiment) -> None: - """Convert a single experiment to SBML events.""" + """ + Convert a single experiment to SBML events or initial assignments. + """ model = self._model experiment.sort_periods() has_preequilibration = experiment.has_preequilibration @@ -213,8 +217,14 @@ def _convert_experiment(self, experiment: Experiment) -> None: "model." ) add_sbml_parameter(model, id_=exp_ind_id, constant=False, value=0) - kept_periods = [] - for i_period, period in enumerate(experiment.periods): + kept_periods: list[ExperimentPeriod] = [] + # Collect values for initial assignments for the different experiments. + # All expressions must be combined into a single initial assignment + # per target. + # target_id -> [(experiment_indicator, target_value), ...] 
+ period0_assignments: dict[str, list[tuple[str, sp.Basic]]] = {} + + for i_period, period in enumerate(experiment.sorted_periods): if period.is_preequilibration: # pre-equilibration cannot be represented in SBML, # so we need to keep this period in the Problem. @@ -229,18 +239,84 @@ def _convert_experiment(self, experiment: Experiment) -> None: # or the only non-equilibration period (handled above) continue - ev = self._create_period_start_event( - experiment=experiment, - i_period=i_period, - period=period, - ) - self._create_event_assignments_for_period( - ev, - [ - self._new_problem[condition_id] - for condition_id in period.condition_ids - ], - ) + # Encode the period changes in the SBML model as events + # that trigger at the start of the period or, + # for the first period, as initial assignments. + # Initial assignments are required for the first period, + # because other initial assignments may depend on + # the changed values. + # Additionally, tools that don't support events can still handle + # single-period experiments. + if i_period == 0: + exp_ind_id = self.get_experiment_indicator(experiment.id) + for change in self._new_problem.get_changes_for_period(period): + period0_assignments.setdefault( + change.target_id, [] + ).append((exp_ind_id, change.target_value)) + else: + ev = self._create_period_start_event( + experiment=experiment, + i_period=i_period, + period=period, + ) + self._create_event_assignments_for_period( + ev, + self._new_problem.get_changes_for_period(period), + ) + + # Create initial assignments for the first period + if period0_assignments: + free_symbols_in_assignments = set() + for target_id, changes in period0_assignments.items(): + # The initial value might only be changed for a subset of + # experiments. We need to keep the original initial value + # for all other experiments. + + # Is there an initial assignment for this target already? + # If not, fall back to the initial value of the target. 
+ if ( + ia := model.getInitialAssignmentBySymbol(target_id) + ) is not None: + default = sbml_math_to_sympy(ia.getMath()) + else: + # use the initial value of the target as default + target = model.getElementBySId(target_id) + default = self._initial_value_from_element(target) + + # Only create the initial assignment if there is + # actually something to change. + if expr_cond_pairs := [ + (target_value, sp.Symbol(exp_ind) > 0.5) + for exp_ind, target_value in changes + if target_value != default + ]: + # Unlike events, we can't have different initial + # assignments for different experiments, so we need to + # combine all changes into a single piecewise + # expression. + + expr = sp.Piecewise( + *expr_cond_pairs, + (default, True), + ) + + # Create a new initial assignment if necessary, otherwise + # overwrite the existing one. + if ia is None: + ia = model.createInitialAssignment() + ia.setSymbol(target_id) + + set_math(ia, expr) + free_symbols_in_assignments |= expr.free_symbols + + # the target value may depend on parameters that are only + # introduced in the PEtab parameter table - those need + # to be added to the model + for sym in free_symbols_in_assignments: + if model.getElementBySId(sym.name) is None: + add_sbml_parameter( + model, id_=sym.name, constant=True, value=0 + ) if len(kept_periods) > 2: raise AssertionError("Expected at most two periods to be kept.") @@ -256,6 +332,46 @@ def _convert_experiment(self, experiment: Experiment) -> None: experiment.periods = kept_periods + @staticmethod + def _initial_value_from_element(target: libsbml.SBase) -> sp.Basic: + """Get the initial value of an SBML element. + + The value of the size attribute of compartments, + the initial concentration or amount of species (amount for + `hasOnlySubstanceUnits=true`, concentration otherwise), and + the value of parameters, not considering any initial assignment + constructs. 
+ """ + if target is None: + raise ValueError("`target` is None.") + + if target.getTypeCode() == libsbml.SBML_COMPARTMENT: + return sp.Float(target.getSize()) + + if target.getTypeCode() == libsbml.SBML_SPECIES: + if target.getHasOnlySubstanceUnits(): + # amount-based -> return amount + if target.isSetInitialAmount(): + return sp.Float(target.getInitialAmount()) + return sp.Float(target.getInitialConcentration()) * sp.Symbol( + target.getCompartment() + ) + # concentration-based -> return concentration + if target.isSetInitialConcentration(): + return sp.Float(target.getInitialConcentration()) + + return sp.Float(target.getInitialAmount()) / sp.Symbol( + target.getCompartment() + ) + + if target.getTypeCode() == libsbml.SBML_PARAMETER: + return sp.Float(target.getValue()) + + raise NotImplementedError( + "Cannot create initial assignment for unsupported SBML " + f"entity type {target.getTypeCode()}." + ) + def _create_period_start_event( self, experiment: Experiment, i_period: int, period: ExperimentPeriod ) -> libsbml.Event: @@ -326,33 +442,120 @@ def get_experiment_indicator(experiment_id: str) -> str: @staticmethod def _create_event_assignments_for_period( - event: libsbml.Event, conditions: list[Condition] + event: libsbml.Event, changes: list[Change] ) -> None: - """Create an event assignments for a given period.""" - for condition in conditions: - for change in condition.changes: - ExperimentsToEventsConverter._change_to_event_assignment( - change, event + """Create event assignments for a given period. + + Converts PEtab ``Change``s to equivalent SBML event assignments. + + Note that the SBML event assignment formula is not necessarily the same + as the `targetValue` in PEtab. + In SBML, concentrations are treated as derived quantities. + Therefore, changing the size of a compartment will update the + concentrations of all contained concentration-based species. 
+ In PEtab, such a change would not automatically update the species + concentrations, but only the compartment size. + + Therefore, to correctly implement a PEtab change of a compartment size + in SBML, we need to compensate for the automatic update of species + concentrations by adding event assignments for all contained + concentration-based species. + + :param event: The SBML event to which the assignments should be added. + :param changes: The PEtab condition changes that are to be applied + at the start of the period. + """ + _add_assignment = ExperimentsToSbmlConverter._add_assignment + sbml_model = event.getModel() + # collect IDs of compartments that are changed in this period + changed_compartments = { + change.target_id + for change in changes + if sbml_model.getElementBySId(change.target_id) is not None + and sbml_model.getElementBySId(change.target_id).getTypeCode() + == libsbml.SBML_COMPARTMENT + } + + for change in changes: + sbml_target = sbml_model.getElementBySId(change.target_id) + + if sbml_target is None: + raise ValueError( + f"Cannot create event assignment for change of " + f"`{change.target_id}`: No such entity in the SBML model." ) + target_type = sbml_target.getTypeCode() + if target_type == libsbml.SBML_COMPARTMENT: + # handle the actual compartment size change + _add_assignment(event, change.target_id, change.target_value) + + # Changing a compartment size affects all contained + # concentration-based species - we need to add event + # assignments for those to compensate for the automatic + # update of their concentrations. + # The event assignment will set the concentration to + # new_conc = assigned_amount / new_volume + # = assigned_conc * old_volume / new_volume + # <=> assigned_conc = new_conc * new_volume / old_volume + # Therefore, the event assignment is not just `new_conc`, + # but `new_conc * new_volume / old_volume`. 
+ + # concentration-based species in the changed compartment + conc_species = [ + species.getId() + for species in sbml_model.getListOfSpecies() + if species.getCompartment() == change.target_id + and not species.getHasOnlySubstanceUnits() + ] + for species_id in conc_species: + if species_change := next( + (c for c in changes if c.target_id == species_id), None + ): + # there is an explicit change for this species + # in this period + new_conc = species_change.target_value + else: + # no explicit change, use the pre-event concentration + new_conc = sp.Symbol(species_id) + + _add_assignment( + event, + species_id, + # new_conc * new_volume / old_volume + new_conc + * change.target_value + / sp.Symbol(change.target_id), + ) + elif ( + target_type != libsbml.SBML_SPECIES + or sbml_target.getCompartment() not in changed_compartments + or sbml_target.getHasOnlySubstanceUnits() is True + ): + # Handle any changes other than compartments and + # concentration-based species inside resized compartments + # that we already handled above. + # Those translate directly to event assignments. 
+ _add_assignment(event, change.target_id, change.target_value) + @staticmethod - def _change_to_event_assignment( - change: Change, event: libsbml.Event + def _add_assignment( + event: libsbml.Event, target_id: str, target_value: sp.Basic ) -> None: - """Convert a PEtab ``Change`` to an SBML event assignment.""" + """Add a single event assignment to the given event + and apply any necessary changes to the model.""" sbml_model = event.getModel() - ea = event.createEventAssignment() - ea.setVariable(change.target_id) - set_math(ea, change.target_value) + ea.setVariable(target_id) + set_math(ea, target_value) # target needs const=False, and target may not exist yet # (e.g., in case of output parameters added in the observable # table) - target = sbml_model.getElementBySId(change.target_id) + target = sbml_model.getElementBySId(target_id) if target is None: add_sbml_parameter( - sbml_model, id_=change.target_id, constant=False, value=0 + sbml_model, id_=target_id, constant=False, value=0 ) else: # We can safely change the `constant` attribute of the target. 
@@ -362,7 +565,7 @@ def _change_to_event_assignment( # the target value may depend on parameters that are only # introduced in the PEtab parameter table - those need # to be added to the model - for sym in change.target_value.free_symbols: + for sym in target_value.free_symbols: if sbml_model.getElementBySId(sym.name) is None: add_sbml_parameter( sbml_model, id_=sym.name, constant=True, value=0 diff --git a/pyproject.toml b/pyproject.toml index 3484e6cf..182c546c 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,6 +37,7 @@ maintainers = [ [project.optional-dependencies] tests = [ "antimony>=2.14.0", + "copasi-basico>=0.85", "pysb", "pytest", "pytest-cov", diff --git a/tests/v2/test_converters.py b/tests/v2/test_converters.py index 8cdbaddf..d640a908 100644 --- a/tests/v2/test_converters.py +++ b/tests/v2/test_converters.py @@ -1,12 +1,14 @@ from math import inf +import pandas as pd + from petab.v2 import Change, Condition, Experiment, ExperimentPeriod, Problem -from petab.v2.converters import ExperimentsToEventsConverter +from petab.v2.converters import ExperimentsToSbmlConverter from petab.v2.models.sbml_model import SbmlModel def test_experiments_to_events_converter(): - """Test the ExperimentsToEventsConverter.""" + """Test the ExperimentsToSbmlConverter.""" ant_model = """ species X = 0 X' = 1 @@ -17,14 +19,15 @@ def test_experiments_to_events_converter(): problem.add_condition("c2", X=2) problem.add_experiment("e1", -inf, "c1", 10, "c2") - converter = ExperimentsToEventsConverter(problem) + converter = ExperimentsToSbmlConverter(problem) converted = converter.convert() assert converted.validate().has_errors() is False assert isinstance(converted.model, SbmlModel) sbml_model = converted.model.sbml_model - assert sbml_model.getNumEvents() == 2 + # one event -- the initial period is handled via initial assignments + assert sbml_model.getNumEvents() == 1 assert converted.conditions == [ Condition( id="_petab_preequilibration_on", @@ -74,3 +77,157 @@ def 
test_experiments_to_events_converter(): ], ), ] + + +def test_simulate_experiment_to_events(): + """ + Convert PEtab experiment to SBML events and compare BasiCO simulation + results. + """ + import basico + + # the basic model for the PEtab problem + ant_model1 = """ + compartment comp1 = 10 + compartment comp2 = 2 + # concentration-based species + species s1c_comp1 in comp1 = 1 + species s1c_comp2 in comp2 = 2 + species s2c_comp1 in comp1 = 3 + species s2c_comp2 in comp2 = 4 + # amount-based species + # (note: the initial values are concentrations nonetheless) + substanceOnly species s3a_comp1 in comp1 = 5 + substanceOnly species s3a_comp2 in comp2 = 6 + substanceOnly species s4a_comp1 in comp1 = 7 + substanceOnly species s4a_comp2 in comp2 = 8 + + # something dynamic + some_species in comp1 = 0 + some_species' = 1 + + # set time-derivatives, otherwise BasiCO won't include them in the result + s1c_comp1' = 0 + s1c_comp2' = 0 + s2c_comp1' = 0 + s2c_comp2' = 0 + s3a_comp1' = 0 + s3a_comp2' = 0 + s4a_comp1' = 0 + s4a_comp2' = 0 + """ + + # append events, equivalent to the expected PEtab conversion result + ant_model_expected = ( + ant_model1 + + """ + # resize compartment + # The size of comp1 should be set to 20, the concentrations of the + # contained concentration-based species and the amounts of the amount-based + # species should remain unchanged. comp2 and everything therein is + # unaffected. + # I.e., post-event: + # s1c_comp1 = 1, s2c_comp1 = 3, s3a_comp1 = 5, s4a_comp1 = 7 + at time >= 1: + comp1 = 20, + s1c_comp1 = s1c_comp1 * 20 / comp1, + s2c_comp1 = s2c_comp1 * 20 / comp1; + + # resize compartment *and* reassign concentration + # The size of comp2 should be set to 4, the concentration/amount of + # s1c_comp2/s3a_comp2 should be set to the given values, + # the amounts for amount-based and concentrations for concentration-based + # other species in comp2 should remain unchanged. 
+ # I.e., post-event: + # comp2 = 4 + # s1c_comp2 = 5, s3a_comp2 = 16, + # s2c_comp2 = 4 (unchanged), s4a_comp2 = 8 (unchanged) + # The post-event concentrations of concentration-based species are + # (per SBML): + # new_conc = assigned_amount / new_volume + # = assigned_conc * old_volume / new_volume + # <=> assigned_conc = new_conc * new_volume / old_volume + # The post-event amounts of amount-based species are: + # new_amount = assigned_amount (independent of volume change) + at time >= 5: + comp2 = 4, + s3a_comp2 = 16, + s1c_comp2 = 5 * 4 / comp2, + s2c_comp2 = s2c_comp2 * 4 / comp2; + """ + ) + + # simulate expected model in BasiCO + sbml_expected = SbmlModel.from_antimony(ant_model_expected).to_sbml_str() + basico.load_model(sbml_expected) + # output timepoints (initial, pre-/post-event, ...) + timepoints = [0, 0.9, 1.1, 4.9, 5.1, 10] + # Simulation will return all species as concentrations + df_expected = basico.run_time_course(values=timepoints) + # fmt: off + assert ( + df_expected + == pd.DataFrame( + {'Values[some_species]': {0.0: 0.0, 0.9: 0.9, + 1.1: 1.0999999999999996, 4.9: 4.9, + 5.1: 5.100000000000001, 10.0: 10.0}, + 's1c_comp1': {0.0: 1.0, 0.9: 1.0, 1.1: 1.0, 4.9: 1.0, 5.1: 1.0, + 10.0: 1.0}, + 's2c_comp1': {0.0: 3.0, 0.9: 3.0, 1.1: 3.0, 4.9: 3.0, 5.1: 3.0, + 10.0: 3.0}, + 's3a_comp1': {0.0: 5.0, 0.9: 5.0, 1.1: 2.5, 4.9: 2.5, 5.1: 2.5, + 10.0: 2.5}, + 's4a_comp1': {0.0: 7.0, 0.9: 7.0, 1.1: 3.5, 4.9: 3.5, 5.1: 3.5, + 10.0: 3.5}, + 's1c_comp2': {0.0: 2.0, 0.9: 2.0, 1.1: 2.0, 4.9: 2.0, 5.1: 5.0, + 10.0: 5.0}, + 's2c_comp2': {0.0: 4.0, 0.9: 4.0, 1.1: 4.0, 4.9: 4.0, 5.1: 4.0, + 10.0: 4.0}, + 's3a_comp2': {0.0: 6.0, 0.9: 6.0, 1.1: 6.0, 4.9: 6.0, 5.1: 4.0, + 10.0: 4.0}, + 's4a_comp2': {0.0: 8.0, 0.9: 8.0, 1.1: 8.0, 4.9: 8.0, 5.1: 4.0, + 10.0: 4.0}, + 'Compartments[comp1]': {0.0: 10.0, 0.9: 10.0, 1.1: 20.0, + 4.9: 20.0, 5.1: 20.0, 10.0: 20.0}, + 'Compartments[comp2]': {0.0: 2.0, 0.9: 2.0, 1.1: 2.0, 4.9: 2.0, + 5.1: 4.0, 10.0: 4.0}} + ) + ).all().all() + # 
fmt: on + + # construct PEtab test problem + problem = Problem() + problem.model = SbmlModel.from_antimony(ant_model1) + problem.add_condition("c0", comp1=10) + problem.add_condition("c1", comp1=20) + problem.add_condition("c2", comp2=4, s1c_comp2=5, s3a_comp2=16) + problem.add_experiment("e1", 0, "c0", 1, "c1", 5, "c2") + problem.assert_valid() + + # convert PEtab experiments to SBML events and simulate in BasiCO + converter = ExperimentsToSbmlConverter(problem) + converted = converter.convert() + # set experiment indicator to simulate experiment "e1" + converted.model.sbml_model.getParameter( + "_petab_experiment_indicator_e1" + ).setValue(1) + sbml_actual = converted.model.to_sbml_str() + basico.load_model(sbml_actual) + df_actual = basico.run_time_course(values=timepoints) + + # compare results + with pd.option_context( + "display.max_rows", + None, + "display.max_columns", + None, + "display.width", + None, + ): + print("Expected:") + print(df_expected) + print("Actual:") + print(df_actual) + + for col in df_expected.columns: + assert (df_expected[col] == df_actual[col]).all() From 1c3b7591d451986415ea6a7d97e56facf72e216d Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Wed, 15 Oct 2025 15:27:48 +0200 Subject: [PATCH 102/141] Fix serialization of priorParameters (#449) Incorrectly expected strings instead of floats. 
--- petab/v2/core.py | 6 ++++-- tests/v2/test_core.py | 23 +++++++++++++++++++++++ 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/petab/v2/core.py b/petab/v2/core.py index 6d117f92..57916a8f 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -977,8 +977,10 @@ def _serialize_prior_distribution( return str(prior_distribution) @field_serializer("prior_parameters") - def _serialize_prior_parameters(self, prior_parameters: list[str], _info): - return C.PARAMETER_SEPARATOR.join(prior_parameters) + def _serialize_prior_parameters( + self, prior_parameters: list[float], _info + ) -> str: + return C.PARAMETER_SEPARATOR.join(map(str, prior_parameters)) @model_validator(mode="after") def _validate(self) -> Self: diff --git a/tests/v2/test_core.py b/tests/v2/test_core.py index e38f31f1..0a1fe915 100644 --- a/tests/v2/test_core.py +++ b/tests/v2/test_core.py @@ -266,6 +266,29 @@ def test_parameter(): with pytest.raises(ValidationError, match="less than"): Parameter(id="k1", lb=2, ub=1) + assert Parameter( + id="k1", estimate=True, lb=1, ub=2, prior_parameters=[1, 2] + ).model_dump() == { + "id": "k1", + "lb": 1.0, + "ub": 2.0, + "nominal_value": None, + "estimate": "true", + "prior_distribution": "", + "prior_parameters": "1.0;2.0", + } + assert Parameter( + id="k1", estimate=False, nominal_value="8" + ).model_dump() == { + "id": "k1", + "lb": None, + "ub": None, + "nominal_value": 8.0, + "estimate": "false", + "prior_distribution": "", + "prior_parameters": "", + } + def test_experiment(): Experiment(id="experiment1") From 880741a2042dacb25a3fcee5485d6d9c8e49647f Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Wed, 15 Oct 2025 16:06:44 +0200 Subject: [PATCH 103/141] Add `v2.Problem.{get_output_parameters,get_x_nominal_dict}` (#447) * Add `v2.Problem.{get_output_parameters,get_x_nominal_dict}` * Move `get_output_parameters` from v2.lint to v2.Problem * Add `Problem.get_x_nominal_dict` * Test Co-authored-by: Dilan Pathirana 
<59329744+dilpath@users.noreply.github.com> --- petab/v1/observables.py | 2 +- petab/v2/core.py | 80 ++++++++++++++++++++++++++++++++++++++++ petab/v2/lint.py | 81 +++++------------------------------------ tests/v2/test_core.py | 74 +++++++++++++++++++++++++++++++++++++ 4 files changed, 165 insertions(+), 72 deletions(-) diff --git a/petab/v1/observables.py b/petab/v1/observables.py index 411c2a4c..38c539c7 100644 --- a/petab/v1/observables.py +++ b/petab/v1/observables.py @@ -84,7 +84,7 @@ def get_output_parameters( ) -> list[str]: """Get output parameters - Returns IDs of parameters used in observable and noise formulas that are + Returns IDs of parameters used in observable or noise formulas that are not defined in the model. Arguments: diff --git a/petab/v2/core.py b/petab/v2/core.py index 57916a8f..0e3f905e 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -1693,6 +1693,27 @@ def get_x_nominal(self, free: bool = True, fixed: bool = True) -> list: return self._apply_mask(v, free=free, fixed=fixed) + def get_x_nominal_dict( + self, free: bool = True, fixed: bool = True + ) -> dict[str, float]: + """Get parameter nominal values as dict. + + :param free: + Whether to return free parameters, i.e. parameters to estimate. + :param fixed: + Whether to return fixed parameters, i.e. parameters not to + estimate. + :returns: + A dictionary mapping parameter IDs to their nominal values. + """ + return dict( + zip( + self.get_x_ids(free=free, fixed=fixed), + self.get_x_nominal(free=free, fixed=fixed), + strict=True, + ) + ) + @property def x_nominal(self) -> list: """Parameter table nominal values""" @@ -2259,6 +2280,65 @@ def get_measurements_for_experiment( if measurement.experiment_id == experiment.id ] + def get_output_parameters( + self, observable: bool = True, noise: bool = True + ) -> list[str]: + """Get output parameters. + + Returns IDs of symbols used in observable and noise formulas that are + not observables and that are not defined in the model. 
+ + :param observable: + Include parameters from observableFormulas + :param noise: + Include parameters from noiseFormulas + :returns: + List of output parameter IDs, including any placeholder parameters. + """ + # collect free symbols from observable and noise formulas, + # skipping observable IDs + candidates = set() + if observable: + candidates |= { + str_sym + for o in self.observables + if o.formula is not None + for sym in o.formula.free_symbols + if (str_sym := str(sym)) != o.id + } + if noise: + candidates |= { + str_sym + for o in self.observables + if o.noise_formula is not None + for sym in o.noise_formula.free_symbols + if (str_sym := str(sym)) != o.id + } + + output_parameters = [] + + # filter out symbols that are defined in the model or mapped to + # such symbols + for candidate in sorted(candidates): + if self.model.symbol_allowed_in_observable_formula(candidate): + continue + + # does it map to a model entity? + for mapping in self.mappings: + if ( + mapping.petab_id == candidate + and mapping.model_id is not None + ): + if self.model.symbol_allowed_in_observable_formula( + mapping.model_id + ): + break + else: + # no mapping to a model entity, so it is an output parameter + output_parameters.append(candidate) + + return output_parameters + class ModelFile(BaseModel): """A file in the PEtab problem configuration.""" diff --git a/petab/v2/lint.py b/petab/v2/lint.py index 20f5dfc1..a80599a9 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -396,7 +396,7 @@ def run(self, problem: Problem) -> ValidationIssue | None: if problem.model else set() ) - allowed_targets |= set(get_output_parameters(problem)) + allowed_targets |= set(problem.get_output_parameters()) allowed_targets |= { m.petab_id for m in problem.mappings if m.model_id is not None } @@ -932,7 +932,7 @@ def get_valid_parameters_for_parameter_table( parameter_ids[mapping.petab_id] = None # add output parameters from observable table - output_parameters = get_output_parameters(problem) + 
output_parameters = problem.get_output_parameters() for p in output_parameters: if p not in invalid: parameter_ids[p] = None @@ -996,19 +996,18 @@ def append_overrides(overrides): for formula_type, placeholder_sources in ( ( # Observable formulae - {"observables": True, "noise": False}, + {"observable": True, "noise": False}, # can only contain observable placeholders - {"noise": False, "observables": True}, + {"noise": False, "observable": True}, ), ( # Noise formulae - {"observables": False, "noise": True}, + {"observable": False, "noise": True}, # can contain noise and observable placeholders - {"noise": True, "observables": True}, + {"noise": True, "observable": True}, ), ): - output_parameters = get_output_parameters( - problem, + output_parameters = problem.get_output_parameters( **formula_type, ) placeholders = get_placeholders( @@ -1034,69 +1033,9 @@ def append_overrides(overrides): return parameter_ids -def get_output_parameters( - problem: Problem, - observables: bool = True, - noise: bool = True, -) -> list[str]: - """Get output parameters - - Returns IDs of symbols used in observable and noise formulas that are - not observables and that are not defined in the model. - - Arguments: - problem: The PEtab problem - observables: Include parameters from observableFormulas - noise: Include parameters from noiseFormulas - - Returns: - List of output parameter IDs, including any placeholder parameters. 
- """ - # collect free symbols from observable and noise formulas, - # skipping observable IDs - candidates = set() - if observables: - candidates |= { - str_sym - for o in problem.observables - if o.formula is not None - for sym in o.formula.free_symbols - if (str_sym := str(sym)) != o.id - } - if noise: - candidates |= { - str_sym - for o in problem.observables - if o.noise_formula is not None - for sym in o.noise_formula.free_symbols - if (str_sym := str(sym)) != o.id - } - - output_parameters = OrderedDict() - - # filter out symbols that are defined in the model or mapped to - # such symbols - for candidate in sorted(candidates): - if problem.model.symbol_allowed_in_observable_formula(candidate): - continue - - # does it map to a model entity? - for mapping in problem.mappings: - if mapping.petab_id == candidate and mapping.model_id is not None: - if problem.model.symbol_allowed_in_observable_formula( - mapping.model_id - ): - break - else: - # no mapping to a model entity, so it is an output parameter - output_parameters[candidate] = None - - return list(output_parameters.keys()) - - def get_placeholders( problem: Problem, - observables: bool = True, + observable: bool = True, noise: bool = True, ) -> list[str]: """Get all placeholder parameters from observable table observableFormulas @@ -1104,7 +1043,7 @@ def get_placeholders( Arguments: problem: The PEtab problem - observables: Include parameters from observableFormulas + observable: Include parameters from observableFormulas noise: Include parameters from noiseFormulas Returns: @@ -1115,7 +1054,7 @@ def get_placeholders( # {observable,noise}Parameters placeholders = [] for o in problem.observables: - if observables: + if observable: placeholders.extend(map(str, o.observable_placeholders)) if noise: placeholders.extend(map(str, o.noise_placeholders)) diff --git a/tests/v2/test_core.py b/tests/v2/test_core.py index 0a1fe915..64e1ad41 100644 --- a/tests/v2/test_core.py +++ b/tests/v2/test_core.py @@ -762,3 
+762,77 @@ def make_yaml(id_line: str) -> str: f.write(make_yaml("")) problem = Problem.from_yaml(filepath) assert problem.id is None + + +def test_parameter_accessors(): # pylint: disable=W0621 + """ + Test the petab.Problem functions to get parameter values. + """ + petab_problem = Problem() + petab_problem += Parameter( + id="par1", lb=0, ub=100, nominal_value=7, estimate=True + ) + petab_problem += Parameter( + id="par2", lb=0.1, ub=100, nominal_value=8, estimate=True + ) + petab_problem += Parameter( + id="par3", lb=0.1, ub=200, nominal_value=9, estimate=False + ) + + assert petab_problem.x_ids == ["par1", "par2", "par3"] + assert petab_problem.x_free_ids == ["par1", "par2"] + assert petab_problem.x_fixed_ids == ["par3"] + assert petab_problem.lb == [0, 0.1, 0.1] + assert petab_problem.ub == [100, 100, 200] + assert petab_problem.x_nominal == [7, 8, 9] + assert petab_problem.x_nominal_free == [7, 8] + assert petab_problem.x_nominal_fixed == [9] + + assert ( + petab_problem.get_x_nominal_dict() + == petab_problem.get_x_nominal_dict(free=True, fixed=True) + == { + "par1": 7, + "par2": 8, + "par3": 9, + } + ) + assert petab_problem.get_x_nominal_dict(free=True, fixed=False) == { + "par1": 7, + "par2": 8, + } + assert petab_problem.get_x_nominal_dict(free=False, fixed=True) == { + "par3": 9, + } + + +def test_get_output_parameters(): + """Test Problem.get_output_parameters""" + petab_problem = Problem() + assert petab_problem.get_output_parameters() == [] + + petab_problem += Parameter(id="p1", lb=0, ub=100, estimate=True) + petab_problem.models.append(SbmlModel.from_antimony("p2 = 1")) + assert petab_problem.get_output_parameters() == [] + + petab_problem += Observable( + id="obs1", formula="p1 + p2", noise_formula="p1 * p2" + ) + assert petab_problem.get_output_parameters() == ["p1"] + + petab_problem += Observable( + id="obs1", + formula="p3 + p4", + noise_formula="p3 * p5", + ) + assert ( + petab_problem.get_output_parameters() + == 
petab_problem.get_output_parameters(observable=True, noise=True) + == ["p1", "p3", "p4", "p5"] + ) + assert petab_problem.get_output_parameters( + observable=True, noise=False + ) == ["p1", "p3", "p4"] + assert petab_problem.get_output_parameters( + observable=False, noise=True + ) == ["p1", "p3", "p5"] From f30fe6691ed8896663d4e4d972710d119e4492a4 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 16 Oct 2025 19:34:34 +0200 Subject: [PATCH 104/141] Misc fixes & annotations (#450) * type annotations * fix accessing non-existing model * remove misleading comments --------- Co-authored-by: Dilan Pathirana <59329744+dilpath@users.noreply.github.com> --- petab/v1/core.py | 7 +++++-- petab/v1/measurements.py | 2 -- petab/v2/core.py | 25 ++++++++++++++++++------- 3 files changed, 23 insertions(+), 11 deletions(-) diff --git a/petab/v1/core.py b/petab/v1/core.py index 1149c67e..6a142781 100644 --- a/petab/v1/core.py +++ b/petab/v1/core.py @@ -133,7 +133,9 @@ def get_notnull_columns(df: pd.DataFrame, candidates: Iterable): ] -def get_observable_replacement_id(groupvars, groupvar) -> str: +def get_observable_replacement_id( + groupvars: list[str], groupvar: Sequence +) -> str: """Get the replacement ID for an observable. Arguments: @@ -141,7 +143,8 @@ def get_observable_replacement_id(groupvars, groupvar) -> str: The columns of a PEtab measurement table that should be unique between observables in a flattened PEtab problem. groupvar: - A specific grouping of `groupvars`. + A specific grouping of `groupvars`. Same length and order as + `groupvars`. Returns: The observable replacement ID. 
diff --git a/petab/v1/measurements.py b/petab/v1/measurements.py index 8b23907b..f23a21c1 100644 --- a/petab/v1/measurements.py +++ b/petab/v1/measurements.py @@ -307,7 +307,6 @@ def assert_overrides_match_parameter_count( row.get(OBSERVABLE_PARAMETERS, None) ) ) - # No overrides are also allowed if actual != expected: formula = observable_df.loc[row[OBSERVABLE_ID], OBSERVABLE_FORMULA] raise AssertionError( @@ -324,7 +323,6 @@ def assert_overrides_match_parameter_count( try: expected = noise_parameters_count[row[OBSERVABLE_ID]] - # No overrides are also allowed if len(replacements) != expected: raise AssertionError( f"Mismatch of noise parameter overrides in:\n{row}\n" diff --git a/petab/v2/core.py b/petab/v2/core.py index 0e3f905e..4ced4b9c 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -71,6 +71,8 @@ "ParameterTable", ] +logger = logging.getLogger(__name__) + def _is_finite_or_neg_inf(v: float, info: ValidationInfo) -> float: if not np.isfinite(v) and v != -np.inf: @@ -112,7 +114,7 @@ def _valid_petab_id(v: str) -> str: return v -def _valid_petab_id_or_none(v: str) -> str: +def _valid_petab_id_or_none(v: str) -> str | None: """Field validator for optional PEtab IDs.""" if not v: return None @@ -252,7 +254,7 @@ def __getitem__(self, id_: str) -> T: @classmethod @abstractmethod - def from_df(cls, df: pd.DataFrame) -> BaseTable[T]: + def from_df(cls, df: pd.DataFrame, **kwargs) -> BaseTable[T]: """Create a table from a DataFrame.""" pass @@ -1143,7 +1145,11 @@ def __str__(self): f"{observables}, {measurements}, {parameters}" ) - def __getitem__(self, key): + def __getitem__( + self, key + ) -> ( + Condition | Experiment | Observable | Measurement | Parameter | Mapping + ): """Get PEtab entity by ID. 
This allows accessing PEtab entities such as conditions, experiments, @@ -1202,7 +1208,7 @@ def from_yaml( from .petab1to2 import petab1to2 if format_version[0] == 1 and yaml_file: - logging.debug( + logger.debug( "Auto-upgrading problem from PEtab 1.0 to PEtab 2.0" ) with TemporaryDirectory() as tmpdirname: @@ -2320,7 +2326,9 @@ def get_output_parameters( # filter out symbols that are defined in the model or mapped to # such symbols for candidate in sorted(candidates): - if self.model.symbol_allowed_in_observable_formula(candidate): + if self.model and self.model.symbol_allowed_in_observable_formula( + candidate + ): continue # does it map to a model entity? @@ -2329,8 +2337,11 @@ def get_output_parameters( mapping.petab_id == candidate and mapping.model_id is not None ): - if self.model.symbol_allowed_in_observable_formula( - mapping.model_id + if ( + self.model + and self.model.symbol_allowed_in_observable_formula( + mapping.model_id + ) ): break else: From 75f22220775427e959375090b132f0873b0000fc Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 23 Oct 2025 07:37:26 +0200 Subject: [PATCH 105/141] Update `test_simulate_experiment_to_events` for antimony 3.1.0 In antimony>=3.1.0, initial values for substanceOnly species are interpreted as amounts. Before, they were interpreted as concentrations. Update `test_simulate_experiment_to_events` accordingly and require >=3.1.0 for tests. 
--- pyproject.toml | 2 +- tests/v2/test_converters.py | 11 ++++++----- 2 files changed, 7 insertions(+), 6 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 182c546c..1f3dcb51 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -36,7 +36,7 @@ maintainers = [ [project.optional-dependencies] tests = [ - "antimony>=2.14.0", + "antimony>=3.1.0", "copasi-basico>=0.85", "pysb", "pytest", diff --git a/tests/v2/test_converters.py b/tests/v2/test_converters.py index d640a908..7f031fc7 100644 --- a/tests/v2/test_converters.py +++ b/tests/v2/test_converters.py @@ -96,11 +96,12 @@ def test_simulate_experiment_to_events(): species s2c_comp1 in comp1 = 3 species s2c_comp2 in comp2 = 4 # amount-based species - # (note: the initial values are concentrations nonetheless) - substanceOnly species s3a_comp1 in comp1 = 5 - substanceOnly species s3a_comp2 in comp2 = 6 - substanceOnly species s4a_comp1 in comp1 = 7 - substanceOnly species s4a_comp2 in comp2 = 8 + # (note that in antimony<3.1.0 the initial values are concentrations + # nonetheless) + substanceOnly species s3a_comp1 in comp1 = 5 * comp1 + substanceOnly species s3a_comp2 in comp2 = 6 * comp2 + substanceOnly species s4a_comp1 in comp1 = 7 * comp1 + substanceOnly species s4a_comp2 in comp2 = 8 * comp2 # something dynamic some_species in comp1 = 0 From 97bf7703e0016e0760091c9830c5dce5277f8a98 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 27 Oct 2025 12:53:43 +0100 Subject: [PATCH 106/141] Fix `_v1getattr` --- petab/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/petab/__init__.py b/petab/__init__.py index c6a40871..348f1269 100644 --- a/petab/__init__.py +++ b/petab/__init__.py @@ -26,7 +26,7 @@ def __getattr__(name): return importlib.import_module("petab.v1") if name == "v2": return importlib.import_module("petab.v2") - if name not in ("__path__", "__all__"): + if name not in ("__path__", "__all__", "__wrapped__"): warn( f"Accessing `petab.{name}` is deprecated and 
will be removed in " f"the next major release. Please use `petab.v1.{name}` instead.", @@ -37,7 +37,7 @@ def __getattr__(name): def _v1getattr(name, module): - if name not in ("__path__", "__all__"): + if name not in ("__path__", "__all__", "__wrapped__"): warn( f"Accessing `petab.{name}` is deprecated and will be removed in " f"the next major release. Please use `petab.v1.{name}` instead.", From 88452ef223568bbcc6e395bde83c0304f0de181c Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Tue, 28 Oct 2025 14:24:22 +0100 Subject: [PATCH 107/141] Add PySBModel.to_str (#451) Convenient for debugging. --- petab/v1/models/pysb_model.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/petab/v1/models/pysb_model.py b/petab/v1/models/pysb_model.py index 1a615e0f..6927ecb9 100644 --- a/petab/v1/models/pysb_model.py +++ b/petab/v1/models/pysb_model.py @@ -93,14 +93,19 @@ def from_file( ) def to_file(self, filename: str | Path | None = None) -> None: - from pysb.export import export + model_source = self.to_str() - model_source = export(self.model, "pysb_flat") with open( filename or _generate_path(self.rel_path, self.base_path), "w" ) as f: f.write(model_source) + def to_str(self) -> str: + """Get the PySB model Python code as a string.""" + from pysb.export import export + + return export(self.model, "pysb_flat") + @property def model_id(self): return self._model_id From c2efc2bbddd378182e34e303ec9f104fa8578c2e Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Wed, 29 Oct 2025 14:51:13 +0100 Subject: [PATCH 108/141] v2: validate mappings (#452) https://petab.readthedocs.io/en/latest/v2/documentation_data_format.html#mapping-table: > The petabEntityId may be the same as the modelEntityId, but it must not be used to alias an entity that already has a valid PEtab identifier. This restriction is to avoid unnecessary complexity in the PEtab problem files. 
--- petab/v2/core.py | 16 ++++++++++++++++ tests/v2/test_core.py | 34 ++++++++++++++++++++++++++++++++-- 2 files changed, 48 insertions(+), 2 deletions(-) diff --git a/petab/v2/core.py b/petab/v2/core.py index 4ced4b9c..5e8c59d0 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -846,6 +846,22 @@ class Mapping(BaseModel): populate_by_name=True, extra="allow", validate_assignment=True ) + @model_validator(mode="after") + def _validate(self) -> Self: + if ( + self.model_id + and self.model_id != self.petab_id + and is_valid_identifier(self.model_id) + ): + raise ValueError( + "Aliasing of entities that already have a valid identifier " + "is not allowed. Simplify your PEtab problem by removing the " + f"mapping entry for `{self.petab_id} -> {self.model_id}`, " + f"and replacing all occurrences of `{self.petab_id}` with " + f"`{self.model_id}`." + ) + return self + class MappingTable(BaseTable[Mapping]): """PEtab mapping table.""" diff --git a/tests/v2/test_core.py b/tests/v2/test_core.py index 64e1ad41..060fbb32 100644 --- a/tests/v2/test_core.py +++ b/tests/v2/test_core.py @@ -524,12 +524,12 @@ def test_modify_problem(): check_dtype=False, ) - problem.add_mapping("new_petab_id", "some_model_entity_id") + problem.add_mapping("new_petab_id", "1some_model_entity_id") exp_mapping_df = pd.DataFrame( data={ PETAB_ENTITY_ID: ["new_petab_id"], - MODEL_ENTITY_ID: ["some_model_entity_id"], + MODEL_ENTITY_ID: ["1some_model_entity_id"], NAME: [None], } ).set_index([PETAB_ENTITY_ID]) @@ -836,3 +836,33 @@ def test_get_output_parameters(): assert petab_problem.get_output_parameters( observable=False, noise=True ) == ["p1", "p3", "p5"] + + +def test_mapping_validation(): + """Test that invalid mapping entries raise errors.""" + + # alias invalid model entity ID + Mapping( + petab_id="valid_id", + model_id=" 1_invalid", + ) + + with pytest.raises(ValidationError, match="Invalid ID"): + # invalid petab entity ID + Mapping( + petab_id="1_invalid", + model_id="valid_id", + ) + + 
with pytest.raises(ValidationError, match="Aliasing.*not allowed"): + # unnecessary aliasing is forbidden + Mapping( + petab_id="forbidden_alias_of_valid_id", + model_id="valid_id", + ) + + # missing model_id is valid (annotation-only entry) + Mapping(petab_id="valid_id", name="some name") + + # identity mapping is valid + Mapping(petab_id="valid_id", model_id="valid_id", name="some name") From 553cf1d44ec74d18148c79bf2e5cc1273b490935 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Wed, 29 Oct 2025 12:01:03 +0100 Subject: [PATCH 109/141] Prepare v0.7.0 --- CHANGELOG.md | 27 +++++++++++++++++++++++++++ petab/version.py | 2 +- 2 files changed, 28 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 8712d235..cde085ab 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,32 @@ # PEtab changelog +## 0.7 series + +## 0.7.0 series + +**Fixes** + +* Misc minor `petab.v2` + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/445 + and https://github.com/PEtab-dev/libpetab-python/pull/450) +* Fixed serialization of `priorParameters` + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/449) + +**Features** + +* Added `PySBModel.to_str` + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/451) +* Added `id` field to `v2.ProblemConfig` + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/442) +* Updated `ExperimentsToEventsConverter` to changed initialization semantics + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/443) +* Added `v2.Problem.{get_output_parameters,get_x_nominal_dict}` + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/447) +* Extended PEtab v2 mapping table validation + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/452) + +**Full Changelog**: https://github.com/PEtab-dev/libpetab-python/compare/v0.6.0...v0.7.0 + ## 0.6 series ### 0.6.0 diff --git a/petab/version.py b/petab/version.py index 1b607c15..2ea32e4d 100644 --- 
a/petab/version.py +++ b/petab/version.py @@ -1,3 +1,3 @@ """PEtab library version""" -__version__ = "0.6.0" +__version__ = "0.7.0" From 8dc6c1c4b801fba5acc35fcd25308a659d01050e Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Sat, 1 Nov 2025 21:35:02 +0100 Subject: [PATCH 110/141] Fix v2.Problem.id --- petab/v2/core.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/petab/v2/core.py b/petab/v2/core.py index 5e8c59d0..6535043d 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -1610,7 +1610,7 @@ def mappings(self) -> list[Mapping]: @property def id(self) -> str | None: """The ID of the PEtab problem if set, ``None`` otherwise.""" - return self.config.id + return self.config.id if self.config else None @id.setter def id(self, value: str): From 10840052fadd747980da4a0837a3170c1bcddf1a Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Wed, 5 Nov 2025 17:27:18 +0100 Subject: [PATCH 111/141] v2: Remove log10-normal distribution (#456) Remove LOG10_NORMAL from `petab.v2.{PriorDistribution,NoiseDistribution}` which has been removed from PEtab v2 (https://github.com/PEtab-dev/PEtab/pull/644). Also update v1->v2 conversion accordingly. --- petab/v2/C.py | 3 --- petab/v2/calculate.py | 5 +---- petab/v2/core.py | 5 ----- petab/v2/lint.py | 1 - petab/v2/petab1to2.py | 17 ++++++++++++++--- 5 files changed, 15 insertions(+), 16 deletions(-) diff --git a/petab/v2/C.py b/petab/v2/C.py index e680450e..e640ae5c 100644 --- a/petab/v2/C.py +++ b/petab/v2/C.py @@ -199,8 +199,6 @@ GAMMA = "gamma" #: Laplace distribution LAPLACE = "laplace" -#: Log10-normal distribution. 
-LOG10_NORMAL = "log10-normal" #: Log-Laplace distribution LOG_LAPLACE = "log-laplace" #: Log-normal distribution @@ -221,7 +219,6 @@ EXPONENTIAL, GAMMA, LAPLACE, - LOG10_NORMAL, LOG_LAPLACE, LOG_NORMAL, LOG_UNIFORM, diff --git a/petab/v2/calculate.py b/petab/v2/calculate.py index 830c2d89..f377b1b7 100644 --- a/petab/v2/calculate.py +++ b/petab/v2/calculate.py @@ -447,10 +447,7 @@ def calculate_single_llh( The computed likelihood for the given values. """ # PEtab v2: - if noise_distribution == LOG10_NORMAL and scale == LIN: - noise_distribution = NORMAL - scale = LOG10 - elif noise_distribution == LOG_NORMAL and scale == LIN: + if noise_distribution == LOG_NORMAL and scale == LIN: noise_distribution = NORMAL scale = LOG diff --git a/petab/v2/core.py b/petab/v2/core.py index 6535043d..a8394caf 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -148,8 +148,6 @@ class NoiseDistribution(str, Enum): LOG_NORMAL = C.LOG_NORMAL #: Log-Laplace distribution LOG_LAPLACE = C.LOG_LAPLACE - #: Log10-Normal - LOG10_NORMAL = C.LOG10_NORMAL class PriorDistribution(str, Enum): @@ -168,8 +166,6 @@ class PriorDistribution(str, Enum): GAMMA = C.GAMMA #: Laplace distribution. LAPLACE = C.LAPLACE - #: Log10-normal distribution. - LOG10_NORMAL = C.LOG10_NORMAL #: Log-Laplace distribution LOG_LAPLACE = C.LOG_LAPLACE #: Log-normal distribution. 
@@ -195,7 +191,6 @@ class PriorDistribution(str, Enum): PriorDistribution.EXPONENTIAL: Exponential, PriorDistribution.GAMMA: Gamma, PriorDistribution.LAPLACE: Laplace, - PriorDistribution.LOG10_NORMAL: Normal, PriorDistribution.LOG_LAPLACE: Laplace, PriorDistribution.LOG_NORMAL: Normal, PriorDistribution.LOG_UNIFORM: Uniform, diff --git a/petab/v2/lint.py b/petab/v2/lint.py index a80599a9..178abc79 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -802,7 +802,6 @@ class CheckPriorDistribution(ValidationTask): PriorDistribution.EXPONENTIAL: 1, PriorDistribution.GAMMA: 2, PriorDistribution.LAPLACE: 2, - PriorDistribution.LOG10_NORMAL: 2, PriorDistribution.LOG_LAPLACE: 2, PriorDistribution.LOG_NORMAL: 2, PriorDistribution.LOG_UNIFORM: 2, diff --git a/petab/v2/petab1to2.py b/petab/v2/petab1to2.py index 8e1c7e85..e3bd4f2d 100644 --- a/petab/v2/petab1to2.py +++ b/petab/v2/petab1to2.py @@ -59,7 +59,7 @@ def petab1to2( return v2.Problem.from_yaml(Path(tmp_dir, Path(yaml_config).name)) -def petab_files_1to2(yaml_config: Path | str, output_dir: Path | str): +def petab_files_1to2(yaml_config: Path | str | dict, output_dir: Path | str): """Convert PEtab files from PEtab 1.0 to PEtab 2.0. @@ -404,7 +404,8 @@ def update_noise_dist(row): f"observable `{row[v1.C.OBSERVABLE_ID]}'" f" is not supported in PEtab v2. " "Using `log-normal` instead.", - stacklevel=2, + # call to `petab1to2` + stacklevel=9, ) new_dist = v2.C.LOG_NORMAL @@ -490,10 +491,20 @@ def update_prior(row): if pscale != v1.C.LIN: new_prior_type = f"{pscale}-{new_prior_type}" + if new_prior_type == "log10-normal": + warnings.warn( + f"Prior distribution `{new_prior_type}' for parameter " + f"`{row.name}' is not supported in PEtab v2. 
" + "Using `log-normal` instead.", + # call to `petab1to2` + stacklevel=9, + ) + new_prior_type = v2.C.LOG_NORMAL + if new_prior_type not in v2.C.PRIOR_DISTRIBUTIONS: raise NotImplementedError( f"PEtab v2 does not support prior type `{new_prior_type}' " - f"required for parameter `{row.index}'." + f"required for parameter `{row.name}'." ) return new_prior_type From fde12db9427b05f870d76c4fe8e67ec3d7b2c66a Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Wed, 5 Nov 2025 17:28:00 +0100 Subject: [PATCH 112/141] Remove petab.v2-is-experimental warnings (#457) This has been communicated via the release notes. I don't think we need to annoy users with these warnings. --- petab/v1/problem.py | 1 - petab/v2/__init__.py | 8 -------- pytest.ini | 2 -- 3 files changed, 11 deletions(-) diff --git a/petab/v1/problem.py b/petab/v1/problem.py index 4f7df659..6da82a2f 100644 --- a/petab/v1/problem.py +++ b/petab/v1/problem.py @@ -302,7 +302,6 @@ def get_path(filename): f"{yaml_config[FORMAT_VERSION]}." ) if major_version == 2: - warn("Support for PEtab2.0 is experimental!", stacklevel=2) warn( "Using petab.v1.Problem with PEtab2.0 is deprecated. " "Use petab.v2.Problem instead.", diff --git a/petab/v2/__init__.py b/petab/v2/__init__.py index c897c9c4..427fd3c9 100644 --- a/petab/v2/__init__.py +++ b/petab/v2/__init__.py @@ -3,14 +3,6 @@ Contains all functionality related to handling PEtab 2.0 problems. 
""" -from warnings import warn - -warn( - "Support for PEtab2.0 and all of petab.v2 is experimental " - "and subject to changes!", - stacklevel=1, -) - # TODO: move this module to v2 from petab.v1.mapping import ( # noqa: F403, F401, E402 get_mapping_df, diff --git a/pytest.ini b/pytest.ini index b5f0c04d..721d63d9 100644 --- a/pytest.ini +++ b/pytest.ini @@ -7,7 +7,5 @@ filterwarnings = error # TODO: until tests are reorganized for petab.v1 ignore::DeprecationWarning - ignore:Support for PEtab2.0 and all of petab.v2 is experimental:UserWarning - ignore:Support for PEtab2.0 is experimental:UserWarning ignore:.*inspect.getargspec\(\) is deprecated.*:DeprecationWarning ignore:.*Passing unrecognized arguments to super\(PyDevIPCompleter6\).*:DeprecationWarning From d83ca2b9167b5254475ebe8289fa814672ab4ec0 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Wed, 5 Nov 2025 22:49:31 +0100 Subject: [PATCH 113/141] Add `v2.Problem.__repr__` (#458) Add `v2.Problem.__repr__`, and include ID and all models in `__str__`. 
--- petab/v2/core.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/petab/v2/core.py b/petab/v2/core.py index a8394caf..31296bf0 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -1133,8 +1133,13 @@ def __init__( self.mapping_tables = mapping_tables or [MappingTable()] self.parameter_tables = parameter_tables or [ParameterTable()] + def __repr__(self): + return f"<{self.__class__.__name__} id={self.id!r}>" + def __str__(self): - model = f"with model ({self.model})" if self.model else "without model" + pid = repr(self.id) if self.id else "without ID" + + model = f"with models {self.models}" if self.model else "without model" ne = len(self.experiments) experiments = f"{ne} experiments" @@ -1152,7 +1157,7 @@ def __str__(self): parameters = f"{nest} estimated parameters" return ( - f"PEtab Problem {model}, {conditions}, {experiments}, " + f"PEtab Problem {pid} {model}, {conditions}, {experiments}, " f"{observables}, {measurements}, {parameters}" ) From d209a292b942f43c3f4bc55c8dd5c8dfda85cc49 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Tue, 11 Nov 2025 16:31:54 +0100 Subject: [PATCH 114/141] Handle KeyError in CheckInitialChangeSymbols (#459) We don't want KeyErrors in petablint output. --- petab/v2/lint.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/petab/v2/lint.py b/petab/v2/lint.py index 178abc79..1fe83144 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -768,8 +768,15 @@ def run(self, problem: Problem) -> ValidationIssue | None: if condition_id in valid_conditions: continue - # we assume that all referenced condition IDs are valid - condition = id_to_condition[condition_id] + try: + condition = id_to_condition[condition_id] + except KeyError: + messages.append( + f"Unable to validate changes for condition " + f"{condition_id} applied at the start of " + f"experiment {experiment.id}, as the condition " + "does not exist." 
+ ) used_symbols = { str(sym) From 107fbb9f7ae69590f43db37b69ab47c1ef816de0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 25 Nov 2025 10:13:00 +0100 Subject: [PATCH 115/141] build(deps): bump actions/checkout from 5 to 6 (#460) Bumps [actions/checkout](https://github.com/actions/checkout) from 5 to 6. - [Release notes](https://github.com/actions/checkout/releases) - [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md) - [Commits](https://github.com/actions/checkout/compare/v5...v6) --- updated-dependencies: - dependency-name: actions/checkout dependency-version: '6' dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/ci_tests.yml | 2 +- .github/workflows/deploy.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/ci_tests.yml b/.github/workflows/ci_tests.yml index ba6b3c6a..d45e27ad 100644 --- a/.github/workflows/ci_tests.yml +++ b/.github/workflows/ci_tests.yml @@ -17,7 +17,7 @@ jobs: steps: - name: Check out repository - uses: actions/checkout@v5 + uses: actions/checkout@v6 - name: Prepare python ${{ matrix.python-version }} uses: actions/setup-python@v6 diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index c77a94a7..9aa2bbf6 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -15,7 +15,7 @@ jobs: steps: - name: Check out repository - uses: actions/checkout@v5 + uses: actions/checkout@v6 - name: Set up python uses: actions/setup-python@v6 with: From 542941251bdb5f938f608af9aa2cc700afabc2fa Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Wed, 3 Dec 2025 07:55:37 +0100 Subject: [PATCH 116/141] Fix missing imports / exports --- petab/v2/__init__.py | 1 + petab/v2/core.py | 1 + 2 files changed, 2 insertions(+) diff --git 
a/petab/v2/__init__.py b/petab/v2/__init__.py index 427fd3c9..a9f018c5 100644 --- a/petab/v2/__init__.py +++ b/petab/v2/__init__.py @@ -4,6 +4,7 @@ """ # TODO: move this module to v2 +from petab.v1.distributions import * # noqa: F401, E402 from petab.v1.mapping import ( # noqa: F403, F401, E402 get_mapping_df, write_mapping_df, diff --git a/petab/v2/core.py b/petab/v2/core.py index 31296bf0..beadbc2e 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -69,6 +69,7 @@ "Parameter", "ParameterScale", "ParameterTable", + "PriorDistribution", ] logger = logging.getLogger(__name__) From 33dd776bcd91c23f91a4e1eed1afd88b1f074bab Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Wed, 3 Dec 2025 15:56:15 +0100 Subject: [PATCH 117/141] Fix return type in petab.v2.calculate (#464) Annotation says float, but functions returned np.float64. --- petab/v2/calculate.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/petab/v2/calculate.py b/petab/v2/calculate.py index f377b1b7..854e51c9 100644 --- a/petab/v2/calculate.py +++ b/petab/v2/calculate.py @@ -308,14 +308,14 @@ def calculate_chi2( chi2s = [ calculate_chi2_for_table_from_residuals(df) for df in residual_dfs ] - return sum(chi2s) + return float(sum(chi2s)) def calculate_chi2_for_table_from_residuals( residual_df: pd.DataFrame, ) -> float: """Compute chi2 value for a single residual table.""" - return (np.array(residual_df[RESIDUAL]) ** 2).sum() + return float((np.array(residual_df[RESIDUAL]) ** 2).sum()) def calculate_llh( @@ -362,7 +362,7 @@ def calculate_llh( measurement_df, simulation_df, observable_df, parameter_df ) llhs.append(_llh) - return sum(llhs) + return float(sum(llhs)) def calculate_llh_for_table( @@ -423,7 +423,7 @@ def calculate_llh_for_table( measurement, simulation, obs_scale, noise_distr, noise_value ) llhs.append(llh) - return sum(llhs) + return float(sum(llhs)) def calculate_single_llh( From 3d4ff0777b664843952507215af77d37a69d3049 Mon Sep 17 00:00:00 2001 From: Daniel Weindl 
Date: Thu, 4 Dec 2025 12:06:12 +0100 Subject: [PATCH 118/141] Test with Python 3.14 (#448) --- .github/workflows/ci_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci_tests.yml b/.github/workflows/ci_tests.yml index d45e27ad..5b5a9bd4 100644 --- a/.github/workflows/ci_tests.yml +++ b/.github/workflows/ci_tests.yml @@ -12,7 +12,7 @@ jobs: strategy: matrix: platform: [windows-latest, macos-latest, ubuntu-latest] - python-version: ["3.10", "3.13"] + python-version: ["3.10", "3.14"] runs-on: ${{ matrix.platform }} steps: From 041246e33f0887002876c788aa126880af64c4c0 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 4 Dec 2025 15:53:36 +0100 Subject: [PATCH 119/141] Remove pyarrow dependency --- pyproject.toml | 3 --- 1 file changed, 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 1f3dcb51..e0c62cd7 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -13,9 +13,6 @@ requires-python = ">=3.10" dependencies = [ "numpy>=1.15.1", "pandas>=1.2.0", - # remove when pandas >= 3, see also - # https://github.com/pandas-dev/pandas/issues/54466 - "pyarrow", "python-libsbml>=5.17.0", "sympy", "colorama", From 1dd3b7b4074e9f891815db0c47e4c78f9491f229 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 4 Dec 2025 19:32:44 +0100 Subject: [PATCH 120/141] Implement sampling for v2 prior distributions (#461) --- petab/v1/distributions.py | 27 ++++++++++++++++++++++----- tests/v1/test_distributions.py | 5 +++++ 2 files changed, 27 insertions(+), 5 deletions(-) diff --git a/petab/v1/distributions.py b/petab/v1/distributions.py index 411add56..de7a638c 100644 --- a/petab/v1/distributions.py +++ b/petab/v1/distributions.py @@ -508,6 +508,9 @@ def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float: return cauchy.ppf(q, loc=self._loc, scale=self._scale) + def _sample(self, shape=None) -> np.ndarray | float: + return cauchy.rvs(loc=self._loc, 
scale=self._scale, size=shape) + @property def loc(self) -> float: """The location parameter of the underlying distribution.""" @@ -541,14 +544,16 @@ class ChiSquare(Distribution): def __init__( self, - dof: int, + dof: int | float, trunc: tuple[float, float] | None = None, log: bool | float = False, ): - if not dof.is_integer() or dof < 1: - raise ValueError( - f"`dof' must be a positive integer, but was `{dof}'." - ) + if isinstance(dof, float): + if not dof.is_integer() or dof < 1: + raise ValueError( + f"`dof' must be a positive integer, but was `{dof}'." + ) + dof = int(dof) self._dof = dof super().__init__(log=log, trunc=trunc) @@ -565,6 +570,9 @@ def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float: return chi2.ppf(q, df=self._dof) + def _sample(self, shape=None) -> np.ndarray | float: + return chi2.rvs(df=self._dof, size=shape) + @property def dof(self) -> int: """The degrees of freedom parameter.""" @@ -602,6 +610,9 @@ def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float: return expon.ppf(q, scale=self._scale) + def _sample(self, shape=None) -> np.ndarray | float: + return expon.rvs(scale=self._scale, size=shape) + @property def scale(self) -> float: """The scale parameter of the underlying distribution.""" @@ -650,6 +661,9 @@ def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float: return gamma.ppf(q, a=self._shape, scale=self._scale) + def _sample(self, shape=None) -> np.ndarray | float: + return gamma.rvs(a=self._shape, scale=self._scale, size=shape) + @property def shape(self) -> float: """The shape parameter of the underlying distribution.""" @@ -700,6 +714,9 @@ def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float: return rayleigh.ppf(q, 
scale=self._scale) + def _sample(self, shape=None) -> np.ndarray | float: + return rayleigh.rvs(scale=self._scale, size=shape) + @property def scale(self) -> float: """The scale parameter of the underlying distribution.""" diff --git a/tests/v1/test_distributions.py b/tests/v1/test_distributions.py index e06d9edc..7b7cd4aa 100644 --- a/tests/v1/test_distributions.py +++ b/tests/v1/test_distributions.py @@ -34,6 +34,11 @@ Normal(2, 1, log=10), Laplace(1, 2, trunc=(1, 2)), Laplace(1, 0.5, log=True, trunc=(0.5, 8)), + Cauchy(2, 1), + ChiSquare(4), + Exponential(1), + Gamma(3, 5), + Rayleigh(3), ], ) def test_sample_matches_pdf(distribution): From a16510830397a9f71d915c8f1e2e44302fb69a5f Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 4 Dec 2025 19:45:02 +0100 Subject: [PATCH 121/141] Require Python>=3.11 per nep-0029 (#462) --- .github/workflows/ci_tests.yml | 2 +- .github/workflows/deploy.yml | 2 +- .readthedocs.yaml | 4 ++-- README.md | 6 +++--- petab/v2/core.py | 11 +++++++++-- pyproject.toml | 2 +- 6 files changed, 17 insertions(+), 10 deletions(-) diff --git a/.github/workflows/ci_tests.yml b/.github/workflows/ci_tests.yml index 5b5a9bd4..5d9f85b7 100644 --- a/.github/workflows/ci_tests.yml +++ b/.github/workflows/ci_tests.yml @@ -12,7 +12,7 @@ jobs: strategy: matrix: platform: [windows-latest, macos-latest, ubuntu-latest] - python-version: ["3.10", "3.14"] + python-version: ["3.11", "3.14"] runs-on: ${{ matrix.platform }} steps: diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 9aa2bbf6..47cdc51d 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -19,7 +19,7 @@ jobs: - name: Set up python uses: actions/setup-python@v6 with: - python-version: 3.11 + python-version: 3.13 - name: Install dependencies / build sdist run: | diff --git a/.readthedocs.yaml b/.readthedocs.yaml index 90c2fc8f..51f9841e 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -8,9 +8,9 @@ sphinx: fail_on_warning: false 
build: - os: "ubuntu-22.04" + os: "ubuntu-24.04" tools: - python: "3.10" + python: "3.12" python: install: diff --git a/README.md b/README.md index bf05c459..7888714e 100644 --- a/README.md +++ b/README.md @@ -17,13 +17,13 @@ Documentation of the PEtab format in general is available at ## Installation -The PEtab library is available on [pypi](https://pypi.org/project/petab/) +The PEtab library is available on [PyPI](https://pypi.org/project/petab/) and the easiest way to install it is running pip3 install petab -It will require Python>=3.10 to run. (We are following the -[numpy Python support policy](https://numpy.org/neps/nep-0029-deprecation_policy.html)). +`petab` requires Python>=3.11. We are following +[NumPy's Python support policy](https://numpy.org/neps/nep-0029-deprecation_policy.html). Development versions of the PEtab library can be installed using diff --git a/petab/v2/core.py b/petab/v2/core.py index beadbc2e..9727b21d 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -14,7 +14,15 @@ from math import nan from numbers import Number from pathlib import Path -from typing import TYPE_CHECKING, Annotated, Any, Generic, TypeVar, get_args +from typing import ( + TYPE_CHECKING, + Annotated, + Any, + Generic, + Self, + TypeVar, + get_args, +) import numpy as np import pandas as pd @@ -31,7 +39,6 @@ field_validator, model_validator, ) -from typing_extensions import Self from .._utils import _generate_path from ..v1 import ( diff --git a/pyproject.toml b/pyproject.toml index e0c62cd7..2d36dbba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ build-backend = "setuptools.build_meta" name = "petab" dynamic = ["version", "readme"] description = "Parameter estimation tabular data" -requires-python = ">=3.10" +requires-python = ">=3.11" dependencies = [ "numpy>=1.15.1", "pandas>=1.2.0", From fd1fa76dc399bb7b72c95a02a91ac690c1557803 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 11 Dec 2025 09:20:39 +0100 Subject: [PATCH 122/141] Add 
LogUniform distribution (#465) Add an explicit LogUniform distribution class. The interpretation of the distribution parameter is different from the existing `Uniform(a, b, log=True)`. In PEtab v2, X ~ LogUniform(a, b) <=> ln(X) ~ Uniform(ln(a), ln(b)). However, in PEtab v1, a `parameterScaleUniform` prior for a parameterScale=log parameter is interpreted as ln(X) ~ Uniform(a, b). --- petab/v1/distributions.py | 44 ++++++++++++++++++++++++++++++++++ petab/v2/core.py | 9 +++++-- tests/v1/test_distributions.py | 18 ++++++++++++++ 3 files changed, 69 insertions(+), 2 deletions(-) diff --git a/petab/v1/distributions.py b/petab/v1/distributions.py index de7a638c..f8e807da 100644 --- a/petab/v1/distributions.py +++ b/petab/v1/distributions.py @@ -36,6 +36,7 @@ "Normal", "Rayleigh", "Uniform", + "LogUniform", ] @@ -382,6 +383,10 @@ class Uniform(Distribution): If ``False``, no transformation is applied. If a transformation is applied, the lower and upper bounds are the lower and upper bounds of the underlying uniform distribution. + Note that this differs from the usual definition of a log-uniform + distribution, where the logarithm of the variable is uniformly + distributed between the logarithms of the bounds (see also + :class:`LogUniform`). """ def __init__( @@ -411,6 +416,45 @@ def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float: return uniform.ppf(q, loc=self._low, scale=self._high - self._low) +class LogUniform(Distribution): + """A log-uniform or reciprocal distribution. + + A random variable is log-uniformly distributed between ``low`` and ``high`` + if its logarithm is uniformly distributed between ``log(low)`` and + ``log(high)``. + + :param low: The lower bound of the distribution. + :param high: The upper bound of the distribution. + :param trunc: The truncation limits of the distribution. 
+ """ + + def __init__( + self, + low: float, + high: float, + trunc: tuple[float, float] | None = None, + ): + self._logbase = np.exp(1) + self._low = self._log(low) + self._high = self._log(high) + super().__init__(log=self._logbase, trunc=trunc) + + def __repr__(self): + return self._repr({"low": self._low, "high": self._high}) + + def _sample(self, shape=None) -> np.ndarray | float: + return np.random.uniform(low=self._low, high=self._high, size=shape) + + def _pdf_untransformed_untruncated(self, x) -> np.ndarray | float: + return uniform.pdf(x, loc=self._low, scale=self._high - self._low) + + def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: + return uniform.cdf(x, loc=self._low, scale=self._high - self._low) + + def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float: + return uniform.ppf(q, loc=self._low, scale=self._high - self._low) + + class Laplace(Distribution): """A (log-)Laplace distribution. diff --git a/petab/v2/core.py b/petab/v2/core.py index 9727b21d..22453878 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -201,7 +201,7 @@ class PriorDistribution(str, Enum): PriorDistribution.LAPLACE: Laplace, PriorDistribution.LOG_LAPLACE: Laplace, PriorDistribution.LOG_NORMAL: Normal, - PriorDistribution.LOG_UNIFORM: Uniform, + PriorDistribution.LOG_UNIFORM: LogUniform, PriorDistribution.NORMAL: Normal, PriorDistribution.RAYLEIGH: Rayleigh, PriorDistribution.UNIFORM: Uniform, @@ -1060,7 +1060,12 @@ def prior_dist(self) -> Distribution: # `Uniform.__init__` does not accept the `trunc` parameter low = max(self.prior_parameters[0], self.lb) high = min(self.prior_parameters[1], self.ub) - return cls(low, high, log=log) + return cls(low, high) + + if cls == LogUniform: + # Mind the different interpretation of distribution parameters for + # Uniform(..., log=True) and LogUniform!! 
+ return cls(*self.prior_parameters, trunc=[self.lb, self.ub]) return cls(*self.prior_parameters, log=log, trunc=[self.lb, self.ub]) diff --git a/tests/v1/test_distributions.py b/tests/v1/test_distributions.py index 7b7cd4aa..f4b3e3fe 100644 --- a/tests/v1/test_distributions.py +++ b/tests/v1/test_distributions.py @@ -1,4 +1,5 @@ import sys +from math import exp import numpy as np import pytest @@ -115,3 +116,20 @@ def cdf(x): assert_allclose( distribution.pdf(sample), reference_pdf, rtol=1e-10, atol=1e-14 ) + + +def test_log_uniform(): + """Test Uniform(a, b, log=True) vs LogUniform(a, b).""" + # support between exp(1) and exp(2) + dist = Uniform(1, 2, log=True) + assert dist.pdf(exp(0)) == 0 + assert dist.pdf(exp(1)) > 0 + assert dist.pdf(exp(2)) > 0 + assert dist.pdf(exp(3)) == 0 + + # support between 1 and 2 + dist = LogUniform(1, 2) + assert dist.pdf(0) == 0 + assert dist.pdf(1) > 0 + assert dist.pdf(2) > 0 + assert dist.pdf(3) == 0 From f0c30759217304df9c32d4631ac1fe8a3d91baac Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 11 Dec 2025 10:48:31 +0100 Subject: [PATCH 123/141] Add `v2.Problem.has_{map,ml}_objective` (#463) * Add `v2.Problem.has_{map,ml}_objective` To check for the type of objective function encoded in the PEtab problem. 
* No implicit prior in Parameter.prior_dist * Separate startpoints and priors --------- Co-authored-by: Dilan Pathirana <59329744+dilpath@users.noreply.github.com> --- petab/v2/core.py | 70 +++++++++++++++++++++++++++++++++++++++---- petab/v2/lint.py | 2 +- tests/v2/test_core.py | 19 ++++++++++++ 3 files changed, 84 insertions(+), 7 deletions(-) diff --git a/petab/v2/core.py b/petab/v2/core.py index 22453878..fb206502 100644 --- a/petab/v2/core.py +++ b/petab/v2/core.py @@ -1026,13 +1026,17 @@ def _validate(self) -> Self: return self @property - def prior_dist(self) -> Distribution: - """Get the prior distribution of the parameter.""" - if self.estimate is False: + def prior_dist(self) -> Distribution | None: + """Get the prior distribution of the parameter. + + :return: The prior distribution of the parameter, or None if no prior + distribution is set. + """ + if not self.estimate: raise ValueError(f"Parameter `{self.id}' is not estimated.") if self.prior_distribution is None: - return Uniform(self.lb, self.ub) + return None if not (cls := _prior_to_cls.get(self.prior_distribution)): raise ValueError( @@ -1820,12 +1824,66 @@ def x_fixed_indices(self) -> list[int]: """Parameter table non-estimated parameter indices.""" return [i for i, p in enumerate(self.parameters) if not p.estimate] + @property + def has_map_objective(self) -> bool: + """Whether this problem encodes a maximum a posteriori (MAP) objective. + + A PEtab problem is considered to have a MAP objective if there is a + prior distribution specified for at least one estimated parameter. + + :returns: ``True`` if MAP objective, ``False`` otherwise. + """ + return any( + p.prior_distribution is not None + for p in self.parameters + if p.estimate + ) + + @property + def has_ml_objective(self) -> bool: + """Whether this problem encodes a maximum likelihood (ML) objective. + + A PEtab problem is considered to have an ML objective if there are no + prior distributions specified for any estimated parameters. 
+ + :returns: ``True`` if ML objective, ``False`` otherwise. + """ + return not self.has_map_objective + def get_priors(self) -> dict[str, Distribution]: """Get prior distributions. - :returns: The prior distributions for the estimated parameters. + Note that this will default to uniform distributions over the + parameter bounds for parameters without an explicit prior. + + :returns: The prior distributions for the estimated parameters in case + the problem has a MAP objective, an empty dictionary otherwise. + """ + if not self.has_map_objective: + return {} + + return { + p.id: p.prior_dist if p.prior_distribution else Uniform(p.lb, p.ub) + for p in self.parameters + if p.estimate + } + + def get_startpoint_distributions(self) -> dict[str, Distribution]: + """Get distributions for sampling startpoints. + + The distributions are the prior distributions for estimated parameters + that have a prior distribution defined, and uniform distributions + over the parameter bounds for estimated parameters without an explicit + prior. + + :returns: Mapping of parameter IDs to distributions for sampling + startpoints. 
""" - return {p.id: p.prior_dist for p in self.parameters if p.estimate} + return { + p.id: p.prior_dist if p.prior_distribution else Uniform(p.lb, p.ub) + for p in self.parameters + if p.estimate + } def sample_parameter_startpoints(self, n_starts: int = 100, **kwargs): """Create 2D array with starting points for optimization""" diff --git a/petab/v2/lint.py b/petab/v2/lint.py index 1fe83144..687d58f2 100644 --- a/petab/v2/lint.py +++ b/petab/v2/lint.py @@ -843,7 +843,7 @@ def run(self, problem: Problem) -> ValidationIssue | None: # TODO: check distribution parameter domains more specifically try: - if parameter.estimate: + if parameter.estimate and parameter.prior_dist is not None: # .prior_dist fails for non-estimated parameters _ = parameter.prior_dist.sample(1) except Exception as e: diff --git a/tests/v2/test_core.py b/tests/v2/test_core.py index 060fbb32..2cbe3e46 100644 --- a/tests/v2/test_core.py +++ b/tests/v2/test_core.py @@ -866,3 +866,22 @@ def test_mapping_validation(): # identity mapping is valid Mapping(petab_id="valid_id", model_id="valid_id", name="some name") + + +def test_objective_type(): + """Test that MAP and ML problems are recognized correctly.""" + problem = Problem() + problem += Parameter(id="par1", lb=0, ub=100, estimate=True) + assert problem.has_ml_objective is True + assert problem.has_map_objective is False + + problem += Parameter( + id="par2", + lb=0, + ub=100, + estimate=True, + prior_distribution="normal", + prior_parameters=[50, 10], + ) + assert problem.has_map_objective is True + assert problem.has_ml_objective is False From 1e20a12a7b920893e9349a46c6d5e393724c9001 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 11 Dec 2025 10:55:28 +0100 Subject: [PATCH 124/141] Test with 3.13 until the Python 3.14 cpython dataclass issues are resolved --- .github/workflows/ci_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci_tests.yml b/.github/workflows/ci_tests.yml index 
5d9f85b7..9b170727 100644 --- a/.github/workflows/ci_tests.yml +++ b/.github/workflows/ci_tests.yml @@ -12,7 +12,7 @@ jobs: strategy: matrix: platform: [windows-latest, macos-latest, ubuntu-latest] - python-version: ["3.11", "3.14"] + python-version: ["3.11", "3.13"] runs-on: ${{ matrix.platform }} steps: From baeb4a52955ba2c198d139ec4c0e8c517dc68d72 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Fri, 12 Dec 2025 07:45:28 +0100 Subject: [PATCH 125/141] Update changelog, bump version (#467) --- CHANGELOG.md | 35 ++++++++++++++++++++++++++++++++++- petab/version.py | 2 +- 2 files changed, 35 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index cde085ab..207e9813 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,8 +1,41 @@ # PEtab changelog + +## 0.8 series + +### 0.8.0 + +**Fixes** +* Handle `KeyError` in CheckInitialChangeSymbols + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/459) +* Fix return type in `petab.v2.calculate` + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/464) +* Implement sampling for v2 prior distributions + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/461) + +**Features** +* Add `v2.Problem.__repr__` + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/458) +* Add `LogUniform` distribution + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/465) +* Add `v2.Problem.has_{map,ml}_objective` + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/463) + +**Other** +* Require Python>=3.11 per nep-0029 + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/462) +* v2: Remove `log10-normal` distribution per updated spec + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/456) +* Remove `petab.v2`-is-experimental warnings + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/457) + It still is, but we no longer spam users with warnings. 
+ +**Full Changelog**: https://github.com/PEtab-dev/libpetab-python/compare/v0.7.0...v0.8.0 + + ## 0.7 series -## 0.7.0 series +### 0.7.0 **Fixes** diff --git a/petab/version.py b/petab/version.py index 2ea32e4d..ea406796 100644 --- a/petab/version.py +++ b/petab/version.py @@ -1,3 +1,3 @@ """PEtab library version""" -__version__ = "0.7.0" +__version__ = "0.8.0" From ae5c9e4596d200fa4ead61f7c7967de5a87ad196 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 16 Dec 2025 07:35:58 +0100 Subject: [PATCH 126/141] build(deps): bump actions/cache from 4 to 5 (#468) Bumps [actions/cache](https://github.com/actions/cache) from 4 to 5. - [Release notes](https://github.com/actions/cache/releases) - [Changelog](https://github.com/actions/cache/blob/main/RELEASES.md) - [Commits](https://github.com/actions/cache/compare/v4...v5) --- updated-dependencies: - dependency-name: actions/cache dependency-version: '5' dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/ci_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci_tests.yml b/.github/workflows/ci_tests.yml index 9b170727..a73ab191 100644 --- a/.github/workflows/ci_tests.yml +++ b/.github/workflows/ci_tests.yml @@ -31,7 +31,7 @@ jobs: shell: bash - name: Cache - uses: actions/cache@v4 + uses: actions/cache@v5 with: path: ${{ steps.pip_cache_dir.outputs.dir }} key: ${{ runner.os }}-${{ matrix.python-version }}-${{ hashFiles('**/.ci_pip_reqs.txt') }}-${{ hashFiles('**/setup.py') }} From d57d9fed8d8d5f8592e76d0b15676e05397c3b4b Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Wed, 21 Jan 2026 22:09:51 +0100 Subject: [PATCH 127/141] Require pandas<3 Require pandas<3 for now. Related to #469. 
--- CHANGELOG.md | 5 +++++ petab/version.py | 2 +- pyproject.toml | 2 +- 3 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 207e9813..18dd2866 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,11 @@ ## 0.8 series +### 0.8.1 + +The only change in this release is requiring pandas<3 until we support +the new pandas 3 API. + ### 0.8.0 **Fixes** diff --git a/petab/version.py b/petab/version.py index ea406796..316610ff 100644 --- a/petab/version.py +++ b/petab/version.py @@ -1,3 +1,3 @@ """PEtab library version""" -__version__ = "0.8.0" +__version__ = "0.8.1" diff --git a/pyproject.toml b/pyproject.toml index 2d36dbba..6efd02a4 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ description = "Parameter estimation tabular data" requires-python = ">=3.11" dependencies = [ "numpy>=1.15.1", - "pandas>=1.2.0", + "pandas>=1.2.0,<3", "python-libsbml>=5.17.0", "sympy", "colorama", From 441d9f65c8e29959cce8708a85f6821427997b63 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 26 Jan 2026 15:33:39 +0100 Subject: [PATCH 128/141] GHA: Test with Python3.14 (#472) Reverts 1e20a12a7b920893e9349a46c6d5e393724c9001. --- .github/workflows/ci_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci_tests.yml b/.github/workflows/ci_tests.yml index a73ab191..94098ab5 100644 --- a/.github/workflows/ci_tests.yml +++ b/.github/workflows/ci_tests.yml @@ -12,7 +12,7 @@ jobs: strategy: matrix: platform: [windows-latest, macos-latest, ubuntu-latest] - python-version: ["3.11", "3.13"] + python-version: ["3.11", "3.14"] runs-on: ${{ matrix.platform }} steps: From 4b64d8e13d587e0d3a91410d4991b693c734a5b8 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Tue, 27 Jan 2026 08:39:33 +0100 Subject: [PATCH 129/141] v2: Update schema (#470) Update PEtab v2 yaml schema to the latest version from https://github.com/PEtab-dev/PEtab/. 
--- petab/schemas/petab_schema.v2.0.0.yaml | 39 +++++++++++++++----------- 1 file changed, 22 insertions(+), 17 deletions(-) diff --git a/petab/schemas/petab_schema.v2.0.0.yaml b/petab/schemas/petab_schema.v2.0.0.yaml index 1a285070..5b6f1be7 100644 --- a/petab/schemas/petab_schema.v2.0.0.yaml +++ b/petab/schemas/petab_schema.v2.0.0.yaml @@ -1,7 +1,6 @@ # For syntax see: https://json-schema.org/understanding-json-schema -#$schema: "https://json-schema.org/draft/2019-09/meta/core" -$schema: "http://json-schema.org/draft-06/schema" -description: PEtab parameter estimation problem config file schema +$schema: "https://json-schema.org/draft/2020-12/schema" +description: PEtab 2.0 parameter estimation problem configuration schema. definitions: list_of_files: @@ -9,19 +8,21 @@ definitions: description: List of files. items: type: string - description: File name or URL. + description: | + File name or URL, absolute or relative to the location of the PEtab + problem configuration file. version_number: type: string pattern: ^([1-9][0-9]*!)?(0|[1-9][0-9]*)(\.(0|[1-9][0-9]*))*((a|b|rc)(0|[1-9][0-9]*))?(\.post(0|[1-9][0-9]*))?(\.dev(0|[1-9][0-9]*))?$ - description: Version number (corresponding to PEP 440). + description: Version number. properties: format_version: - anyof: + anyOf: - $ref: "#/definitions/version_number" - type: integer - description: Version of the PEtab format + description: Version of the PEtab format. id: type: string @@ -32,17 +33,12 @@ properties: pattern: "^[a-zA-Z_]\\w*$" parameter_files: - type: array - description: | - List of PEtab parameter files. - items: - type: string - description: | - File name (absolute or relative) or URL to a PEtab parameter table. + description: List of PEtab parameter files. + $ref: "#/definitions/list_of_files" model_files: type: object - description: One or multiple models + description: One or multiple models. 
# the model ID patternProperties: @@ -51,7 +47,9 @@ properties: properties: location: type: string - description: Model file name or URL + description: | + Model file name or URL, absolute or relative to the location of + the PEtab problem configuration file. language: type: string description: | @@ -94,9 +92,14 @@ properties: properties: version: $ref: "#/definitions/version_number" - + required: + type: boolean + description: | + Indicates whether the extension is required for the + mathematical interpretation of the problem. required: - version + - required additionalProperties: true additionalProperties: false @@ -107,3 +110,5 @@ required: - model_files - observable_files - measurement_files + +additionalProperties: false From 44c8062ce1b87a74a0ba1bd2551de0cdc2a13ff1 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Tue, 27 Jan 2026 09:08:40 +0100 Subject: [PATCH 130/141] pandas 3.0 compatibility (#471) Make library compatible with pandas 3.0. Closes #469. --- petab/v1/conditions.py | 2 +- petab/v1/lint.py | 16 ++++++++-------- petab/v1/visualize/data_overview.py | 2 +- pyproject.toml | 2 +- tests/v1/test_petab.py | 2 +- 5 files changed, 12 insertions(+), 12 deletions(-) diff --git a/petab/v1/conditions.py b/petab/v1/conditions.py index 5dc46565..8caf04dc 100644 --- a/petab/v1/conditions.py +++ b/petab/v1/conditions.py @@ -111,7 +111,7 @@ def get_parametric_overrides(condition_df: pd.DataFrame) -> list[str]: result = [] for column in constant_parameters: - if np.issubdtype(condition_df[column].dtype, np.number): + if not pd.api.types.is_string_dtype(condition_df[column].dtype): continue floatified = condition_df.loc[:, column].apply(core.to_float_if_float) diff --git a/petab/v1/lint.py b/petab/v1/lint.py index 6d06c492..434b5030 100644 --- a/petab/v1/lint.py +++ b/petab/v1/lint.py @@ -129,7 +129,7 @@ def check_condition_df( ) for column_name in req_cols: - if not np.issubdtype(df[column_name].dtype, np.number): + if 
pd.api.types.is_string_dtype(df[column_name].dtype): assert_no_leading_trailing_whitespace( df[column_name].values, column_name ) @@ -173,14 +173,14 @@ def check_measurement_df( _check_df(df, MEASUREMENT_DF_REQUIRED_COLS, "measurement") for column_name in MEASUREMENT_DF_REQUIRED_COLS: - if not np.issubdtype(df[column_name].dtype, np.number): + if pd.api.types.is_string_dtype(df[column_name].dtype): assert_no_leading_trailing_whitespace( df[column_name].values, column_name ) for column_name in MEASUREMENT_DF_OPTIONAL_COLS: - if column_name in df and not np.issubdtype( - df[column_name].dtype, np.number + if column_name in df and pd.api.types.is_string_dtype( + df[column_name].dtype ): assert_no_leading_trailing_whitespace( df[column_name].values, column_name @@ -243,7 +243,7 @@ def check_parameter_df( check_ids(df.index.values, kind="parameter") for column_name in PARAMETER_DF_REQUIRED_COLS[1:]: # 0 is PARAMETER_ID - if not np.issubdtype(df[column_name].dtype, np.number): + if pd.api.types.is_string_dtype(df[column_name].dtype): assert_no_leading_trailing_whitespace( df[column_name].values, column_name ) @@ -304,14 +304,14 @@ def check_observable_df(observable_df: pd.DataFrame) -> None: check_ids(observable_df.index.values, kind="observable") for column_name in OBSERVABLE_DF_REQUIRED_COLS[1:]: - if not np.issubdtype(observable_df[column_name].dtype, np.number): + if pd.api.types.is_string_dtype(observable_df[column_name].dtype): assert_no_leading_trailing_whitespace( observable_df[column_name].values, column_name ) for column_name in OBSERVABLE_DF_OPTIONAL_COLS: - if column_name in observable_df and not np.issubdtype( - observable_df[column_name].dtype, np.number + if column_name in observable_df and pd.api.types.is_string_dtype( + observable_df[column_name].dtype ): assert_no_leading_trailing_whitespace( observable_df[column_name].values, column_name diff --git a/petab/v1/visualize/data_overview.py b/petab/v1/visualize/data_overview.py index 349b503c..41f22ed2 
100644 --- a/petab/v1/visualize/data_overview.py +++ b/petab/v1/visualize/data_overview.py @@ -69,7 +69,7 @@ def get_data_per_observable(measurement_df: pd.DataFrame) -> pd.DataFrame: my_measurements[PREEQUILIBRATION_CONDITION_ID] = ( my_measurements[PREEQUILIBRATION_CONDITION_ID] .astype("object") - .fillna("", inplace=True) + .fillna("") ) index.append(PREEQUILIBRATION_CONDITION_ID) diff --git a/pyproject.toml b/pyproject.toml index 6efd02a4..2d36dbba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -12,7 +12,7 @@ description = "Parameter estimation tabular data" requires-python = ">=3.11" dependencies = [ "numpy>=1.15.1", - "pandas>=1.2.0,<3", + "pandas>=1.2.0", "python-libsbml>=5.17.0", "sympy", "colorama", diff --git a/tests/v1/test_petab.py b/tests/v1/test_petab.py index 564dcb7f..7fe6cb9e 100644 --- a/tests/v1/test_petab.py +++ b/tests/v1/test_petab.py @@ -508,7 +508,7 @@ def test_flatten_timepoint_specific_output_overrides(): ) pd.testing.assert_frame_equal( - problem.observable_df, observable_df_expected + problem.observable_df, observable_df_expected, check_dtype=False ) pd.testing.assert_frame_equal( problem.measurement_df, measurement_df_expected From c571de9d4cec59a415abbf8ac2da211fd99478fb Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Tue, 10 Mar 2026 15:32:06 +0100 Subject: [PATCH 131/141] Fix warnings (#475) * Fix false import warnings when running under pytest * Fix parameter names in warnings --- petab/__init__.py | 10 +++++++++- petab/v2/petab1to2.py | 4 ++-- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/petab/__init__.py b/petab/__init__.py index 348f1269..23b376c2 100644 --- a/petab/__init__.py +++ b/petab/__init__.py @@ -26,7 +26,15 @@ def __getattr__(name): return importlib.import_module("petab.v1") if name == "v2": return importlib.import_module("petab.v2") - if name not in ("__path__", "__all__", "__wrapped__"): + if name not in ( + "__path__", + "__all__", + "__wrapped__", + # accessed under pytest + 
"_pytestfixturefunction", + "__test__", + "__bases__", + ): warn( f"Accessing `petab.{name}` is deprecated and will be removed in " f"the next major release. Please use `petab.v1.{name}` instead.", diff --git a/petab/v2/petab1to2.py b/petab/v2/petab1to2.py index e3bd4f2d..a5101a2e 100644 --- a/petab/v2/petab1to2.py +++ b/petab/v2/petab1to2.py @@ -494,7 +494,7 @@ def update_prior(row): if new_prior_type == "log10-normal": warnings.warn( f"Prior distribution `{new_prior_type}' for parameter " - f"`{row.name}' is not supported in PEtab v2. " + f"`{row[v1.C.PARAMETER_ID]}' is not supported in PEtab v2. " "Using `log-normal` instead.", # call to `petab1to2` stacklevel=9, @@ -504,7 +504,7 @@ def update_prior(row): if new_prior_type not in v2.C.PRIOR_DISTRIBUTIONS: raise NotImplementedError( f"PEtab v2 does not support prior type `{new_prior_type}' " - f"required for parameter `{row.name}'." + f"required for parameter `{row[v1.C.PARAMETER_ID]}'." ) return new_prior_type From 7a60b2ea9302044872bf1a0c3edd3d391669d2f5 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Thu, 12 Mar 2026 08:10:00 +0100 Subject: [PATCH 132/141] Release 0.8.2 (#476) Update changelog, bump version --- CHANGELOG.md | 12 ++++++++++++ petab/version.py | 2 +- 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 18dd2866..fe7be354 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -3,6 +3,18 @@ ## 0.8 series +### 0.8.2 + +**Fixes** +* We now support pandas>=3.0 + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/471) +* Updated to the latest PEtab v2 schema + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/470) +* Fixed some warnings + (by @dweindl in https://github.com/PEtab-dev/libpetab-python/pull/475) + +**Full Changelog**: https://github.com/PEtab-dev/libpetab-python/compare/v0.8.1...v0.8.2 + ### 0.8.1 The only change in this release is requiring pandas<3 until we support diff --git a/petab/version.py b/petab/version.py 
index 316610ff..ab7ae256 100644 --- a/petab/version.py +++ b/petab/version.py @@ -1,3 +1,3 @@ """PEtab library version""" -__version__ = "0.8.1" +__version__ = "0.8.2" From 076f9efb89e9fb4c920654b340d37b165e8c7075 Mon Sep 17 00:00:00 2001 From: Polina Lakrisenko Date: Wed, 18 Mar 2026 15:32:42 +0100 Subject: [PATCH 133/141] Goodness of fit fix (#473) * fix mean of residuals in plot_goodness_of_fit * add possibility to choose between normalized and unnormalized errors --- petab/v1/visualize/plot_residuals.py | 34 +++++++++++++++++++++------- 1 file changed, 26 insertions(+), 8 deletions(-) diff --git a/petab/v1/visualize/plot_residuals.py b/petab/v1/visualize/plot_residuals.py index 46e83fb9..a1f2ec9b 100644 --- a/petab/v1/visualize/plot_residuals.py +++ b/petab/v1/visualize/plot_residuals.py @@ -136,6 +136,7 @@ def plot_goodness_of_fit( size: tuple = (10, 7), color=None, ax: plt.Axes | None = None, + normalized_error: bool = True, ) -> matplotlib.axes.Axes: """ Plot goodness of fit. @@ -154,6 +155,10 @@ def plot_goodness_of_fit( `matplotlib.pyplot.scatter`. ax: Axis object. + normalized_error: + Type of error to display. + If True, mean of squared normalized residuals is shown, + otherwise mean of squared residuals. 
Returns ------- @@ -168,12 +173,26 @@ def plot_goodness_of_fit( "are needed for goodness_of_fit" ) - residual_df = calculate_residuals( - measurement_dfs=petab_problem.measurement_df, - simulation_dfs=simulations_df, - observable_dfs=petab_problem.observable_df, - parameter_dfs=petab_problem.parameter_df, - )[0] + if normalized_error: + residual_df = calculate_residuals( + measurement_dfs=petab_problem.measurement_df, + simulation_dfs=simulations_df, + observable_dfs=petab_problem.observable_df, + parameter_dfs=petab_problem.parameter_df, + normalize=True, + )[0] + error_name = "mean of squared\nnormalized residuals" + else: + residual_df = calculate_residuals( + measurement_dfs=petab_problem.measurement_df, + simulation_dfs=simulations_df, + observable_dfs=petab_problem.observable_df, + parameter_dfs=petab_problem.parameter_df, + normalize=False, + )[0] + error_name = "mean of squared residuals" + error = np.mean(np.power(residual_df["residual"], 2)) + slope, intercept, r_value, p_value, std_err = stats.linregress( simulations_df["simulation"], petab_problem.measurement_df["measurement"], @@ -199,7 +218,6 @@ def plot_goodness_of_fit( ax.plot(x, x, linestyle="--", color="gray") ax.plot(x, intercept + slope * x, "r", label="fitted line") - mse = np.mean(np.abs(residual_df["residual"])) ax.text( 0.1, 0.70, @@ -207,7 +225,7 @@ def plot_goodness_of_fit( f"slope: {slope:.2f}\n" f"intercept: {intercept:.2f}\n" f"p-value: {p_value:.2e}\n" - f"mean squared error: {mse:.2e}\n", + f"{error_name}: {error:.2e}\n", transform=ax.transAxes, ) From f8ba6b669f7fb980d9d965f1835feadc4493bdf0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 31 Mar 2026 08:00:16 +0200 Subject: [PATCH 134/141] build(deps): bump codecov/codecov-action from 5 to 6 (#478) Bumps [codecov/codecov-action](https://github.com/codecov/codecov-action) from 5 to 6. 
- [Release notes](https://github.com/codecov/codecov-action/releases) - [Changelog](https://github.com/codecov/codecov-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/codecov/codecov-action/compare/v5...v6) --- updated-dependencies: - dependency-name: codecov/codecov-action dependency-version: '6' dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/ci_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci_tests.yml b/.github/workflows/ci_tests.yml index 94098ab5..ee477781 100644 --- a/.github/workflows/ci_tests.yml +++ b/.github/workflows/ci_tests.yml @@ -51,7 +51,7 @@ jobs: run: tox -e unit - name: Coverage - uses: codecov/codecov-action@v5 + uses: codecov/codecov-action@v6 with: token: ${{ secrets.CODECOV_TOKEN }} files: ./coverage.xml From f32149d403ff16d92dd2e234e797d1cae70a13a7 Mon Sep 17 00:00:00 2001 From: Maren Philipps <55318391+m-philipps@users.noreply.github.com> Date: Sun, 10 May 2026 15:04:01 +0200 Subject: [PATCH 135/141] Minor improvement in v1 to v2 converter (#479) * add Warning when removing `PARAMETER_SCALE`, `INITIALIZATION_PRIOR_TYPE`, `INITIALIZATION_PRIOR_PARAMETERS` * Use the experiments table column order that is suggested in the PEtab format * Fix description and redundancy in v2 `get_experiment_df` --- petab/v2/experiments.py | 6 ++---- petab/v2/petab1to2.py | 17 +++++++++++++++-- tests/v2/test_conversion.py | 7 +++++++ tests/v2/test_core.py | 6 ++++++ 4 files changed, 30 insertions(+), 6 deletions(-) diff --git a/petab/v2/experiments.py b/petab/v2/experiments.py index 9837b953..3a06ea76 100644 --- a/petab/v2/experiments.py +++ b/petab/v2/experiments.py @@ -11,16 +11,14 @@ def get_experiment_df( experiments_file: str | pd.DataFrame | Path | None, ) -> pd.DataFrame | None: """ - Read the provided observable file into a 
``pandas.Dataframe``. + Read the provided experiments file into a ``pandas.Dataframe``. Arguments: experiments_file: Name of the file to read from or pandas.Dataframe. Returns: - Observable DataFrame + Experiments DataFrame """ - if experiments_file is None: - return experiments_file if isinstance(experiments_file, str | Path): experiments_file = pd.read_csv( diff --git a/petab/v2/petab1to2.py b/petab/v2/petab1to2.py index a5101a2e..de809acf 100644 --- a/petab/v2/petab1to2.py +++ b/petab/v2/petab1to2.py @@ -178,15 +178,15 @@ def create_experiment_id(sim_cond_id: str, preeq_cond_id: str) -> str: experiments.append( { v2.C.EXPERIMENT_ID: exp_id, - v2.C.CONDITION_ID: preeq_cond_id, v2.C.TIME: v2.C.TIME_PREEQUILIBRATION, + v2.C.CONDITION_ID: preeq_cond_id, } ) experiments.append( { v2.C.EXPERIMENT_ID: exp_id, - v2.C.CONDITION_ID: sim_cond_id, v2.C.TIME: 0, + v2.C.CONDITION_ID: sim_cond_id, } ) if experiments: @@ -523,6 +523,19 @@ def update_prior(row): errors="ignore", ) # some columns were dropped in PEtab v2 + if v1.C.INITIALIZATION_PRIOR_TYPE in df and ( + df[v1.C.INITIALIZATION_PRIOR_TYPE].notna().any() + ): + warnings.warn( + "Initialisation priors in parameter table are not supported " + "in PEtab v2.", + stacklevel=9, + ) + if not (df[v1.C.PARAMETER_SCALE] == v1.C.LIN).all(): + warnings.warn( + "Parameter scales are not supported in PEtab v2.", + stacklevel=9, + ) df.drop( columns=[ v1.C.INITIALIZATION_PRIOR_TYPE, diff --git a/tests/v2/test_conversion.py b/tests/v2/test_conversion.py index eb8f9d45..21949714 100644 --- a/tests/v2/test_conversion.py +++ b/tests/v2/test_conversion.py @@ -33,6 +33,13 @@ def test_petab1to2_remote(): @pytest.mark.filterwarnings( "ignore:.*Using `log-normal` instead.*:UserWarning" ) +@pytest.mark.filterwarnings( + "ignore:.*Initialisation priors in parameter table are not supported.*:" + "UserWarning" +) +@pytest.mark.filterwarnings( + "ignore:.*Parameter scales are not supported in PEtab v2.*:UserWarning" +) @parametrize_or_skip def 
test_benchmark_collection(problem_id): """Test that we can upgrade all benchmark collection models.""" diff --git a/tests/v2/test_core.py b/tests/v2/test_core.py index 2cbe3e46..22dbf0e1 100644 --- a/tests/v2/test_core.py +++ b/tests/v2/test_core.py @@ -47,6 +47,9 @@ def test_observable_table_round_trip(): assert observables == observables2 +@pytest.mark.filterwarnings( + "ignore:.*Parameter scales are not supported in PEtab v2.*:UserWarning" +) def test_condition_table_round_trip(): with tempfile.TemporaryDirectory() as tmp_dir: petab1to2(example_dir_fujita / "Fujita.yaml", tmp_dir) @@ -59,6 +62,9 @@ def test_condition_table_round_trip(): assert conditions == conditions2 +@pytest.mark.filterwarnings( + "ignore:.*Parameter scales are not supported in PEtab v2.*:UserWarning" +) def test_assert_valid(): problem = petab1to2(example_dir_fujita / "Fujita.yaml") problem.assert_valid() From f674bd593e7f62395f75a819cd194abde84b5520 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 9 Jun 2026 07:27:21 +0200 Subject: [PATCH 136/141] build(deps): bump codecov/codecov-action from 6 to 7 (#484) Bumps [codecov/codecov-action](https://github.com/codecov/codecov-action) from 6 to 7. - [Release notes](https://github.com/codecov/codecov-action/releases) - [Changelog](https://github.com/codecov/codecov-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/codecov/codecov-action/compare/v6...v7) --- updated-dependencies: - dependency-name: codecov/codecov-action dependency-version: '7' dependency-type: direct:production update-type: version-update:semver-major ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- .github/workflows/ci_tests.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/ci_tests.yml b/.github/workflows/ci_tests.yml index ee477781..00c07431 100644 --- a/.github/workflows/ci_tests.yml +++ b/.github/workflows/ci_tests.yml @@ -51,7 +51,7 @@ jobs: run: tox -e unit - name: Coverage - uses: codecov/codecov-action@v6 + uses: codecov/codecov-action@v7 with: token: ${{ secrets.CODECOV_TOKEN }} files: ./coverage.xml From 1292091188cf539fd0bb24b52cb025c9c5658ffc Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Tue, 9 Jun 2026 12:23:54 +0200 Subject: [PATCH 137/141] Lazy scipy imports (#486) For now, keep scipy optional, unless probability distributions are really needed. Fixes #483. --- petab/v1/distributions.py | 123 +++++++++++++++++++++++++------------- 1 file changed, 82 insertions(+), 41 deletions(-) diff --git a/petab/v1/distributions.py b/petab/v1/distributions.py index f8e807da..aed5fe2f 100644 --- a/petab/v1/distributions.py +++ b/petab/v1/distributions.py @@ -15,15 +15,10 @@ from typing import Any import numpy as np -from scipy.stats import ( - cauchy, - chi2, - expon, - gamma, - laplace, - norm, - rayleigh, - uniform, + +_SCIPY_IMPORT_ERROR = ( + "scipy is required for this functionality. 
" + "Install it with: pip install scipy" ) __all__ = [ @@ -342,6 +337,11 @@ def __init__( trunc: tuple[float, float] | None = None, log: bool | float = False, ): + try: + from scipy.stats import norm + except ImportError as e: + raise ImportError(_SCIPY_IMPORT_ERROR) from e + self._dist = norm self._loc = loc self._scale = scale super().__init__(log=log, trunc=trunc) @@ -353,13 +353,13 @@ def _sample(self, shape=None) -> np.ndarray | float: return np.random.normal(loc=self._loc, scale=self._scale, size=shape) def _pdf_untransformed_untruncated(self, x) -> np.ndarray | float: - return norm.pdf(x, loc=self._loc, scale=self._scale) + return self._dist.pdf(x, loc=self._loc, scale=self._scale) def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: - return norm.cdf(x, loc=self._loc, scale=self._scale) + return self._dist.cdf(x, loc=self._loc, scale=self._scale) def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float: - return norm.ppf(q, loc=self._loc, scale=self._scale) + return self._dist.ppf(q, loc=self._loc, scale=self._scale) @property def loc(self) -> float: @@ -396,6 +396,11 @@ def __init__( *, log: bool | float = False, ): + try: + from scipy.stats import uniform + except ImportError as e: + raise ImportError(_SCIPY_IMPORT_ERROR) from e + self._dist = uniform self._low = low self._high = high super().__init__(log=log) @@ -407,13 +412,13 @@ def _sample(self, shape=None) -> np.ndarray | float: return np.random.uniform(low=self._low, high=self._high, size=shape) def _pdf_untransformed_untruncated(self, x) -> np.ndarray | float: - return uniform.pdf(x, loc=self._low, scale=self._high - self._low) + return self._dist.pdf(x, loc=self._low, scale=self._high - self._low) def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: - return uniform.cdf(x, loc=self._low, scale=self._high - self._low) + return self._dist.cdf(x, loc=self._low, scale=self._high - self._low) def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float: - 
return uniform.ppf(q, loc=self._low, scale=self._high - self._low) + return self._dist.ppf(q, loc=self._low, scale=self._high - self._low) class LogUniform(Distribution): @@ -434,6 +439,11 @@ def __init__( high: float, trunc: tuple[float, float] | None = None, ): + try: + from scipy.stats import uniform + except ImportError as e: + raise ImportError(_SCIPY_IMPORT_ERROR) from e + self._dist = uniform self._logbase = np.exp(1) self._low = self._log(low) self._high = self._log(high) @@ -446,13 +456,13 @@ def _sample(self, shape=None) -> np.ndarray | float: return np.random.uniform(low=self._low, high=self._high, size=shape) def _pdf_untransformed_untruncated(self, x) -> np.ndarray | float: - return uniform.pdf(x, loc=self._low, scale=self._high - self._low) + return self._dist.pdf(x, loc=self._low, scale=self._high - self._low) def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: - return uniform.cdf(x, loc=self._low, scale=self._high - self._low) + return self._dist.cdf(x, loc=self._low, scale=self._high - self._low) def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float: - return uniform.ppf(q, loc=self._low, scale=self._high - self._low) + return self._dist.ppf(q, loc=self._low, scale=self._high - self._low) class Laplace(Distribution): @@ -479,6 +489,11 @@ def __init__( trunc: tuple[float, float] | None = None, log: bool | float = False, ): + try: + from scipy.stats import laplace + except ImportError as e: + raise ImportError(_SCIPY_IMPORT_ERROR) from e + self._dist = laplace self._loc = loc self._scale = scale super().__init__(log=log, trunc=trunc) @@ -490,13 +505,13 @@ def _sample(self, shape=None) -> np.ndarray | float: return np.random.laplace(loc=self._loc, scale=self._scale, size=shape) def _pdf_untransformed_untruncated(self, x) -> np.ndarray | float: - return laplace.pdf(x, loc=self._loc, scale=self._scale) + return self._dist.pdf(x, loc=self._loc, scale=self._scale) def _cdf_untransformed_untruncated(self, x) -> np.ndarray | 
float: - return laplace.cdf(x, loc=self._loc, scale=self._scale) + return self._dist.cdf(x, loc=self._loc, scale=self._scale) def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float: - return laplace.ppf(q, loc=self._loc, scale=self._scale) + return self._dist.ppf(q, loc=self._loc, scale=self._scale) @property def loc(self) -> float: @@ -536,6 +551,11 @@ def __init__( trunc: tuple[float, float] | None = None, log: bool | float = False, ): + try: + from scipy.stats import cauchy + except ImportError as e: + raise ImportError(_SCIPY_IMPORT_ERROR) from e + self._dist = cauchy self._loc = loc self._scale = scale super().__init__(log=log, trunc=trunc) @@ -544,16 +564,16 @@ def __repr__(self): return self._repr({"loc": self._loc, "scale": self._scale}) def _pdf_untransformed_untruncated(self, x) -> np.ndarray | float: - return cauchy.pdf(x, loc=self._loc, scale=self._scale) + return self._dist.pdf(x, loc=self._loc, scale=self._scale) def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: - return cauchy.cdf(x, loc=self._loc, scale=self._scale) + return self._dist.cdf(x, loc=self._loc, scale=self._scale) def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float: - return cauchy.ppf(q, loc=self._loc, scale=self._scale) + return self._dist.ppf(q, loc=self._loc, scale=self._scale) def _sample(self, shape=None) -> np.ndarray | float: - return cauchy.rvs(loc=self._loc, scale=self._scale, size=shape) + return self._dist.rvs(loc=self._loc, scale=self._scale, size=shape) @property def loc(self) -> float: @@ -592,6 +612,12 @@ def __init__( trunc: tuple[float, float] | None = None, log: bool | float = False, ): + try: + from scipy.stats import chi2 + except ImportError as e: + raise ImportError(_SCIPY_IMPORT_ERROR) from e + self._dist = chi2 + if isinstance(dof, float): if not dof.is_integer() or dof < 1: raise ValueError( @@ -606,16 +632,16 @@ def __repr__(self): return self._repr({"dof": self._dof}) def _pdf_untransformed_untruncated(self, x) -> 
np.ndarray | float: - return chi2.pdf(x, df=self._dof) + return self._dist.pdf(x, df=self._dof) def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: - return chi2.cdf(x, df=self._dof) + return self._dist.cdf(x, df=self._dof) def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float: - return chi2.ppf(q, df=self._dof) + return self._dist.ppf(q, df=self._dof) def _sample(self, shape=None) -> np.ndarray | float: - return chi2.rvs(df=self._dof, size=shape) + return self._dist.rvs(df=self._dof, size=shape) @property def dof(self) -> int: @@ -639,6 +665,11 @@ def __init__( scale: float, trunc: tuple[float, float] | None = None, ): + try: + from scipy.stats import expon + except ImportError as e: + raise ImportError(_SCIPY_IMPORT_ERROR) from e + self._dist = expon self._scale = scale super().__init__(log=False, trunc=trunc) @@ -646,16 +677,16 @@ def __repr__(self): return self._repr({"scale": self._scale}) def _pdf_untransformed_untruncated(self, x) -> np.ndarray | float: - return expon.pdf(x, scale=self._scale) + return self._dist.pdf(x, scale=self._scale) def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: - return expon.cdf(x, scale=self._scale) + return self._dist.cdf(x, scale=self._scale) def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float: - return expon.ppf(q, scale=self._scale) + return self._dist.ppf(q, scale=self._scale) def _sample(self, shape=None) -> np.ndarray | float: - return expon.rvs(scale=self._scale, size=shape) + return self._dist.rvs(scale=self._scale, size=shape) @property def scale(self) -> float: @@ -689,6 +720,11 @@ def __init__( trunc: tuple[float, float] | None = None, log: bool | float = False, ): + try: + from scipy.stats import gamma + except ImportError as e: + raise ImportError(_SCIPY_IMPORT_ERROR) from e + self._dist = gamma self._shape = shape self._scale = scale super().__init__(log=log, trunc=trunc) @@ -697,16 +733,16 @@ def __repr__(self): return self._repr({"shape": self._shape, 
"scale": self._scale}) def _pdf_untransformed_untruncated(self, x) -> np.ndarray | float: - return gamma.pdf(x, a=self._shape, scale=self._scale) + return self._dist.pdf(x, a=self._shape, scale=self._scale) def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: - return gamma.cdf(x, a=self._shape, scale=self._scale) + return self._dist.cdf(x, a=self._shape, scale=self._scale) def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float: - return gamma.ppf(q, a=self._shape, scale=self._scale) + return self._dist.ppf(q, a=self._shape, scale=self._scale) def _sample(self, shape=None) -> np.ndarray | float: - return gamma.rvs(a=self._shape, scale=self._scale, size=shape) + return self._dist.rvs(a=self._shape, scale=self._scale, size=shape) @property def shape(self) -> float: @@ -743,6 +779,11 @@ def __init__( trunc: tuple[float, float] | None = None, log: bool | float = False, ): + try: + from scipy.stats import rayleigh + except ImportError as e: + raise ImportError(_SCIPY_IMPORT_ERROR) from e + self._dist = rayleigh self._scale = scale super().__init__(log=log, trunc=trunc) @@ -750,16 +791,16 @@ def __repr__(self): return self._repr({"scale": self._scale}) def _pdf_untransformed_untruncated(self, x) -> np.ndarray | float: - return rayleigh.pdf(x, scale=self._scale) + return self._dist.pdf(x, scale=self._scale) def _cdf_untransformed_untruncated(self, x) -> np.ndarray | float: - return rayleigh.cdf(x, scale=self._scale) + return self._dist.cdf(x, scale=self._scale) def _ppf_untransformed_untruncated(self, q) -> np.ndarray | float: - return rayleigh.ppf(q, scale=self._scale) + return self._dist.ppf(q, scale=self._scale) def _sample(self, shape=None) -> np.ndarray | float: - return rayleigh.rvs(scale=self._scale, size=shape) + return self._dist.rvs(scale=self._scale, size=shape) @property def scale(self) -> float: From ae4ea94b1588cadc8c0b0b6997914b3b21c8f719 Mon Sep 17 00:00:00 2001 From: Dilan Pathirana <59329744+dilpath@users.noreply.github.com> 
Date: Thu, 11 Jun 2026 15:24:10 +0200 Subject: [PATCH 138/141] Update python requirements (#487) --- .github/workflows/ci_tests.yml | 4 ++-- .github/workflows/deploy.yml | 2 +- pyproject.toml | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/ci_tests.yml b/.github/workflows/ci_tests.yml index 00c07431..b8e2b1b0 100644 --- a/.github/workflows/ci_tests.yml +++ b/.github/workflows/ci_tests.yml @@ -12,7 +12,7 @@ jobs: strategy: matrix: platform: [windows-latest, macos-latest, ubuntu-latest] - python-version: ["3.11", "3.14"] + python-version: ["3.12", "3.x"] runs-on: ${{ matrix.platform }} steps: @@ -55,4 +55,4 @@ jobs: with: token: ${{ secrets.CODECOV_TOKEN }} files: ./coverage.xml - if: matrix.platform == 'ubuntu-latest' + if: matrix.platform == 'ubuntu-latest' && matrix.python-version == '3.x' diff --git a/.github/workflows/deploy.yml b/.github/workflows/deploy.yml index 47cdc51d..9e76dcda 100644 --- a/.github/workflows/deploy.yml +++ b/.github/workflows/deploy.yml @@ -19,7 +19,7 @@ jobs: - name: Set up python uses: actions/setup-python@v6 with: - python-version: 3.13 + python-version: 3.x - name: Install dependencies / build sdist run: | diff --git a/pyproject.toml b/pyproject.toml index 2d36dbba..e0e665e3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -9,7 +9,7 @@ build-backend = "setuptools.build_meta" name = "petab" dynamic = ["version", "readme"] description = "Parameter estimation tabular data" -requires-python = ">=3.11" +requires-python = ">=3.12" dependencies = [ "numpy>=1.15.1", "pandas>=1.2.0", From 17a70e88b1edb4112a59ab99df16d87ffa919ab7 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 16 Jun 2026 13:20:31 +0200 Subject: [PATCH 139/141] build(deps-dev): bump jinja2 from 3.0.3 to 3.1.6 (#488) Bumps [jinja2](https://github.com/pallets/jinja) from 3.0.3 to 3.1.6. 
- [Release notes](https://github.com/pallets/jinja/releases) - [Changelog](https://github.com/pallets/jinja/blob/main/CHANGES.rst) - [Commits](https://github.com/pallets/jinja/compare/3.0.3...3.1.6) --- updated-dependencies: - dependency-name: jinja2 dependency-version: 3.1.6 dependency-type: direct:development ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e0e665e3..93369264 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -45,7 +45,7 @@ quality = [ ] reports = [ # https://github.com/spatialaudio/nbsphinx/issues/641 - "Jinja2==3.0.3", + "Jinja2==3.1.6", ] antimony = [ "antimony>=2.14.0", From 4bbffd0f81149dc2beeeb4dd1960f7efc173cbb4 Mon Sep 17 00:00:00 2001 From: Bill Hlavacek Date: Sun, 21 Jun 2026 22:51:20 -0600 Subject: [PATCH 140/141] Fix PetabStrPrinter for non-integer rational exponents (#489) A non-integer Rational exponent (e.g. a square root, exponent 1/2) is a sympy Atom but prints as the multi-token "1/2", so PetabStrPrinter emitted `sqrt(a)` as the unparenthesized `a ^ 1/2`. Since `^` binds tighter than `/`, that re-parses as `(a^1)/2 = a/2` -- a silent round-trip corruption (`petab_math_str(sympify_petab(...))` is not the identity for square roots). The `not exp.is_Atom` guard added in #421 covers non-atomic exponents but not this atomic-yet-multi-token case; parenthesize a non-integer rational exponent explicitly, so `petab_math_str(sqrt(a)) == "a ^ (1/2)"`, which re-parses correctly. Integer powers and the #421 cases are unchanged. 
--- petab/v1/math/printer.py | 5 ++++- tests/v1/math/test_math.py | 4 ++++ 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/petab/v1/math/printer.py b/petab/v1/math/printer.py index a421989c..f2146233 100644 --- a/petab/v1/math/printer.py +++ b/petab/v1/math/printer.py @@ -41,7 +41,10 @@ def _print_Pow(self, expr: sp.Pow): str_exp = self._print(exp) if not base.is_Atom: str_base = f"({str_base})" - if not exp.is_Atom: + # A non-integer Rational exponent (e.g. sqrt -> 1/2) is an Atom but + # prints as the multi-token "1/2", so without parentheses "x ^ 1/2" + # re-parses as (x^1)/2. Parenthesize it explicitly. + if not exp.is_Atom or (exp.is_Rational and not exp.is_Integer): str_exp = f"({str_exp})" return f"{str_base} ^ {str_exp}" diff --git a/tests/v1/math/test_math.py b/tests/v1/math/test_math.py index 60bb04b5..03bd0cbf 100644 --- a/tests/v1/math/test_math.py +++ b/tests/v1/math/test_math.py @@ -43,6 +43,10 @@ def test_printer(): assert petab_math_str(BooleanTrue()) == "true" assert petab_math_str(BooleanFalse()) == "false" assert petab_math_str((a + b) ** (c + d)) == "(a + b) ^ (c + d)" + # A non-integer rational exponent must be parenthesized, else "a ^ 1/2" + # re-parses as (a^1)/2 (i.e. sqrt(a) would round-trip to a/2). + assert petab_math_str(sp.sqrt(a)) == "a ^ (1/2)" + assert petab_math_str(a ** sp.Rational(2, 3)) == "a ^ (2/3)" def read_cases(): From 5a3b361081367cdd22f4d000ed8ee8debbec3cf1 Mon Sep 17 00:00:00 2001 From: Daniel Weindl Date: Mon, 22 Jun 2026 07:11:39 +0200 Subject: [PATCH 141/141] Require numpy>=2.1 (#491) As per NEP 29. 
--- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 93369264..0295cfa6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -11,7 +11,7 @@ dynamic = ["version", "readme"] description = "Parameter estimation tabular data" requires-python = ">=3.12" dependencies = [ - "numpy>=1.15.1", + "numpy>=2.1", "pandas>=1.2.0", "python-libsbml>=5.17.0", "sympy",