Skip to content

chains_utils

Utilities to be used with output MCMC chains of PTArcade.

Functions

params_loader

params_loader(
    file: str | Path,
) -> dict[str, tuple[float, float] | None]

Load a parameter file and return a dictionary of the parameters with their respective values or None.

PARAMETER DESCRIPTION
file

The path to the prior.txt file.

TYPE: str | Path

RETURNS DESCRIPTION
params

A dictionary with the parameters' prior ranges.

TYPE: dict[str, tuple[float, float] | None]

Source code in src/ptarcade/chains_utils.py
def params_loader(file: str | Path) -> dict[str, tuple[float, float] | None]:
    """Load a parameter file and return a dictionary of the parameters with their respective values or None.

    Parameters
    ----------
    file : str | Path
        The path to the prior.txt file.

    Returns
    -------
    params : dict[str, tuple[float, float] | None]
        A dictionary with the parameters' prior ranges.
    """
    params = {}

    with open(file) as f:
        for line in f:
            if ':' not in line:
                continue

            key = line.split(":")[0]

            if "Uniform" in line:
                minimum = float(re.search('pmin=(.*?),', line).group(1)) # type: ignore
                maximum = float(re.search('pmax=(.*?)\\)', line).group(1)) # type: ignore

                params[key] = (minimum, maximum)

            elif "Normal" in line:
                dim = len(re.search('\\[(.*?)\\]', line).group(1).split()) # type: ignore
                for i in range(dim):
                    params[f"{key}_{i}"] = None # type: ignore

            else:
                params[key] = None # type: ignore

    return params # type: ignore

convert_chains_to_hdf

convert_chains_to_hdf(
    chains_dir: str | Path,
    burn_frac: float = 0.0,
    quick_import: bool = False,
    chain_name: str = "chain_1.txt",
    dest_path: Path | None = None,
    **kwargs
) -> None

Convert the raw output of PTArcade to HDF format and write to disk.

PARAMETER DESCRIPTION
chains_dir

Name of the directory containing the chains.

TYPE: str | Path

burn_frac

Fraction of the chain that is removed from the head (default is 0).

TYPE: float DEFAULT: 0.0

quick_import

Flag to skip importing the rednoise portion of chains (default is False).

TYPE: bool DEFAULT: False

chain_name

The name of the chain files, include the file extension of the chain files. Compressed files with extension ".gz" can be used (default is "chain_1.txt").

TYPE: str DEFAULT: 'chain_1.txt'

dest_path

The destination path including filename (default is to save in chains_dir with a unique timestamp).

TYPE: Path | None DEFAULT: None

**kwargs

Additional arguments passed to pandas.DataFrame.to_hdf

TYPE: dict DEFAULT: {}

RETURNS DESCRIPTION
None

Saves an HDF5 file to dest_path in "table" format. There is one group for each chain ("chain_N"), with a "table" dataset in each group that contains the MCMC samples. There is also a single group for parameters called "parameters", also with a dataset called "table".

Source code in src/ptarcade/chains_utils.py
def convert_chains_to_hdf(
    chains_dir: str | Path,
    burn_frac: float = 0.0,
    quick_import: bool = False,  # noqa: FBT001, FBT002
    chain_name: str = "chain_1.txt",
    dest_path: Path | None = None,
    **kwargs,
) -> None:
    """Convert the raw output of PTArcade to HDF format and write to disk.

    Parameters
    ----------
    chains_dir : str | Path
        Name of the directory containing the chains.
    burn_frac : float, optional
        Fraction of the chain that is removed from the head (default is 0).
    quick_import : bool, optional
        Flag to skip importing the rednoise portion of chains (default is False).
    chain_name : str, optional
        The name of the chain files, include the file extension of the chain files.
        Compressed files with extension ".gz" can be used (default is "chain_1.txt").
    dest_path : Path | None
        The destination path including filename (default is to save
        in chains_dir with a unique timestamp).
    **kwargs : dict
        Additional arguments passed to [pandas.DataFrame.to_hdf][]

    Returns
    -------
    None
        Saves an HDF5 file to `dest_path` in "table" format. There is one group for each
        chain ("chain_N"), with a "table" dataset in each group that contains the
        MCMC samples. There is also a single group for parameters called "parameters",
        also with a dataset called "table".

    """
    if isinstance(chains_dir, str):
        chains_dir = Path(chains_dir)

    use_index = kwargs.pop("index", False)
    complevel = kwargs.pop("complevel", 9)
    complib = kwargs.pop("complib", "blosc:lz4")

    if dest_path is None:
        dest_path = (
            chains_dir / f"chains-{chains_dir.name}-{dt.now().strftime('%Y%m%d_%H%M%S')}").with_suffix( # noqa: DTZ005
            ".h5",
        )

    params, dataframes = import_to_dataframe(chains_dir, burn_frac, quick_import, chain_name, merge_chains=False)

    for num, frame in enumerate(dataframes):
        frame.to_hdf(dest_path, key=f"chain_{num}", index=use_index, complevel=complevel, complib=complib, **kwargs)

    params.to_hdf(dest_path, key="parameters", index=use_index, complevel=complevel, complib=complib, **kwargs)
    print(f"Successfully converted to HDF. Saved at {dest_path}.")

import_to_dataframe

import_to_dataframe(
    chains_dir: str | Path,
    burn_frac: float = 0.0,
    quick_import: bool = False,
    chain_name: str = "chain_1.txt",
    merge_chains: bool = True,
) -> tuple[pd.DataFrame, pd.DataFrame]

Import the chains and their parameter file as a pandas dataframe.

Given a chain_dir that contains chains, this function imports the chains, removes burn_frac, and returns a dictionary of parameters: values and a numpy array of the merged chains.

PARAMETER DESCRIPTION
chains_dir

Name of the directory containing the chains.

TYPE: str | Path

burn_frac

Fraction of the chain that is removed from the head (default is 0).

TYPE: float DEFAULT: 0.0

quick_import

Flag to skip importing the rednoise portion of chains (default is False).

TYPE: bool DEFAULT: False

chain_name

The name of the chain files, include the file extension of the chain files. Compressed files with extension ".gz" and HDF5 files with extension ".h5" can be used (default is "chain_1.txt").

TYPE: str DEFAULT: 'chain_1.txt'

merge_chains

Whether to merge the chains into one dataframe (default is True).

TYPE: bool DEFAULT: True

RETURNS DESCRIPTION
params

Dataframe containing the parameter names and their values.

TYPE: DataFrame

chains

Dataframe array containing the merged chains without the burn-in region. Can also optionally return a list of unmerged chains if merg_chains is False.

TYPE: DataFrame | list[DataFrame]

RAISES DESCRIPTION
SystemExit

Raised when the chains have different parameters.

Source code in src/ptarcade/chains_utils.py
def import_to_dataframe(
    chains_dir: str | Path,
    burn_frac: float = 0.0,
    quick_import: bool = False,  # noqa: FBT001, FBT002
    chain_name: str = "chain_1.txt",
    merge_chains: bool = True, # noqa: FBT001, FBT002
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """Import the chains and their parameter file as a pandas dataframe.

    Given a `chain_dir` that contains chains, this function imports the chains, removes `burn_frac`, and returns
    a dictionary of parameters: values and a numpy array of the merged chains.

    Parameters
    ----------
    chains_dir : str | Path
        Name of the directory containing the chains.
    burn_frac : float, optional
        Fraction of the chain that is removed from the head (default is 0).
    quick_import : bool, optional
        Flag to skip importing the rednoise portion of chains (default is False).
    chain_name : str, optional
        The name of the chain files, include the file extension of the chain files. Compressed files
        with extension ".gz" and HDF5 files with extension ".h5" can be used (default is "chain_1.txt").
    merge_chains : bool, optional
        Whether to merge the chains into one dataframe (default is True).

    Returns
    -------
    params : pandas.DataFrame
        Dataframe containing the parameter names and their values.
    chains : pandas.DataFrame | list[pandas.DataFrame]
        Dataframe array containing the merged chains without the burn-in region. Can also optionally
        return a list of unmerged chains if `merg_chains` is False.

    Raises
    ------
    SystemExit
        Raised when the chains have different parameters.

    """
    print("Starting import from", chains_dir)

    start_time = time.time()

    if isinstance(chains_dir, str):
        chains_dir = Path(chains_dir)

    chain_ext = Path(chain_name).suffix

    if chain_ext == ".txt" or chain_ext == ".gz":
        # get all the chain directories
        directories = [x for x in chains_dir.iterdir()
                       if x.is_dir()]

        # get the parameter lits and check that all the chains have the same parameters
        params = params_loader(directories[0] / "priors.txt")

        for chain in directories:
            temp_par = params_loader(chain / "priors.txt")

            if temp_par != params:
                err = f"Chains have different parameters. {temp_par=} != {params=}"
                log.error(err)
                raise SystemExit

        n_pars = len(np.loadtxt(directories[0] / "chain_1.txt", max_rows=1))

        # add name for sampler parameters
        if n_pars - len(params) == 5:
            params.update(
                {
                    "nmodel": None,
                    "log_posterior": None,
                    "log_likelihood": None,
                    "acceptance_rate": None,
                    "n_parall": None,
                },
            )
        elif n_pars - len(params) == 4:
            params.update({"log_posterior": None, "log_likelihood": None, "acceptance_rate": None, "n_parall": None})

        # import and merge all the chains removing the burn-in
        name_list = list(params.keys())

        red_noise_filter = re.compile(".*red_noise.*")

        if quick_import and any(filter(red_noise_filter.match, name_list)): # type: ignore
            # Search reversed list for first occurence of "red_noise"
            # Return the index (remember, the list is reversed!)
            # The use of `next` and a generator makes it so that we don't have to
            # search the whole list, we stop when we get the first match
            red_noise_ind = next(i for i in enumerate(name_list[::-1]) if "red_noise" in i[1])[0]

            # Slice the list so that we begin directly after the index found above
            usecols = name_list[-1 * red_noise_ind :]

            params = {name: params[name] for name in usecols}
        else:
            usecols = name_list
        dtypes = {name: float for name in usecols}
        # import and merge all the chains removing the burn-in


        if merge_chains:
            chains = pd.concat(
                (
                    pd.read_csv(
                        (drct / chain_name),
                        sep="\t",
                        names=name_list,
                        dtype=dtypes,
                        usecols=usecols,
                    ).iloc[lambda x: int(len(x) * burn_frac) <= x.index]
                    for drct in directories
                ),
                ignore_index=True,
                sort=False,
            )

            chains = chains.dropna()

        else:
            chains = [
                pd.read_csv(
                    (drct / chain_name),
                    sep="\t",
                    names=name_list,
                    dtype=dtypes,
                    usecols=usecols,
                ).iloc[lambda x: int(len(x) * burn_frac) <= x.index]
                for drct in directories
            ]

        params = pd.DataFrame.from_dict(params).fillna(np.nan)  # need to fill `None` with a numeric value

    elif chain_ext == ".h5":

        with pd.HDFStore(chains_dir / chain_name) as h5:
            keys = h5.keys()
            chain_keys = [key for key in keys if "chain" in key]

            params = h5.get("/parameters")

            chains = pd.concat(
                [
                    pd.read_hdf(h5, key=chain_key).iloc[lambda x: int(len(x) * burn_frac) <= x.index]
                    for chain_key in chain_keys
                ],
                ignore_index=True,
                sort=False,
            )

            chains = chains.dropna()


    print(f"Finished importing   {chains_dir} in {time.time() - start_time:.2f}s")

    return params, chains

import_chains

import_chains(
    chains_dir: str | Path,
    burn_frac: float = 1 / 4,
    quick_import: bool = True,
    chain_name: str = "chain_1.txt",
) -> tuple[dict, NDArray]

Import the chains and their parameter file.

Given a chain_dir that contains chains, this function imports the chains, removes burn_frac, and returns a dictionary of parameters: values and a numpy array of the merged chains.

PARAMETER DESCRIPTION
chains_dir

Name of the directory containing the chains.

TYPE: str | Path

burn_frac

Fraction of the chain that is removed from the head (default is ¼).

TYPE: float DEFAULT: 1 / 4

quick_import

Flag to skip importing the rednoise portion of chains (default is True).

TYPE: bool DEFAULT: True

chain_name

The name of the chain files, include the file extension of the chain files. Compressed files with extension ".gz" and HDF5 files with extension ".h5" can be used (default is "chain_1.txt").

TYPE: str DEFAULT: 'chain_1.txt'

RETURNS DESCRIPTION
params

Dictionary containing the parameter names and their values.

TYPE: dict

mrgd_chain

Numpy array containing the merged chains without the burn-in region.

TYPE: NDArray

RAISES DESCRIPTION
SystemExit

Raised when the chains have different parameters.

Source code in src/ptarcade/chains_utils.py
def import_chains(chains_dir: str | Path,
                  burn_frac: float = 1/4,
                  quick_import: bool = True, # noqa: FBT001, FBT002
                  chain_name: str = "chain_1.txt") -> tuple[dict, NDArray]:
    """Import the chains and their parameter file.

    Given a `chain_dir` that contains chains, this function imports the chains, removes `burn_frac`, and returns
    a dictionary of parameters: values and a numpy array of the merged chains.

    Parameters
    ----------
    chains_dir : str | Path
        Name of the directory containing the chains.
    burn_frac : float, optional
        Fraction of the chain that is removed from the head (default is 1/4).
    quick_import : bool, optional
        Flag to skip importing the rednoise portion of chains (default is True).
    chain_name : str, optional
        The name of the chain files, include the file extension of the chain files. Compressed files
        with extension ".gz" and HDF5 files with extension ".h5" can be used (default is "chain_1.txt").

    Returns
    -------
    params : dict
        Dictionary containing the parameter names and their values.
    mrgd_chain : NDArray
        Numpy array containing the merged chains without the burn-in region.

    Raises
    ------
    SystemExit
        Raised when the chains have different parameters.

    """
    params, mrgd_chain = import_to_dataframe(chains_dir, burn_frac, quick_import, chain_name)

    mrgd_chain = mrgd_chain.to_numpy(dtype=float)
    params = params.to_dict(orient="list")

    return params, mrgd_chain

chain_filter

chain_filter(
    chain: NDArray,
    params: list[str],
    model_id: int | None,
    par_to_plot: list[str] | None,
) -> tuple[NDArray, list[str]]

Filter chains.

This function filters the rows in the provided chain according to the specified model and parameters. It selects rows that correspond to the specified model ID and parameters to plot their posteriors.

PARAMETER DESCRIPTION
chain

The Markov Chain Monte Carlo (MCMC) chain to be filtered. This should be a multi-dimensional array where each row represents a state in the chain, and each column represents a parameter.

TYPE: NDArray

params

The names of the parameters in the chain. This should be a list of strings with the same length as the number of columns in the chain.

TYPE: list[str]

model_id

The ID of the model to filter the chain for. This should be either 0 or 1. If None, the function will select rows for model 0.

TYPE: int | None

par_to_plot

The names of the parameters to filter the chain for. If None, the function will select all parameters except 'nmodel', 'log_posterior', 'log_likelihood', 'acceptance_rate', and 'n_parall', and parameters containing '+' or '-'.

TYPE: list[str] | None

RETURNS DESCRIPTION
chain

The filtered chain, containing only rows corresponding to the specified model ID and parameters.

TYPE: NDArray

filtered_par

The list of filtered parameter names.

TYPE: list[str]

RAISES DESCRIPTION
SystemExit

If the provided model_id is not an integer equal to 0 or 1.

Notes

This function filters the chain in-place, meaning that the original chain will be modified.

Source code in src/ptarcade/chains_utils.py
def chain_filter(
        chain: NDArray,
        params: list[str],
        model_id: int | None,
        par_to_plot: list[str] | None,
) -> tuple[NDArray, list[str]]:
    """Filter chains.

    This function filters the rows in the provided chain according to the specified model and parameters. It selects
    rows that correspond to the specified model ID and parameters to plot their posteriors.

    Parameters
    ----------
    chain : NDArray
        The Markov Chain Monte Carlo (MCMC) chain to be filtered. This should be a multi-dimensional array where each
        row represents a state in the chain, and each column represents a parameter.
    params : list[str]
        The names of the parameters in the chain. This should be a list of strings with the same length as the number
        of columns in the chain.
    model_id : int | None
        The ID of the model to filter the chain for. This should be either 0 or 1. If None, the function will select
        rows for model 0.
    par_to_plot : list[str] | None
        The names of the parameters to filter the chain for. If None, the function will select all parameters except
        'nmodel', 'log_posterior', 'log_likelihood', 'acceptance_rate', and 'n_parall', and parameters
        containing '+' or '-'.

    Returns
    -------
    chain : NDArray
        The filtered chain, containing only rows corresponding to the specified model ID and parameters.
    filtered_par: list[str]
        The list of filtered parameter names.

    Raises
    ------
    SystemExit
        If the provided `model_id` is not an integer equal to 0 or 1.

    Notes
    -----
    This function filters the chain in-place, meaning that the original chain will be modified.

    """
    if 'nmodel' in list(params):
        nmodel_idx = list(params).index('nmodel')

        if model_id is None:
            print("No model ID specified, posteriors are plotted for model 0")
            filter_model = chain[:, nmodel_idx] < 0.5
        elif model_id == 0:
            filter_model = chain[:, nmodel_idx] < 0.5
        elif model_id == 1:
            filter_model = chain[:, nmodel_idx] > 0.5
        else:
            err = f"'model_idx' can only be an integer equal to 0 or 1. I got {model_id=}."
            log.error(err)
            raise SystemExit

        chain = chain[filter_model]
    else:
        pass

    if par_to_plot:
        filter_par = [list(params).index(x) for x in par_to_plot if x in params]
    else:
        filter_par = [list(params).index(x)
                for x in params
                if x
                not in [
                    "nmodel",
                    "log_posterior",
                    "log_likelihood",
                    "acceptance_rate",
                    "n_parall"]
                and '+' not in x
                and '-' not in x]

    filtered_par = [par.replace('_','-') for par in params[filter_par]] # type: ignore
    filtered_par = [par.replace('np-', '') if 'gw-bhb-np' in par else par for par in filtered_par]

    return chain[:, filter_par], filtered_par

calc_df

calc_df(chain: NDArray) -> NDArray

Calculate dropout bayes factor

PARAMETER DESCRIPTION
chain

input dropout chain with shape assumed to be (n_bootstrap, n_samples)

TYPE: NDArray

RETURNS DESCRIPTION
bayes_facs

dropout bayes factor with shape (n_bootstrap,)

TYPE: NDArray

Source code in src/ptarcade/chains_utils.py
def calc_df(chain: NDArray) -> NDArray:
    """Calculate dropout bayes factor

    Parameters
    ----------
    chain : NDArray
        input dropout chain with shape assumed to be (n_bootstrap, n_samples)

    Returns
    -------
    bayes_facs : NDArray
        dropout bayes factor with shape (n_bootstrap,)
    """
    bayes_facs = np.full(chain.shape[0], 0.0)

    for ii, arr in enumerate(chain):
        bayes_facs[ii] = model_utils.odds_ratio(arr)[0]

    return bayes_facs

bf_bootstrap

bf_bootstrap(
    chain: NDArray, burn: int = 0
) -> tuple[float, float]

Compute mean and variance of bayes factor.

This function computes the mean and variance of the bayes factor after bootstrapping for a given chain.

PARAMETER DESCRIPTION
chain

The Markov Chain Monte Carlo (MCMC) chain to be analyzed. This should be a multi-dimensional array where each row represents a state in the chain, and each column represents a parameter.

TYPE: NDArray

burn

The burn-in period to be discarded from the start of the chain. This should be a non-negative integer. If not provided, no burn-in period will be discarded.

TYPE: int DEFAULT: 0

RETURNS DESCRIPTION
mean

The mean of the bootstrapped degrees of freedom distribution.

TYPE: float

var

The variance of the bootstrapped degrees of freedom distribution.

TYPE: float

Notes

This function uses the acor library to compute the autocorrelation time of the chain, which is then used to thin the chain. The thinned chain is then bootstrapped using the 'bootstrap' function with the calc_df user statistic, to obtain a distribution of degrees of freedom. The mean and variance of this distribution are then computed.

Source code in src/ptarcade/chains_utils.py
def bf_bootstrap(chain: NDArray, burn: int = 0) -> tuple[float, float]:
    """Compute mean and variance of bayes factor.

    This function computes the mean and variance of the bayes factor after bootstrapping
    for a given chain.

    Parameters
    ----------
    chain : NDArray
        The Markov Chain Monte Carlo (MCMC) chain to be analyzed. This should be a multi-dimensional array where each
        row represents a state in the chain, and each column represents a parameter.
    burn : int, optional
        The burn-in period to be discarded from the start of the chain. This should be a non-negative integer.
        If not provided, no burn-in period will be discarded.

    Returns
    -------
    mean : float
        The mean of the bootstrapped degrees of freedom distribution.
    var : float
        The variance of the bootstrapped degrees of freedom distribution.

    Notes
    -----
    This function uses the `acor` library to compute the autocorrelation time of the chain, which is then used to thin
    the chain. The thinned chain is then bootstrapped using the 'bootstrap' function with the `calc_df` user statistic,
    to obtain a distribution of degrees of freedom. The mean and variance of this distribution are then computed.

    """
    corr_len = int(acor.acor(chain[burn:,-1])[0])

    test = chain[burn::corr_len]

    df_dist_bs = bootstrap(test[:,0], n_bootstraps = 50000, user_statistic = calc_df)

    mean = np.mean(df_dist_bs)
    var = np.var(df_dist_bs)

    return mean, np.sqrt(var)

compute_bf

compute_bf(
    chain: NDArray,
    params: list[str],
    bootstrap: bool = False,
) -> tuple[float, float]

Compute the Bayes factor and estimate its uncertainty.

PARAMETER DESCRIPTION
chain

The Markov Chain Monte Carlo (MCMC) chain to be analyzed. This should be a multi-dimensional array where each row represents a state in the chain, and each column represents a parameter. The 'nmodel' and 'log_posterior' columns should be used to specify the model index and the log of the posterior probabilities.

TYPE: NDArray

params

The names of the parameters in the chain. This should be a list of strings with the same length as the number of columns in the chain. It is expected to contain 'nmodel' and 'log_posterior', which will be used to filter the chain based on the model index and compute the Bayes factor.

TYPE: list[str]

bootstrap

A flag indicating whether to compute the Bayes factor using a bootstrap method. If True, the Bayes factor will be computed using the 'get_bf' function. The bootsrap calculation is significantly slower. Defaults to False.

TYPE: bool DEFAULT: False

RETURNS DESCRIPTION
bf

The computed Bayes factor. This gives the evidence for model 0 over model 1. A higher value provides stronger evidence for model 0.

TYPE: float

unc

The computed uncertainty of the Bayes factor.

TYPE: float

RAISES DESCRIPTION
SystemExit

If nmodel or log_posterior is not found in the params list.

Source code in src/ptarcade/chains_utils.py
def compute_bf(chain: NDArray, params: list[str], bootstrap: bool = False) -> tuple[float, float]:  # noqa: FBT001, FBT002
    """Compute the Bayes factor and estimate its uncertainty.

    Parameters
    ----------
    chain : NDArray
        The Markov Chain Monte Carlo (MCMC) chain to be analyzed. This should be a multi-dimensional array where each
        row represents a state in the chain, and each column represents a parameter. The 'nmodel' and 'log_posterior'
        columns should be used to specify the model index and the log of the posterior probabilities.
    params : list[str]
        The names of the parameters in the chain. This should be a list of strings with the same length as the number of
        columns in the chain. It is expected to contain 'nmodel' and 'log_posterior', which will be used to filter the
        chain based on the model index and compute the Bayes factor.
    bootstrap : bool, optional
        A flag indicating whether to compute the Bayes factor using a bootstrap method. If True, the Bayes factor will
        be computed using the 'get_bf' function. The bootsrap calculation is significantly slower. Defaults to False.

    Returns
    -------
    bf : float
        The computed Bayes factor. This gives the evidence for model 0 over model 1. A higher value provides
        stronger evidence for model 0.
    unc : float
        The computed uncertainty of the Bayes factor.

    Raises
    ------
    SystemExit
        If `nmodel` or `log_posterior` is not found in the `params` list.

    """
    try:
        nmodel_idx = list(params).index('nmodel')
    except ValueError:
        err = ("'nmodel' was not found in params.\n"
               f"You supplied {params=}")
        log.error(err)
        raise SystemExit from None
    try:
        posterior_idx = list(params).index('log_posterior')
    except ValueError:
        err = ("'log_posterior' was not found in params.\n"
               f"You supplied {params=}")
        log.error(err)
        raise SystemExit from None


    if bootstrap:
        data = chain[:,[nmodel_idx, posterior_idx]]
        bf, unc = bf_bootstrap(data)

    else:
        bf, unc = model_utils.odds_ratio(chain[:, nmodel_idx], models=[0,1])

    return bf, unc

bisection

bisection(
    f: Callable[[Any], float],
    a: float,
    b: float,
    tol: float,
) -> float | None

Find roots for real-valued function using bisection method.

This function implements the bisection method for root finding of a real-valued function. It recursively divides the interval [a, b] into two subintervals until the absolute value of f evaluated at the midpoint is less than the specified tolerance, at which point it returns the midpoint as an approximation of the root.

PARAMETER DESCRIPTION
f

The function for which the root is to be found. It must be real-valued and continuous on the interval [a, b].

TYPE: Callable[[Any], float]

a

The left endpoint of the interval in which the root is sought. It must be less than b.

TYPE: float

b

The right endpoint of the interval in which the root is sought. It must be greater than a.

TYPE: float

tol

The tolerance for the root approximation. The function will return when the absolute value of f evaluated at the midpoint is less than tol. It must be greater than 0.

TYPE: float

RETURNS DESCRIPTION
float | None

The midpoint of the final subinterval if a root is found; None otherwise. The root approximation m is guaranteed to satisfy |f(m)| < tol if the function converges.

RAISES DESCRIPTION
SystemExit

If a is not less than b, or if tol is not greater than 0.

Notes

This is a recursive implementation of the bisection method. The bisection method assumes that the function f changes sign over the interval [a, b], which implies that a root exists in this interval by the Intermediate Value Theorem.

Source code in src/ptarcade/chains_utils.py
def bisection(f: Callable[[Any], float], a: float, b: float, tol: float) -> float | None:
    """Find roots for real-valued function using bisection method.

    This function implements the bisection method for root finding of a real-valued function. It recursively divides
    the interval [a, b] into two subintervals until the absolute value of f evaluated at the midpoint is less than
    the specified tolerance, at which point it returns the midpoint as an approximation of the root.

    Parameters
    ----------
    f : Callable[[Any], float]
        The function for which the root is to be found. It must be real-valued and continuous on the interval [a, b].
    a : float
        The left endpoint of the interval in which the root is sought. It must be less than b.
    b : float
        The right endpoint of the interval in which the root is sought. It must be greater than a.
    tol : float
        The tolerance for the root approximation. The function will return when the absolute value of f evaluated at
        the midpoint is less than tol. It must be greater than 0.

    Returns
    -------
    float | None
        The midpoint of the final subinterval if a root is found; None otherwise. The root approximation m is guaranteed
        to satisfy |f(m)| < tol if the function converges.

    Raises
    ------
    SystemExit
        If a is not less than b, or if tol is not greater than 0.

    Notes
    -----
    This is a recursive implementation of the bisection method. The bisection method assumes that the function f changes
    sign over the interval [a, b], which implies that a root exists in this interval by the Intermediate Value Theorem.

    """
    if a >= b:
        err = f"a is not less than b {a=} {b=}."
        log.error(err)
        raise SystemExit

    if tol <= 0:
        err = f"tol is not greater than 0 {tol=}."
        log.error(err)
        raise SystemExit

    if np.sign(f(a)) == np.sign(f(b)):
        return None

    m = (a + b) / 2

    if np.abs(f(m)) < tol:
        return m
    elif np.sign(f(a)) == np.sign(f(m)):
        return bisection(f, m, b, tol)
    elif np.sign(f(b)) == np.sign(f(m)):
        return bisection(f, a, m, tol)
    else:
        return None

k_ratio_aux_1D

k_ratio_aux_1D(
    sample: MCSamples,
    bf: float,
    par: str,
    par_range: list[float],
    k_ratio: float,
) -> float | None

Returns the bound value for a given k-ratio in a 1D posterior density plot.

PARAMETER DESCRIPTION
sample

An instance of the MCSamples class, containing the multivariate Monte Carlo samples on which the function is operating.

TYPE: MCSamples

bf

The Bayes factor for the exotic + SMBHB vs. SMBHB model. Represents the strength of evidence in favour of the exotic model.

TYPE: float

par

The name of the parameter for which the k-ratio bound should be computed.

TYPE: str

par_range

The lower and upper prior limits for the parameter. It is represented as a list where the first element is the lower limit and the second element is the upper limit.

TYPE: list[float]

k_ratio

The fraction of plateau height at which the height level is determined. This is used to compute the height_KB, which represents the height at which the bound is computed.

TYPE: float

RETURNS DESCRIPTION
k_val

The computed k-ratio bound value. This is the value of the parameter at which the 1D posterior density crosses the height_KB.

TYPE: float

RAISES DESCRIPTION
ValueError

If the integration or the bisection search fails due to numerical issues or if the specified parameter range does not contain a valid root.

Source code in src/ptarcade/chains_utils.py
def k_ratio_aux_1D(
    sample: MCSamples,
    bf: float,
    par: str,
    par_range: list[float],
    k_ratio: float) -> float | None:
    """Returns the bound value for a given k-ratio in a 1D posterior density plot.

    Parameters
    ----------
    sample : MCSamples
        An instance of the MCSamples class, containing the multivariate
        Monte Carlo samples on which the function is operating.
    bf : float
        The Bayes factor for the exotic + SMBHB vs. SMBHB model.
        Represents the strength of evidence in favour of the exotic model.
    par : str
        The name of the parameter for which the k-ratio bound should be computed.
    par_range : list[float]
        The lower and upper prior limits for the parameter. It is represented as a list where
        the first element is the lower limit and the second element is the upper limit.
    k_ratio : float
        The fraction of plateau height at which the height level is determined. This is used
        to compute the height_KB, which represents the height at which the bound is computed.

    Returns
    -------
    k_val : float
        The computed k-ratio bound value. This is the value of the parameter at which the
        1D posterior density crosses the height_KB.

    Raises
    ------
    ValueError
        If the integration or the bisection search fails due to numerical issues or
        if the specified parameter range does not contain a valid root.

    """

    density1D = MCSamples.get1DDensity(sample, par)

    norm = integrate.quad(density1D, par_range[0], par_range[1], full_output=1)[0]

    prior = 1/(par_range[1]-par_range[0])

    height_KB = k_ratio*prior/bf*norm

    k_val = bisection(f = (lambda x: density1D(x)-height_KB), a = par_range[0], b = par_range[1], tol = 10**(-8))

    return k_val

k_ratio_aux_2D

k_ratio_aux_2D(
    sample: MCSamples,
    bf: float,
    par_1: str,
    par_2: str,
    par_range_1: list[float],
    par_range_2: list[float],
    k_ratio: float,
) -> float

Returns the height level corresponding to the given k-ratio in a 2D posterior density plot.

PARAMETER DESCRIPTION
sample

An instance of the MCSamples class, containing the multivariate Monte Carlo samples on which the function is operating.

TYPE: MCSamples

bf

The Bayes factor for the exotic + SMBHB vs. SMBHB model. Represents the strength of evidence in favour of the exotic model.

TYPE: float

par_1

The names of the two parameters for which the k-ratio bound should be computed.

TYPE: str

par_2

The names of the two parameters for which the k-ratio bound should be computed.

TYPE: str

par_range_1

The lower and upper prior limits for the parameters. Each is represented as a list where the first element is the lower limit and the second element is the upper limit.

TYPE: list[float]

par_range_2

The lower and upper prior limits for the parameters. Each is represented as a list where the first element is the lower limit and the second element is the upper limit.

TYPE: list[float]

k_ratio

The fraction of plateau height at which the height level is determined. This is used to compute the height_KB, which represents the height at which the bound is computed.

TYPE: float

RETURNS DESCRIPTION
height_KB

The computed height level in the 2D posterior density plot. This is the height at which the density equals the computed height_KB.

TYPE: float

RAISES DESCRIPTION
ValueError

If the double integration fails due to numerical issues.

Source code in src/ptarcade/chains_utils.py
def k_ratio_aux_2D(
    sample: MCSamples,
    bf: float,
    par_1: str,
    par_2: str,
    par_range_1: list[float],
    par_range_2: list[float],
    k_ratio: float) -> float:
    """
    Returns the height level corresponding to the given k-ratio in a 2D posterior density plot.

    Parameters
    ----------
    sample : MCSamples
        An instance of the MCSamples class, containing the multivariate
        Monte Carlo samples on which the function is operating.
    bf : float
        The Bayes factor for the exotic + SMBHB vs. SMBHB model.
        Represents the strength of evidence in favour of the exotic model.
    par_1, par_2 : str
        The names of the two parameters for which the k-ratio bound should be computed.
    par_range_1, par_range_2 : list[float]
        The lower and upper prior limits for the parameters. Each is represented as a list
        where the first element is the lower limit and the second element is the upper limit.
    k_ratio : float
        The fraction of plateau height at which the height level is determined. This is used
        to compute the height_KB, which represents the height at which the bound is computed.

    Returns
    -------
    height_KB : float
        The computed height level in the 2D posterior density plot. This is the height at which
        the density equals the computed height_KB.

    Raises
    ------
    ValueError
        If the double integration fails due to numerical issues.

    """
    density2D = MCSamples.get2DDensity(sample, par_1, par_2)

    with warnings.catch_warnings():
        warnings.simplefilter("ignore")
        #Calculate prior value at each point
        norm = integrate.dblquad(density2D, par_range_2[0], par_range_2[1], par_range_1[0], par_range_1[1])[0]

    prior = 1/(par_range_2[1]-par_range_2[0]) * 1/(par_range_1[1]-par_range_1[0])

    height_KB = k_ratio*prior/bf*norm

    return height_KB

get_k_levels

get_k_levels(
    sample: MCSamples,
    pars: list[str],
    priors: dict,
    bf: float,
    k_ratio: float,
) -> tuple[NDArray, NDArray]

Compute and return the 1D and 2D k-ratio bounds for a given set of parameters.

PARAMETER DESCRIPTION
sample

An instance of the MCSamples class, containing the multivariate Monte Carlo samples on which the function is operating.

TYPE: MCSamples

pars

The list of all parameters for which the k-ratio bounds should be computed. The parameters 'gw-bhb-0' and 'gw-bhb-1' are excluded from this computation.

TYPE: list[str]

priors

A dictionary containing the lower and upper prior limits for each parameter. Each key-value pair in the dictionary corresponds to a parameter and its limits, respectively.

TYPE: dict

bf

The Bayes factor for the exotic + SMBHB vs. SMBHB model. Represents the strength of evidence in favour of the exotic model.

TYPE: float

k_ratio

The fraction of plateau height at which the height level is determined.

TYPE: float

RETURNS DESCRIPTION
NDArray

numpy array representing the 1D k-ratio bounds. Each element in the array is a list where the first elements are the parameter names and the last element is the computed k-ratio bound.

NDArray

numpy array representing the 2D k-ratio bounds. Each element in the array is a list where the first elements are the parameter names and the last element is the computed k-ratio bound.

Source code in src/ptarcade/chains_utils.py
def get_k_levels(sample: MCSamples,
                 pars: list[str],
                 priors: dict,
                 bf: float,
                 k_ratio: float) -> tuple[NDArray, NDArray]:
    """Compute and return the 1D and 2D k-ratio bounds for a given set of parameters.

    Parameters
    ----------
    sample : MCSamples
        An instance of the MCSamples class, containing the multivariate
        Monte Carlo samples on which the function is operating.
    pars : list[str]
        The list of all parameters for which the k-ratio bounds should be computed.
        The parameters 'gw-bhb-0' and 'gw-bhb-1' are excluded from this computation.
    priors : dict
        A dictionary containing the lower and upper prior limits for each parameter.
        Each key-value pair in the dictionary corresponds to a parameter and its limits,
        respectively.
    bf : float
        The Bayes factor for the exotic + SMBHB vs. SMBHB model.
        Represents the strength of evidence in favour of the exotic model.
    k_ratio : float
        The fraction of plateau height at which the height level is determined.

    Returns
    -------
    NDArray
        numpy array representing the 1D k-ratio bounds.
        Each element in the array is a list where the first elements are the parameter names
        and the last element is the computed k-ratio bound.
    NDArray
        numpy array representing the 2D k-ratio bounds.
        Each element in the array is a list where the first elements are the parameter names
        and the last element is the computed k-ratio bound.

    """
    np_pars = [p for p in pars if p not in ["gw-bhb-0", "gw-bhb-1"]]

    levels_2d = []
    for par_1, par_2 in combinations(np_pars, r=2):
        levels_2d.append(
            [par_1,
             par_2,
             k_ratio_aux_2D(
                sample=sample,
                bf=bf,
                par_1=par_1,
                par_2=par_2,
                par_range_1=priors[par_1],
                par_range_2=priors[par_2],
                k_ratio=k_ratio)])

    levels_1d = []
    for par in np_pars:
        levels_1d.append(
            [par,
            k_ratio_aux_1D(
                sample = sample,
                bf = bf,
                par = par,
                par_range = priors[par],
                k_ratio = k_ratio)])

    return np.array(levels_1d), np.array(levels_2d)

get_bayes_est

get_bayes_est(
    samples: MCSamples, params: list[str]
) -> dict[str, tuple[float, float]]

Compute and return the Bayesian estimates for a given set of parameters based on a sample of data.

PARAMETER DESCRIPTION
samples

An instance of the MCSamples class, containing the multivariate Monte Carlo samples on which the function is operating.

TYPE: MCSamples

params

The list of parameters for which the Bayesian estimates should be computed.

TYPE: list[str]

RETURNS DESCRIPTION
x

A dictionary representing the Bayesian estimates for each parameter. Each key-value pair in the dictionary corresponds to a parameter and its Bayesian estimate, respectively. Each estimate is represented as a tuple, where the first element is the mean and the second element is the standard deviation.

TYPE: dict[str, tuple[float, float]]

Source code in src/ptarcade/chains_utils.py
def get_bayes_est(samples: MCSamples, params: list[str]) -> dict[str, tuple[float, float]]:
    """Compute and return the Bayesian estimates for a given set of parameters based on a sample of data.

    Parameters
    ----------
    samples : MCSamples
        An instance of the MCSamples class, containing the multivariate
        Monte Carlo samples on which the function is operating.
    params : list[str]
        The list of parameters for which the Bayesian estimates should be computed.

    Returns
    -------
    x : dict[str, tuple[float, float]]
        A dictionary representing the Bayesian estimates for each parameter.
        Each key-value pair in the dictionary corresponds to a parameter and its
        Bayesian estimate, respectively. Each estimate is represented as a tuple,
        where the first element is the mean and the second element is the standard
        deviation.

    """
    x = list(samples.setMeans())
    xerr = list(np.sqrt(samples.getVars()))
    x = zip(x, xerr)
    x = dict(zip(params, x))
    return x

get_max_pos

get_max_pos(
    params: list[str],
    bayes_est: dict[str, tuple[float, float]],
    sample: MCSamples,
    priors: dict[str, tuple[float, float]],
    spc: int = 10,
) -> dict[str, float]

Compute and return the maximum posterior position for a given set of parameters.

PARAMETER DESCRIPTION
params

The list of parameters for which the maximum posterior position should be computed.

TYPE: list[str]

bayes_est

A dictionary containing the Bayesian estimates for each parameter. Each key-value pair in the dictionary corresponds to a parameter and its Bayesian estimate, respectively. Each estimate is represented as a tuple, where the first element is the mean and the second element is the standard deviation.

TYPE: dict[str, tuple[float, float]]

sample

An instance of the MCSamples class, containing the multivariate Monte Carlo samples on which the function is operating.

TYPE: MCSamples

priors

A dictionary containing the lower and upper prior limits for each parameter. Each key-value pair in the dictionary corresponds to a parameter and its limits, respectively.

TYPE: dict[str, tuple[float, float]]

spc

The number of equally spaced points to be considered within the bounds of each parameter when searching for the maximum posterior position. Default is 10.

TYPE: int DEFAULT: 10

RETURNS DESCRIPTION
out

A dictionary representing the maximum posterior positions for each parameter. Each key-value pair in the dictionary corresponds to a parameter and its maximum posterior position, respectively.

TYPE: dict[str, float]

Source code in src/ptarcade/chains_utils.py
def get_max_pos(params: list[str],
                bayes_est: dict[str, tuple[float, float]],
                sample: MCSamples,
                priors: dict[str, tuple[float, float]],
                spc: int = 10) -> dict[str, float]:
    """Compute and return the maximum posterior position for a given set of parameters.

    Parameters
    ----------
    params : list[str]
        The list of parameters for which the maximum posterior position should be computed.
    bayes_est : dict[str, tuple[float, float]]
        A dictionary containing the Bayesian estimates for each parameter.
        Each key-value pair in the dictionary corresponds to a parameter and its
        Bayesian estimate, respectively. Each estimate is represented as a tuple,
        where the first element is the mean and the second element is the standard
        deviation.
    sample : MCSamples
        An instance of the MCSamples class, containing the multivariate
        Monte Carlo samples on which the function is operating.
    priors : dict[str, tuple[float, float]]
        A dictionary containing the lower and upper prior limits for each parameter.
        Each key-value pair in the dictionary corresponds to a parameter and its limits,
        respectively.
    spc : int, optional
        The number of equally spaced points to be considered within the bounds of
        each parameter when searching for the maximum posterior position. Default is 10.

    Returns
    -------
    out : dict[str, float]
        A dictionary representing the maximum posterior positions for each parameter.
        Each key-value pair in the dictionary corresponds to a parameter and its
        maximum posterior position, respectively.

    """
    out = {}

    for par in params:
        bounds = priors.get(par)

        density =  MCSamples.get1DDensity(sample, par, normalized=False)

        mind = lambda x: -density(x)
        if not bounds:
            x = minimize(mind, np.array(bayes_est.get(par)[0]))
            x0 = x.x
            d0 = mind(x0)
        else:
            ini = np.linspace(bounds[0], bounds[1], spc)
            ini = [val for val in ini]
            ini.append(bayes_est.get(par)[0])
            x = []
            for i in range(len(ini)):
                x1 = minimize(mind, ini[i])
                x.append(x1.x)
                bnds = Bounds(bounds[0], bounds[1])
                x2 = minimize(mind, ini[i], bounds=bnds)
                x.append(x2.x)
            d0 = 0
            x0 = 0
            for val in x:
                if(mind(val)<d0):
                    x0 = val
                    d0 = mind(val)

        out[par] = x0[0]

    return out

get_c_levels

get_c_levels(
    sample: MCSamples, pars: list[str], levels: list[float]
) -> NDArray

Compute and return the highest posterior interval (HPI) for a given set of parameters and confidence levels.

PARAMETER DESCRIPTION
sample

An instance of the MCSamples class, containing the multivariate Monte Carlo samples on which the function is operating.

TYPE: MCSamples

pars

The list of parameters for which the HPI should be computed.

TYPE: list[str]

levels

The list of confidence levels for which the HPI should be computed. Each value in the list should be between 0 and 1.

TYPE: list[float]

RETURNS DESCRIPTION
NDArray

A numpy array representing the HPI for each parameter and each confidence level. Each element in the array is a list, where the first element is a parameter name and the second element is a list of HPIs for each confidence level.

Source code in src/ptarcade/chains_utils.py
def get_c_levels(sample: MCSamples, pars: list[str], levels: list[float]) -> NDArray:
    """Compute and return the highest posterior interval (HPI) for a given set of parameters and confidence levels.

    Parameters
    ----------
    sample : MCSamples
        An instance of the MCSamples class, containing the multivariate
        Monte Carlo samples on which the function is operating.
    pars : list[str]
        The list of parameters for which the HPI should be computed.
    levels : list[float]
        The list of confidence levels for which the HPI should be computed.
        Each value in the list should be between 0 and 1.

    Returns
    -------
    NDArray
        A numpy array representing the HPI for each parameter and each confidence level.
        Each element in the array is a list, where the first element is a parameter name and
        the second element is a list of HPIs for each confidence level.

    """
    hpi = []
    for par in pars:
        density = MCSamples.get1DDensity(sample, par)

        points = [[]] * len(levels)
        for idx, level in enumerate(levels):
            x = density.getLimits(level)
            if not x[-1] and not x[-2]:
                points[idx] = [x[0], x[1]]
            elif not x[-1]:
                points[idx] = [False, x[1]]
            elif not x[-2]:
                points[idx] = [x[0], False]
            else:
                points[idx] = [False, False]

        hpi.append([par, points])

    return np.array(hpi, dtype='object')