Sweeps and Suites¶
This notebook demonstrates synth-bench's systematic experiment API: severity_sweep, difficulty_sweep, experiment_grid, and BenchSuite.
In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from synthbench import (
    BenchSuite,
    FriedmanDGP,
    LinearDGP,
    MissingDataCorruptor,
    difficulty_sweep,
    experiment_grid,
    severity_sweep,
)
plt.rcParams["figure.dpi"] = 72
severity_sweep — systematically vary corruptor severity¶
In [2]:
dgp = FriedmanDGP(task_type="classification")
results = severity_sweep(
    dgp,
    MissingDataCorruptor,
    severities=["low", "medium", "high"],
    n_samples=300,
    n_features=10,
    random_state=0,
)
rows = []
for sev, r in zip(["low", "medium", "high"], results, strict=False):
    be = r.metadata["bayes_error"]
    rows.append(
        {
            "severity": sev,
            "missing_frac": round(np.isnan(r.X).mean(), 3),
            "bayes_error": round(be, 4) if be is not None else None,
        }
    )
pd.DataFrame(rows)
Out[2]:
|   | severity | missing_frac | bayes_error |
|---|---|---|---|
| 0 | low | 0.05 | None |
| 1 | medium | 0.15 | None |
| 2 | high | 0.30 | None |
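The observed missing fraction rises directly with the severity level (0.05, 0.15, 0.30). As a quick visual check, you could plot it; this is a minimal sketch (not executed in this notebook) that reuses the matplotlib import from the first cell and the rows list built above:

df = pd.DataFrame(rows)
fig, ax = plt.subplots(figsize=(4, 3))
ax.bar(df["severity"], df["missing_frac"])  # one bar per severity level
ax.set_xlabel("severity")
ax.set_ylabel("observed missing fraction")
ax.set_title("MissingDataCorruptor severity sweep")
plt.show()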
difficulty_sweep — systematically vary DGP complexity¶
In [3]:
results = difficulty_sweep(
    LinearDGP,
    complexities=["low", "medium", "high"],
    n_samples=300,
    n_features=10,
    random_state=0,
    task_type="regression",
)
rows = []
for complexity, r in zip(["low", "medium", "high"], results, strict=False):
    er = r.metadata["effective_rank"]
    rows.append(
        {
            "complexity": complexity,
            "effective_rank": round(er, 2) if er is not None else None,
        }
    )
pd.DataFrame(rows)
Out[3]:
|   | complexity | effective_rank |
|---|---|---|
| 0 | low | 9.97 |
| 1 | medium | 9.97 |
| 2 | high | 9.96 |
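Each sweep entry is a BenchResult, so you can inspect one directly. A minimal sketch, assuming the sweep returns an indexable list (as the zip above suggests) and that metadata behaves like a dict:

r_high = results[-1]            # the complexity="high" run
print(r_high.X.shape)           # (300, 10), per the arguments above
print(sorted(r_high.metadata))  # all metadata keys recorded for this run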
experiment_grid — full factorial (n_samples x complexities x severities)¶
experiment_grid runs a single DGP class against a single corruptor class across all
combinations of sample count, complexity, and severity.
Keep the grid small here: one sample size, two complexities, and two severities give a 1x2x2 grid of 4 runs.
In [4]:
grid = experiment_grid(
    LinearDGP,
    MissingDataCorruptor,
    n_samples_list=[200],
    complexities=["low", "high"],
    severities=["low", "high"],
    n_features=8,
    random_state=0,
    task_type="classification",
)
print(f"Grid produced {len(grid)} BenchResult objects")
rows = []
for (n_samples, complexity, severity), r in grid.items():
    be = r.metadata["bayes_error"]
    rows.append(
        {
            "n_samples": n_samples,
            "complexity": complexity,
            "severity": severity,
            "bayes_error": round(be, 4) if be is not None else None,
        }
    )
pd.DataFrame(rows)
Grid produced 4 BenchResult objects
Out[4]:
|   | n_samples | complexity | severity | bayes_error |
|---|---|---|---|---|
| 0 | 200 | low | low | None |
| 1 | 200 | low | high | None |
| 2 | 200 | high | low | None |
| 3 | 200 | high | high | None |
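Because grid is keyed by (n_samples, complexity, severity) tuples, a single cell can be pulled out by key, and the summary above can be pivoted into a grid-shaped view. A small sketch (not executed here) reusing the rows list from the cell above:

r = grid[(200, "high", "low")]  # one BenchResult from the factorial grid
print(r.X.shape)

summary = pd.DataFrame(rows)
print(summary.pivot(index="complexity", columns="severity", values="bayes_error"))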
BenchSuite — named curated collections¶
In [5]:
suite = BenchSuite("easy-classification")
results = suite.run()
print(f"Suite '{suite.name}' -- {len(results)} datasets:")
rows = []
for name, r in results.items():
    be = r.metadata["bayes_error"]
    rows.append(
        {
            "entry": name,
            "n_samples": r.X.shape[0],
            "n_features": r.X.shape[1],
            "bayes_error": round(be, 4) if be is not None else None,
        }
    )
pd.DataFrame(rows).set_index("entry")
Suite 'easy-classification' -- 3 datasets:
Out[5]:
| entry | n_samples | n_features | bayes_error |
|---|---|---|---|
| linear_low | 500 | 10 | 0.450 |
| tree_low | 500 | 10 | 0.494 |
| friedman_low | 500 | 10 | 0.470 |
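suite.run() returns results keyed by entry name, so a single suite dataset can be retrieved directly. A minimal sketch using the names listed above:

r = results["friedman_low"]
print(r.X.shape, r.metadata["bayes_error"])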
Reproducibility — same random_state yields identical results¶
In [6]:
dgp = LinearDGP(task_type="regression", complexity="medium")
r1 = severity_sweep(
    dgp,
    MissingDataCorruptor,
    ["low", "medium", "high"],
    n_samples=200,
    random_state=99,
)
r2 = severity_sweep(
    dgp,
    MissingDataCorruptor,
    ["low", "medium", "high"],
    n_samples=200,
    random_state=99,
)
# Use equal_nan=True: MissingDataCorruptor introduces NaN, and NaN != NaN
# under standard equality.
identical = all(
    np.array_equal(a.X, b.X, equal_nan=True) for a, b in zip(r1, r2, strict=False)
)
print(f"Two runs with random_state=99 are identical: {identical}")
Two runs with random_state=99 are identical: True
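As a sanity check in the other direction, changing the seed should change the generated data. A sketch repeating the same call with a different random_state (output not shown here, but one would expect it to report a difference):

r3 = severity_sweep(
    dgp,
    MissingDataCorruptor,
    ["low", "medium", "high"],
    n_samples=200,
    random_state=100,  # a different seed
)
differs = not all(
    np.array_equal(a.X, b.X, equal_nan=True) for a, b in zip(r1, r3, strict=False)
)
print(f"random_state=100 differs from random_state=99: {differs}")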